├── readme.MD └── src ├── __pycache__ ├── config.cpython-35.pyc ├── dqn_model.cpython-35.pyc ├── schedule.cpython-35.pyc ├── Neural_Net.cpython-35.pyc ├── config_AMZN.cpython-35.pyc ├── config_GOOG.cpython-35.pyc ├── model_base.cpython-35.pyc ├── order_queue.cpython-35.pyc ├── message_queue.cpython-35.pyc ├── replay_buffer.cpython-35.pyc ├── evaluate_policy.cpython-35.pyc └── limit_order_book.cpython-35.pyc ├── schedule.py ├── order_queue.py ├── config.py ├── config_GOOG.py ├── config_AAPL.py ├── config_AMZN.py ├── Neural_Net.py ├── plot_reward.py ├── baseline_market_order.py ├── message_queue.py ├── replay_buffer.py ├── evaluate_policy.py ├── test_dqn.py ├── market_policy.py ├── snl_policy.py ├── tree_policy.py ├── baseline_dp.py ├── test_book_queue.py ├── baseline_SnL.py ├── dqn_model.py ├── model_base.py ├── dp_policy.py └── limit_order_book.py /readme.MD: -------------------------------------------------------------------------------- 1 | # Limit-Order-Book-Reinforcement 2 | -------------------------------------------------------------------------------- /src/__pycache__/config.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/config.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/dqn_model.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/dqn_model.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/schedule.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/schedule.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/Neural_Net.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/Neural_Net.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/config_AMZN.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/config_AMZN.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/config_GOOG.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/config_GOOG.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/model_base.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/model_base.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/order_queue.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/order_queue.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/message_queue.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/message_queue.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/replay_buffer.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/replay_buffer.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/evaluate_policy.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/evaluate_policy.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/limit_order_book.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/limit_order_book.cpython-35.pyc -------------------------------------------------------------------------------- /src/schedule.py: -------------------------------------------------------------------------------- 1 | class LinearSchedule(object): 2 | def __init__(self, eps_begin, eps_end, nsteps): 3 | self._epsilon = eps_begin 4 | self._eps_begin = eps_begin 5 | self._eps_end = eps_end 6 | self._nsteps = nsteps 7 | 8 | def update(self, t): 9 | alpha = 1.0 * t / self._nsteps 10 | self._epsilon = max(alpha*self._eps_end+(1-alpha)*self._eps_begin, self._eps_end) 11 | 12 | def get_epsilon(self): 13 | return self._epsilon -------------------------------------------------------------------------------- /src/order_queue.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from message_queue import Message_Queue 4 | 5 | class Order_Queue(object): 6 | def __init__(self, path): 7 | self._df = pd.read_csv(path, header=None) 8 | self._row_idx = -1 9 | 10 | def create_orderbook_time(self, time, mq): 11 | mq.jump_to_time(time) 12 | self._row_idx= mq._row_idx 13 | row = self._df.iloc[self._row_idx] 14 | return self._create_orderbook(row) 15 | 16 | def _create_orderbook(self, row): 17 | len_row= int(len(row)/4) 18 | ask= np.array([int(row[4*i]) for i in range(len_row)]) 19 | ask_size= np.array([int(row[4*i+1]) for i in range(len_row)]) 20 | bid = np.array([int(row[4*i+2]) for i in range(len_row)]) 21 | bid_size = np.array([int(row[4*i+3]) for i in range(len_row)]) 22 | orderbook = {'ask':ask, 'ask_size':ask_size, 'bid':bid, 'bid_size':bid_size} 23 | return orderbook -------------------------------------------------------------------------------- /src/config.py: -------------------------------------------------------------------------------- 1 | class Config: 2 | def __init__(self): 3 | self.mode = 'train' 4 | 5 | self.nsteps_train = 1000 6 | self.print_freq = 50 7 | self.target_update_freq = 1000 8 | self.saving_freq = 250 9 | self.simulation_freq = 1000 10 | self.model_output = '../output_GOOG/GOOG' 11 | 12 | 
self.eps_begin = 1.0 13 | self.eps_end = 0.1 14 | self.nsteps = 1000 15 | self.dropout= 0.9 16 | 17 | self.lr_begin = 0.00025 18 | self.lr_end = 0.00005 19 | self.lr_nsteps = self.nsteps_train / 2 20 | 21 | self.gamma = 0.99 22 | self.grad_clip = True 23 | self.clip_val = 10 24 | self.batch_size = 32 25 | 26 | self.order_path = '../datasets/LOBSTER_SampleFile_GOOG_2012-06-21_10/GOOG_2012-06-21_34200000_57600000_orderbook_10.csv' 27 | self.message_path = '../datasets/LOBSTER_SampleFile_GOOG_2012-06-21_10/GOOG_2012-06-21_34200000_57600000_message_10.csv' 28 | self.depth = 3 29 | 30 | self.H = 600 31 | self.T = 20 32 | self.L = 10 33 | self.direction = -1 34 | self.base_point = 100 35 | self.train_start = 34200 36 | self.train_end = 46800 37 | self.test_start= 46800 38 | self.test_end= 57600 39 | self.I = 8000 40 | self.hidden_size= 10 41 | 42 | self.state_shape = [self.depth, 4] 43 | self.state_history = 2 44 | -------------------------------------------------------------------------------- /src/config_GOOG.py: -------------------------------------------------------------------------------- 1 | class Config: 2 | def __init__(self): 3 | self.mode = 'train' 4 | 5 | self.nsteps_train = 1000000 6 | self.print_freq = 50 7 | self.target_update_freq = 1000 8 | self.saving_freq = 25000 9 | self.simulation_freq = 1000 10 | self.model_output = '../output_GOOG_Neural' 11 | 12 | self.eps_begin = 1.0 13 | self.eps_end = 0.1 14 | self.nsteps = 1000 15 | self.dropout= 0.9 16 | 17 | self.lr_begin = 0.00025 18 | self.lr_end = 0.00005 19 | self.lr_nsteps = self.nsteps_train / 2 20 | 21 | self.gamma = 1 22 | self.grad_clip = True 23 | self.clip_val = 10 24 | self.batch_size = 32 25 | 26 | self.order_path = '../datasets/LOBSTER_SampleFile_GOOG_2012-06-21_10/GOOG_2012-06-21_34200000_57600000_orderbook_10.csv' 27 | self.message_path = '../datasets/LOBSTER_SampleFile_GOOG_2012-06-21_10/GOOG_2012-06-21_34200000_57600000_message_10.csv' 28 | self.depth = 30 29 | 30 | self.H = 600 31 | self.T = 20 32 | self.L = 10 33 | self.direction = -1 34 | self.base_point = 100 35 | self.train_start = 34200 36 | self.train_end = 46800 37 | self.test_start= 46800 38 | self.test_end= 57600 39 | self.I = 8000 40 | self.hidden_size= 20 41 | 42 | self.state_shape = [self.depth, 4] 43 | self.state_history = 4 44 | -------------------------------------------------------------------------------- /src/config_AAPL.py: -------------------------------------------------------------------------------- 1 | class Config: 2 | def __init__(self): 3 | self.mode = 'train' 4 | 5 | self.nsteps_train = 100000 6 | self.print_freq = 50 7 | self.target_update_freq = 1000 8 | self.saving_freq = 25000 9 | self.simulation_freq = 1000 10 | self.model_output = '../output_AAPL_linear' 11 | 12 | self.eps_begin = 1.0 13 | self.eps_end = 0.1 14 | self.nsteps = 1000 15 | self.dropout= 0.9 16 | 17 | self.lr_begin = 0.00025 18 | self.lr_end = 0.00005 19 | self.lr_nsteps = self.nsteps_train / 2 20 | 21 | self.gamma = 0.99 22 | self.grad_clip = True 23 | self.clip_val = 10 24 | self.batch_size = 32 25 | 26 | self.order_path = '../datasets/LOBSTER_SampleFile_AAPL_2012-06-21_10/AAPL_2012-06-21_34200000_57600000_orderbook_10.csv' 27 | self.message_path = '../datasets/LOBSTER_SampleFile_AAPL_2012-06-21_10/AAPL_2012-06-21_34200000_57600000_message_10.csv' 28 | self.depth = 30 29 | 30 | self.H = 600 31 | self.T = 20 32 | self.L = 10 33 | self.direction = -1 34 | self.base_point = 100 35 | self.train_start = 34200 36 | self.train_end = 46800 37 | self.test_start= 46800 38 | 
self.test_end= 57600 39 | self.I = 8000 40 | self.hidden_size= 10 41 | 42 | self.state_shape = [self.depth, 4] 43 | self.state_history = 2 44 | -------------------------------------------------------------------------------- /src/config_AMZN.py: -------------------------------------------------------------------------------- 1 | class Config: 2 | def __init__(self): 3 | self.mode = 'train' 4 | 5 | self.nsteps_train = 1000000 6 | self.print_freq = 50 7 | self.target_update_freq = 1000 8 | self.saving_freq = 25000 9 | self.simulation_freq = 1000 10 | self.model_output = '../output_AMZN_linear' 11 | 12 | self.eps_begin = 1.0 13 | self.eps_end = 0.1 14 | self.nsteps = 1000 15 | self.dropout= 0.9 16 | 17 | self.lr_begin = 0.00025 18 | self.lr_end = 0.00005 19 | self.lr_nsteps = self.nsteps_train / 2 20 | 21 | self.gamma = 0.99 22 | self.grad_clip = True 23 | self.clip_val = 10 24 | self.batch_size = 32 25 | 26 | self.order_path = '../datasets/LOBSTER_SampleFile_AMZN_2012-06-21_10/AMZN_2012-06-21_34200000_57600000_orderbook_10.csv' 27 | self.message_path = '../datasets/LOBSTER_SampleFile_AMZN_2012-06-21_10/AMZN_2012-06-21_34200000_57600000_message_10.csv' 28 | self.depth = 30 29 | 30 | self.H = 600 31 | self.T = 20 32 | self.L = 10 33 | self.direction = -1 34 | self.base_point = 100 35 | self.train_start = 34200 36 | self.train_end = 46800 37 | self.test_start= 46800 38 | self.test_end= 57600 39 | self.I = 4000 40 | self.hidden_size= 10 41 | 42 | self.state_shape = [self.depth, 4] 43 | self.state_history = 2 44 | -------------------------------------------------------------------------------- /src/Neural_Net.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import tensorflow as tf 4 | import tensorflow.contrib.layers as layers 5 | import os 6 | 7 | from config_GOOG import Config 8 | from model_base import model 9 | from dqn_model import DQN 10 | 11 | class Neural_DQN(DQN): 12 | def get_q_values_op(self, state, scope, reuse= False): 13 | num_actions = self._config.L + 1 # 1 for market order 14 | state_book, state_it = state 15 | 16 | with tf.variable_scope(scope, reuse=reuse): 17 | conv_1 = layers.conv2d(inputs=state_book, num_outputs=4, kernel_size=[3,3], stride=[1,1], activation_fn=tf.nn.relu, padding='same') 18 | conv_2 = layers.conv2d(inputs=conv_1, num_outputs=4, kernel_size=[3,3], stride=[1,1], activation_fn=tf.nn.relu, padding='same') 19 | conv_2_flattened = layers.flatten(inputs=conv_2) 20 | state_out = tf.concat([conv_2_flattened, state_it], axis=1) 21 | state_out= tf.nn.dropout(state_out, self._config.dropout) 22 | state_out= layers.fully_connected(state_out, num_outputs= self._config.hidden_size) 23 | out = layers.fully_connected(state_out, num_actions, activation_fn=None) 24 | return out 25 | 26 | if __name__ == '__main__': 27 | config = Config() 28 | model = Neural_DQN(config) 29 | model.initialize() 30 | model.train() 31 | 32 | -------------------------------------------------------------------------------- /src/plot_reward.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | 6 | parser = argparse.ArgumentParser(description='Plot Reward') 7 | parser.add_argument('--file', default= '../reward.csv', help='File Path', type=str) 8 | parser.add_argument('--ylabel', default= 'Raw Reward', help='Raw Reward/Relative Reward', type=str) 9 | args = parser.parse_args() 10 | 11 | def 
plot(reward, 12 | ticker=['AMZN','AAPL','GOOG','INTC','MSFT'], 13 | algo=['Market Order','SnL','Tree Search','Nevmyvaka'], 14 | ylabel='Raw Reward'): 15 | ### reward is a N * 4 array 16 | N_ticker, N_algo = reward.shape 17 | ind = np.arange(N_ticker) 18 | width = 1.0 / (N_algo + 1) 19 | fig, ax = plt.subplots() 20 | 21 | rects = dict() 22 | for i in range(N_algo): 23 | c = (i + 0.5) / N_algo 24 | rects[i] = ax.bar(ind+width*i, reward[:,i], width, color=(c,c,1-c)) 25 | ax.set_ylabel(ylabel) 26 | ax.set_xticks(ind+width*(N_algo-1)/2) 27 | ax.set_xticklabels(ticker) 28 | ax.legend((rects[i] for i in range(N_algo)), algo) 29 | plt.show() 30 | 31 | def plot_file(path, ylabel): 32 | df = pd.read_csv(path) 33 | header = df.axes[1] 34 | algo = list(header[1:]) 35 | ticker = list(df[header[0]]) 36 | reward = df[header[1:]].values 37 | plot(reward, ticker, algo, ylabel) 38 | 39 | if __name__ == '__main__': 40 | plot_file(args.file, args.ylabel) -------------------------------------------------------------------------------- /src/baseline_market_order.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from limit_order_book import Limit_Order_book 4 | from message_queue import Message_Queue 5 | 6 | parser = argparse.ArgumentParser(description='Dynamic Programming Solution') 7 | parser.add_argument('--file_msg', help='Message File Path') 8 | parser.add_argument('--base_size', default=1, help='Base Order Size', type=int) 9 | parser.add_argument('--order_size', default=12, help='Order Size', type=int) 10 | parser.add_argument('--order_direction', default=1, help='Buy 1, Sell -1', type=int) 11 | parser.add_argument('--start', default=34200, help='Start Time', type=float) 12 | parser.add_argument('--end', default=34500, help='End Time', type=float) 13 | parser.add_argument('--adj_freq', default=100, help='Adjustment Frequency', type=float) 14 | parser.add_argument('--tol', default=1e-8, help='Remaining Time To Submit Market Order', type=float) 15 | args = parser.parse_args() 16 | 17 | mq = Message_Queue(args.file_msg) 18 | lob = Limit_Order_book(own_amount_to_trade=args.order_size, 19 | own_init_price=-args.order_direction*Limit_Order_book._DUMMY_VARIABLE, 20 | own_trade_type=args.order_direction) 21 | for idx, message in mq.pop_to_next_time(args.start): 22 | lob.process(**message) 23 | 24 | lob.update_own_order(args.order_direction*Limit_Order_book._DUMMY_VARIABLE) 25 | 26 | current_time = args.start 27 | while lob.own_amount_to_trade > 0 and not mq.finished(): 28 | current_time += args.adj_freq 29 | for idx, message in mq.pop_to_next_time(current_time): 30 | lob.process(**message) 31 | if lob.own_amount_to_trade == 0: 32 | break 33 | 34 | if lob.own_amount_to_trade > 0: 35 | reward = Limit_Order_book._DUMMY_VARIABLE * args.order_direction 36 | else: 37 | reward = lob.own_reward 38 | 39 | print(reward) -------------------------------------------------------------------------------- /src/message_queue.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | class Message_Queue(object): 5 | def __init__(self, path): 6 | self._df = pd.read_csv(path, header=None) 7 | self._time = 34200.0 8 | self._row_idx = -1 9 | self._message_count = self._df.shape[0] 10 | self._idx2header = ['Time', 'Type', 'OrderID', 'Size', 'Price', 'Direction'] 11 | self._header2idx = {tmp:idx for idx, tmp in enumerate(self._idx2header)} 12 | 13 | def iterate_queue(self): 14 | for idx, row in 
self._df.iloc[(self._row_idx+1):].iterrows(): 15 | message = self._create_message(row) 16 | self._row_idx += 1 17 | self._time = row[0] 18 | yield (idx, message) 19 | 20 | def pop_to_next_time(self, time): 21 | while self._row_idx + 1 < self._message_count: 22 | row = self._df.iloc[self._row_idx+1] 23 | if row[0] <= time: 24 | self._row_idx += 1 25 | message = self._create_message(row) 26 | yield (self._row_idx, message) 27 | else: 28 | break 29 | self._time = time 30 | 31 | def finished(self): 32 | return (self._row_idx+1==self._message_count) 33 | 34 | def _create_message(self, row): 35 | order_type = int(row[1]) 36 | order_size = int(row[3]) 37 | order_price = int(row[4]) 38 | order_direction = int(row[5]) 39 | message = {'type':order_type, 'size':order_size, 'price':order_price, 'direction':order_direction} 40 | return message 41 | 42 | def reset(self): 43 | self._time = 34200.0 44 | self._row_idx = -1 45 | 46 | def jump_to_time(self, time): 47 | if time >= self._df.iloc[self._message_count-1][0]: 48 | self._time = time 49 | self._row_idx = self._message_count-1 50 | elif time < self._df.iloc[0][0]: 51 | self._time = 34200.0 52 | self._row_idx = -1 53 | else: 54 | idx_start = 0 55 | idx_end = 0 56 | d_idx = 1 57 | while time >= self._df.iloc[idx_end][0]: 58 | idx_start = idx_end 59 | idx_end = min(idx_end+d_idx, self._message_count-1) 60 | d_idx *= 2 61 | while idx_end - idx_start > 1: 62 | idx_mid = idx_start + (idx_end - idx_start) // 2 63 | if time >= self._df.iloc[idx_mid][0]: 64 | idx_start = idx_mid 65 | else: 66 | idx_end = idx_mid 67 | self._row_idx = idx_start 68 | self._time = time 69 | 70 | -------------------------------------------------------------------------------- /src/replay_buffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class ReplayBuffer(object): 4 | def __init__(self, size, config): 5 | self.config = config 6 | self.size = size 7 | self.last_idx = -1 8 | self.history_size = 0 9 | 10 | self.states_stack = np.empty([self.size]+list(self.config.state_shape)+[self.config.state_history+1], dtype=np.float32) 11 | self.its = np.empty([self.size, 2, 2], dtype=np.float32) 12 | self.actions = np.empty([self.size], dtype=np.int32) 13 | self.rewards = np.empty([self.size], dtype=np.float32) 14 | self.done_mask = np.empty([self.size], dtype=np.bool) 15 | 16 | def process_rewards(self, rewards): 17 | rewards_processed = [] 18 | for reward in rewards: 19 | if reward == -9999999999: 20 | reward = -1000.0 21 | else: 22 | reward = reward * 1.e-10 23 | rewards_processed.append(reward) 24 | return rewards_processed 25 | 26 | def store(self, states, actions, rewards, done_mask): 27 | rewards = self.process_rewards(rewards) 28 | for idx in range(len(actions)): 29 | self.last_idx += 1 30 | if self.last_idx == self.size: 31 | self.last_idx = 0 32 | self.actions[self.last_idx] = actions[idx] 33 | self.done_mask[self.last_idx] = done_mask[idx] 34 | self.rewards[self.last_idx] = rewards[idx] 35 | tmp = states[max(idx-self.config.state_history+1,0):(idx+2)] 36 | tmp_states, tmp_its = zip(*tmp) 37 | tmp_state = np.concatenate([np.expand_dims(state, -1) for state in tmp_states], axis=-1) 38 | tmp_it = np.concatenate([np.expand_dims(it, -1) for it in tmp_its[-2:]], axis=-1) 39 | self.states_stack[self.last_idx] = np.pad(tmp_state, ((0,0),(0,0),(self.config.state_history+1-tmp_state.shape[-1],0)), 'constant', constant_values=0) 40 | self.its[self.last_idx] = tmp_it 41 | self.history_size += 1 42 | 43 | def sample(self, 
batch_size): 44 | idx = np.arange(min(self.size, self.history_size)) 45 | np.random.shuffle(idx) 46 | idx_choice = idx[:batch_size] 47 | 48 | states = self.states_stack[idx_choice][:,:,:,:-1] 49 | states_p = self.states_stack[idx_choice][:,:,:,1:] 50 | it = self.its[idx_choice][:,:,0] 51 | it_p = self.its[idx_choice][:,:,1] 52 | actions = self.actions[idx_choice] 53 | rewards = self.rewards[idx_choice] 54 | done_mask = self.done_mask[idx_choice] 55 | 56 | return (states, it, states_p, it_p, actions, rewards, done_mask) -------------------------------------------------------------------------------- /src/evaluate_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import copy 3 | 4 | from limit_order_book import Limit_Order_book 5 | from message_queue import Message_Queue 6 | from order_queue import Order_Queue 7 | 8 | def evaluate_policy(test_start, test_end, order_direction, V, H, T, oq, mq, action): 9 | rewards = [] 10 | episodes, real_times = load_episodes(test_start, test_end, order_direction, H, oq, mq) 11 | for k in range(len(episodes)): 12 | episode = episodes[k] 13 | real_time = real_times[k] 14 | rewards.append(simulate_reward(episode, V, T, H, action, real_time, mq)) 15 | return rewards 16 | 17 | 18 | def load_episodes(test_start, test_end, order_direction, H, oq, mq): 19 | lob_data, time = read_order_book(test_start, test_end, H, oq, mq) 20 | lob = [Limit_Order_book(**lob_data, own_amount_to_trade = 0, 21 | own_init_price=-order_direction*Limit_Order_book._DUMMY_VARIABLE, 22 | own_trade_type=order_direction) for lob_data in lob_data] 23 | return lob, time 24 | 25 | 26 | def read_order_book(test_start, test_end, H, oq, mq): 27 | """ 28 | read the initial limit order book states from the file 29 | """ 30 | output = [] 31 | time_output = [] 32 | real_time = test_start 33 | while real_time < test_end: 34 | mq.reset() 35 | output.append(oq.create_orderbook_time(real_time, mq)) 36 | time_output.append(real_time) 37 | real_time = real_time + H 38 | return output, time_output 39 | 40 | 41 | def simulate_reward(lob, amount, T, H, action, time, mq): 42 | """ 43 | simulate to next state, we need to calculate the remaining inventory given the current i and price a, and the immediate reward 44 | (revenue from the executed orders) 45 | """ 46 | mq.reset() 47 | mq.jump_to_time(time) 48 | 49 | lob_copy = copy.deepcopy(lob) 50 | 51 | for t in range(time, time + H, H//T): 52 | price = action(time + H - t, amount, lob_copy) 53 | lob_copy.update_own_order(price, amount) 54 | 55 | for idx, message in mq.pop_to_next_time(t + H/T): 56 | lob_copy.process(**message) 57 | if lob_copy.own_amount_to_trade == 0: 58 | return lob_copy.own_reward 59 | 60 | amount = lob_copy.own_amount_to_trade 61 | 62 | lob_copy.update_own_order(lob_copy.own_trade_type*Limit_Order_book._DUMMY_VARIABLE) 63 | if lob_copy.own_amount_to_trade > 0 and lob_copy.own_trade_type == 1: 64 | return -Limit_Order_book._DUMMY_VARIABLE 65 | else: 66 | return lob_copy.own_reward 67 | 68 | 69 | -------------------------------------------------------------------------------- /src/test_dqn.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import numpy as np 4 | import sys 5 | import numpy as np 6 | import tensorflow as tf 7 | import tensorflow.contrib.layers as layers 8 | import os 9 | 10 | 11 | from config_GOOG import Config 12 | from replay_buffer import ReplayBuffer 13 | from schedule import LinearSchedule 
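# Project modules imported below: the market-replay environment (Message_Queue, Order_Queue,
# Limit_Order_book) and the two models this script evaluates (DQN, Neural_DQN).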
14 | from message_queue import Message_Queue 15 | from order_queue import Order_Queue 16 | from limit_order_book import Limit_Order_book 17 | from message_queue import Message_Queue 18 | from order_queue import Order_Queue 19 | from dqn_model import DQN 20 | from Neural_Net import Neural_DQN 21 | 22 | 23 | def evaluate_policy(m, oq, mq): 24 | rewards = [] 25 | test_start, test_end, order_direction, V, H, T, depth= m._config.test_start, m._config.test_end, m._config.direction,\ 26 | m._config.I, m._config.H, m._config.T, m._config.depth 27 | episodes, real_times = load_episodes(test_start, test_end, order_direction, H, oq, mq) 28 | for k in range(len(episodes)): 29 | print ('I am at the %d episode'%(k)) 30 | real_time = real_times[k] 31 | states, reward, actions, done_mask = m.simulate_an_episode(V, T, 32 | H, real_time, order_direction, 33 | m.get_best_action_fn(), depth) 34 | print (reward) 35 | rewards.append(np.sum(reward)) 36 | # Only append the final reward 37 | return rewards 38 | 39 | 40 | def load_episodes(test_start, test_end, order_direction, H, oq, mq): 41 | lob_data, time = read_order_book(test_start, test_end, H, oq, mq) 42 | lob = [Limit_Order_book(**lob_data, own_amount_to_trade = 0, 43 | own_init_price=-order_direction*Limit_Order_book._DUMMY_VARIABLE, 44 | own_trade_type=order_direction) for lob_data in lob_data] 45 | return lob, time 46 | 47 | 48 | def read_order_book(test_start, test_end, H, oq, mq): 49 | """ 50 | read the initial limit order book states from the file 51 | """ 52 | output = [] 53 | time_output = [] 54 | real_time = test_start 55 | while real_time < test_end: 56 | mq.reset() 57 | output.append(oq.create_orderbook_time(real_time, mq)) 58 | time_output.append(real_time) 59 | real_time = real_time + H 60 | return output, time_output 61 | 62 | def main(): 63 | config = Config() 64 | config.mode = 'test' 65 | config.dropout = 1.0 66 | model = Neural_DQN(config) 67 | #model = DQN(config) 68 | model.initialize() 69 | oq = Order_Queue(config.order_path) 70 | mq = Message_Queue(config.message_path) 71 | rewards= evaluate_policy(model, oq, mq) 72 | print(np.mean(rewards)) 73 | 74 | if __name__ == '__main__': 75 | main() 76 | -------------------------------------------------------------------------------- /src/market_policy.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import copy 4 | 5 | from limit_order_book import Limit_Order_book 6 | from message_queue import Message_Queue 7 | from order_queue import Order_Queue 8 | from evaluate_policy import evaluate_policy 9 | 10 | parser = argparse.ArgumentParser(description='Dynamic Programming Algorithm') 11 | parser.add_argument('--tic', default= 'GOOG', help='Company Ticker') 12 | parser.add_argument('--base_point', default=100, help='Base Point', type=int) 13 | parser.add_argument('--order_direction', default=-1, help='Buy 1, Sell -1', type=int) 14 | parser.add_argument('--train_start', default=34200, help='Train Start Time', type=float) 15 | parser.add_argument('--train_end', default=46800, help='Train End Time', type=float) 16 | parser.add_argument('--test_start', default=46800, help='Test End Time', type=float) 17 | parser.add_argument('--test_end', default=57600, help='Test End Time', type=float) 18 | parser.add_argument('--H', default=600, help='Horizon', type=int) 19 | parser.add_argument('--T', default=20, help='Time steps', type=int) 20 | parser.add_argument('--V', default=100, help='Amount to trade', type=int) 21 | 
parser.add_argument('--I', default=10, help='Inventory Length', type=int) 22 | parser.add_argument('--factor', default=0.3, help='Market order factor', type=float) 23 | parser.add_argument('--mode', default='train', help='Mode: train or test') 24 | args = parser.parse_args() 25 | 26 | file_msg = '../datasets/%s_2012-06-21_34200000_57600000_message_10.csv' % (args.tic) 27 | file_order = '../datasets/%s_2012-06-21_34200000_57600000_orderbook_10.csv' % (args.tic) 28 | 29 | def action_wrapper(order_direction, factor): 30 | def action(remaining_time, amount, lob_copy): 31 | if remaining_time == args.H: 32 | current_mid_price = lob_copy.bid[0] + (lob_copy.ask[0] - lob_copy.bid[0]) // 2 33 | if order_direction == 1: 34 | return int(current_mid_price * (1 + factor)) 35 | else: 36 | return int(current_mid_price * (1 - factor)) 37 | else: 38 | return lob_copy.own_price 39 | return action 40 | 41 | oq = Order_Queue(file_order) 42 | mq = Message_Queue(file_msg) 43 | 44 | if args.mode == 'train': 45 | action_func = action_wrapper(args.order_direction, args.factor) 46 | rewards = evaluate_policy(args.train_start, args.train_end, args.order_direction, args.V, args.H, args.T, oq, mq, action_func) 47 | print(rewards) 48 | print(np.mean(rewards)) 49 | elif args.mode == 'test': 50 | action_func = action_wrapper(args.order_direction, args.factor) 51 | rewards = evaluate_policy(args.test_start, args.test_end, args.order_direction, args.V, args.H, args.T, oq, mq, action_func) 52 | print(rewards) 53 | print(np.mean(rewards)) 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /src/snl_policy.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import copy 4 | 5 | from limit_order_book import Limit_Order_book 6 | from message_queue import Message_Queue 7 | from order_queue import Order_Queue 8 | from evaluate_policy import evaluate_policy 9 | 10 | parser = argparse.ArgumentParser(description='Dynamic Programming Algorithm') 11 | parser.add_argument('--tic', default= 'GOOG', help='Company Ticker') 12 | parser.add_argument('--base_point', default=100, help='Base Point', type=int) 13 | parser.add_argument('--order_direction', default=-1, help='Buy 1, Sell -1', type=int) 14 | parser.add_argument('--train_start', default=34200, help='Train Start Time', type=float) 15 | parser.add_argument('--train_end', default=46800, help='Train End Time', type=float) 16 | parser.add_argument('--test_start', default=46800, help='Test End Time', type=float) 17 | parser.add_argument('--test_end', default=57600, help='Test End Time', type=float) 18 | parser.add_argument('--H', default=600, help='Horizon', type=int) 19 | parser.add_argument('--T', default=20, help='Time steps', type=int) 20 | parser.add_argument('--V', default=100, help='Amount to trade', type=int) 21 | parser.add_argument('--I', default=10, help='Inventory Length', type=int) 22 | parser.add_argument('--mode', default='train', help='Mode: train or test') 23 | parser.add_argument('--num', default= 10, help= 'The number of base points to go', type= int) 24 | parser.add_argument('--diff', default= 0, help= 'The number of base points to go beyond midpoint', type= int) 25 | args = parser.parse_args() 26 | 27 | file_msg = '../datasets/%s_2012-06-21_34200000_57600000_message_10.csv' % (args.tic) 28 | file_order = '../datasets/%s_2012-06-21_34200000_57600000_orderbook_10.csv' % (args.tic) 29 | 30 | def action_wrapper(diff): 31 | def 
action(remaining_time, amount, lob_copy): 32 | if remaining_time == args.H: 33 | current_mid_price = lob_copy.bid[0] + (lob_copy.ask[0] - lob_copy.bid[0]) // 2 34 | return max(current_mid_price + diff, 0) 35 | else: 36 | return lob_copy.own_price 37 | return action 38 | 39 | def train(train_start, train_end, order_direction, V, H, oq, mq): 40 | rewards = [] 41 | for i in range(-args.num, args.num): 42 | print(i) 43 | action_func = action_wrapper(i * args.base_point) 44 | rewards.append(np.mean(evaluate_policy(train_start, train_end, args.order_direction, V, H, args.T, oq, mq, action_func))) 45 | print(rewards) 46 | 47 | 48 | oq = Order_Queue(file_order) 49 | mq = Message_Queue(file_msg) 50 | 51 | if args.mode == 'train': 52 | train(args.train_start, args.train_end, args.order_direction, args.V, args.H, oq, mq) 53 | elif args.mode == 'test': 54 | diff = args.diff * args.base_point 55 | action_func = action_wrapper(diff) 56 | rewards = evaluate_policy(args.test_start, args.test_end, args.order_direction, args.V, args.H, args.T, oq, mq, action_func) 57 | print(rewards) 58 | print(np.mean(rewards)) 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /src/tree_policy.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import copy 4 | 5 | from limit_order_book import Limit_Order_book 6 | from message_queue import Message_Queue 7 | from order_queue import Order_Queue 8 | from evaluate_policy import evaluate_policy 9 | 10 | parser = argparse.ArgumentParser(description='Dynamic Programming Algorithm') 11 | parser.add_argument('--tic', default= 'GOOG', help='Company Ticker') 12 | parser.add_argument('--base_point', default=100, help='Base Point', type=int) 13 | parser.add_argument('--order_direction', default=-1, help='Buy 1, Sell -1', type=int) 14 | parser.add_argument('--train_start', default=34200, help='Train Start Time', type=float) 15 | parser.add_argument('--train_end', default=46800, help='Train End Time', type=float) 16 | parser.add_argument('--test_start', default=46800, help='Test End Time', type=float) 17 | parser.add_argument('--test_end', default=57600, help='Test End Time', type=float) 18 | parser.add_argument('--H', default=600, help='Horizon', type=int) 19 | parser.add_argument('--T', default=20, help='Time steps', type=int) 20 | parser.add_argument('--V', default=100, help='Amount to trade', type=int) 21 | parser.add_argument('--I', default=10, help='Inventory Length', type=int) 22 | parser.add_argument('--mode', default='train', help='Mode: train or test') 23 | parser.add_argument('--num', default= 10, help= 'The number of base points to go', type= int) 24 | parser.add_argument('--diff', default= 0, help= 'The number of base points to go beyond midpoint', type= int) 25 | args = parser.parse_args() 26 | 27 | file_msg = '../datasets/%s_2012-06-21_34200000_57600000_message_10.csv' % (args.tic) 28 | file_order = '../datasets/%s_2012-06-21_34200000_57600000_orderbook_10.csv' % (args.tic) 29 | 30 | def action_wrapper(diff): 31 | def action(remaining_time, amount, lob_copy): 32 | if remaining_time == args.H: 33 | current_mid_price = lob_copy.bid[0] + (lob_copy.ask[0] - lob_copy.bid[0]) // 2 34 | return max(current_mid_price + diff, 0) 35 | else: 36 | return lob_copy.own_price 37 | return action 38 | 39 | def train(start, end, order_direction, V, k, H, oq, mq, action): 40 | if time==(start+H): # This code force that (args.end-args.start) is a multiple 
of args.tol 41 | if lob.own_amount_to_trade == 0: 42 | return lob.own_reward 43 | else: 44 | lob.update_own_order(args.order_direction*Limit_Order_book._DUMMY_VARIABLE) 45 | # for idx, message in mq.pop_to_next_time(args.end): 46 | # lob.process(**message) 47 | # if lob.own_amount_to_trade == 0: 48 | # break 49 | return lob.own_reward 50 | else: 51 | current_mid_price = lob.bid[0] + (lob.ask[0] - lob.bid[0]) // 2 52 | init_price = np.arange(current_mid_price-args.num*args.base_point, current_mid_price+args.num*args.base_point, args.base_point) 53 | init_price = init_price[init_price > 0] 54 | max_reward= -99999999.0 55 | for i in range(len(init_price)): 56 | # print ('At least this works') 57 | lob_copy = copy.deepcopy(lob) 58 | lob_copy.update_own_order(init_price[i]) 59 | mq_copy = copy.deepcopy(mq) 60 | for idx, message in mq_copy.pop_to_next_time(time+args.tol): 61 | lob_copy.process(**message) 62 | if lob_copy.own_amount_to_trade == 0: 63 | max_reward= max(max_reward, lob_copy.own_reward) 64 | else: 65 | max_reward= max(max_reward, optimal(time+args.tol, lob_copy, mq_copy)) 66 | return max_reward 67 | 68 | 69 | 70 | oq = Order_Queue(file_order) 71 | mq = Message_Queue(file_msg) 72 | 73 | if args.mode == 'train': 74 | train(args.train_start, args.train_end, args.order_direction, args.V, args.H, oq, mq) 75 | elif args.mode == 'test': 76 | diff = args.diff * args.base_point 77 | action_func = action_wrapper(diff) 78 | rewards = evaluate_policy(args.test_start, args.test_end, args.order_direction, args.V, args.H, args.T, oq, mq, action_func) 79 | print(rewards) 80 | print(np.mean(rewards)) -------------------------------------------------------------------------------- /src/baseline_dp.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import copy 4 | 5 | from limit_order_book import Limit_Order_book 6 | from message_queue import Message_Queue 7 | from order_queue import Order_Queue 8 | # from evaluate_policy import load_episodes, read_order_book 9 | 10 | parser = argparse.ArgumentParser(description='Dynamic Programming Solution') 11 | parser.add_argument('--tic', default= 'GOOG', help='Company Ticker') 12 | parser.add_argument('--order_direction', default=-1, help='Buy 1, Sell -1', type=int) 13 | parser.add_argument('--test_start', default=46800, help='Test End Time', type=float) 14 | parser.add_argument('--test_end', default=57600, help='Test End Time', type=float) 15 | parser.add_argument('--base_point', default=100, help='Base Point', type=int) 16 | parser.add_argument('--num', default= 3, help= 'The number of base points to go', type= int) 17 | parser.add_argument('--H', default=600, help='Horizon', type=int) 18 | parser.add_argument('--T', default=3, help='Time steps', type=int) 19 | parser.add_argument('--V', default=8000, help='Amount to trade', type=int) 20 | args = parser.parse_args() 21 | 22 | file_msg = '../datasets/%s_2012-06-21_34200000_57600000_message_10.csv' % (args.tic) 23 | file_order = '../datasets/%s_2012-06-21_34200000_57600000_orderbook_10.csv' % (args.tic) 24 | 25 | def optimal(time,start, H, lob, mq, T, current_mid_price, V): 26 | # step is how much we move at each time. 27 | # H is the number of inventory to sell. 28 | # I is how many copies of inventory. 
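# What the recursion below does: at each decision point it tries every candidate limit price
# spaced `base_point` apart within `num` ticks of the episode's initial mid price, replays the
# message stream for one H/T interval on a deep copy of the book, recurses on the remaining
# inventory, and keeps the best total reward found. Once `time` reaches `start + H`, any
# unfilled amount is crossed as a market order.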
29 | if time==(start+H): # This code force that (args.end-args.start) is a multiple of args.tol 30 | if lob.own_amount_to_trade == 0: 31 | return lob.own_reward 32 | else: 33 | lob.update_own_order(args.order_direction*Limit_Order_book._DUMMY_VARIABLE) 34 | if lob.own_amount_to_trade > 0 and lob.own_trade_type == 1: 35 | return -Limit_Order_book._DUMMY_VARIABLE 36 | else: 37 | return lob.own_reward 38 | else: 39 | # current_mid_price = lob.bid[0] + (lob.ask[0] - lob.bid[0]) // 2 40 | init_price = np.arange(current_mid_price-args.num*args.base_point, current_mid_price+args.num*args.base_point, args.base_point) 41 | init_price = init_price[init_price > 0] 42 | max_reward= -1.0*float('inf') 43 | for i in range(len(init_price)): 44 | # print ('At least this works') 45 | lob_copy = copy.deepcopy(lob) 46 | # print (int(init_price[i])) 47 | lob_copy.update_own_order(int(init_price[i]), V) 48 | mq.reset() 49 | mq.jump_to_time(time) 50 | for idx, message in mq.pop_to_next_time(time+H/T): 51 | lob_copy.process(**message) 52 | # print (lob_copy.own_reward) 53 | if lob_copy.own_amount_to_trade == 0: 54 | max_reward= max(max_reward,lob_copy.own_reward) 55 | return max_reward 56 | else: 57 | max_reward= max(max_reward,lob_copy.own_reward+optimal(time+H//T,start,H,lob_copy,mq,T, current_mid_price, lob_copy.own_amount_to_trade)) 58 | # for i in range(len(init_price)): 59 | # for j in range(I): 60 | # # print ('At least this works') 61 | # lob_copy = copy.deepcopy(lob) 62 | # lob_copy.update_own_order(init_price[i], V/I*j) 63 | # mq.reset() 64 | # mq.jump_to_time(time) 65 | # remaining= I-j+int(lob_copy.own_amount_to_trade/V*I)-1 66 | # for idx, message in mq.pop_to_next_time(time+H/T): 67 | # lob_copy.process(**message) 68 | # print (lob_copy.own_reward) 69 | # if lob_copy.own_amount_to_trade == 0: 70 | # max_reward= max(max_reward,lob_copy.own_reward+optimal(time+H//T,start,H,lob_copy,mq,T,remaining,I)) 71 | # return max_reward 72 | # else: 73 | # max_reward= max(max_reward,lob_copy.own_reward+optimal(time+H//T,start,H,lob_copy,mq,T,remaining,I)) 74 | return max_reward 75 | oq = Order_Queue(file_order) 76 | mq = Message_Queue(file_msg) 77 | def load_episodes(test_start, test_end, order_direction, H, oq, mq): 78 | lob_data, time = read_order_book(test_start, test_end, H, oq, mq) 79 | lob = [Limit_Order_book(**lob_data, own_amount_to_trade = 0, 80 | own_init_price=-order_direction*Limit_Order_book._DUMMY_VARIABLE, 81 | own_trade_type=order_direction) for lob_data in lob_data] 82 | return lob, time 83 | 84 | 85 | def read_order_book(test_start, test_end, H, oq, mq): 86 | """ 87 | read the initial limit order book states from the file 88 | """ 89 | output = [] 90 | time_output = [] 91 | real_time = test_start 92 | while real_time < test_end: 93 | mq.reset() 94 | output.append(oq.create_orderbook_time(real_time, mq)) 95 | time_output.append(real_time) 96 | real_time = real_time + H 97 | return output, time_output 98 | 99 | episodes, real_times = load_episodes(args.test_start, args.test_end, args.order_direction, args.H, oq, mq) 100 | rewards= [] 101 | for k in range(len(episodes)): 102 | episode = episodes[k] 103 | # episode.own_amount_to_trade= args.V 104 | # print (episode.own_amount_to_trade) 105 | current_mid_price = episode.bid[0] + (episode.ask[0] - episode.bid[0]) // 2 106 | real_time = real_times[k] 107 | rewards.append(optimal(real_time,real_time, args.H,episode, mq, args.T, current_mid_price, args.V)) 108 | print (np.mean(rewards)) 
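# Example invocation (a sketch: the values shown simply restate the argparse defaults above,
# and the LOBSTER message/orderbook CSVs are assumed to exist at the ../datasets/ paths built
# from --tic):
#   python baseline_dp.py --tic GOOG --order_direction -1 --test_start 46800 --test_end 57600 \
#                         --base_point 100 --num 3 --H 600 --T 3 --V 8000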
-------------------------------------------------------------------------------- /src/test_book_queue.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from limit_order_book import Limit_Order_book 5 | from message_queue import Message_Queue 6 | 7 | def print_info(idx, msg=None, status='[FAIL]'): 8 | if idx in [1]: 9 | print('Execute Buy Order %s' %status) 10 | elif idx in [48]: 11 | print('Execute Sell Order %s' %status) 12 | elif idx in [9]: 13 | print('Add Buy Order %s' %status) 14 | elif idx in [41]: 15 | print('Add Sell Order %s' %status) 16 | elif idx in [5]: 17 | print('Execute Hidden Order %s' %status) 18 | elif idx in [46]: 19 | print('Delete Buy Order %s' %status) 20 | elif idx in [47]: 21 | print('Delete Sell Order %s' %status) 22 | 23 | if status == '[FAIL]': 24 | print('ERROR! idx %d msg %s' %(idx, str(msg))) 25 | 26 | 27 | message_path = '../datasets/LOBSTER_SampleFile_GOOG_2012-06-21_10/GOOG_2012-06-21_34200000_57600000_message_10.csv' 28 | mq = Message_Queue(message_path) 29 | 30 | book_path = '../datasets/LOBSTER_SampleFile_GOOG_2012-06-21_10/GOOG_2012-06-21_34200000_57600000_orderbook_10.csv' 31 | df_book = pd.read_csv(book_path, header=None) 32 | level = 10 33 | ask_book = df_book[np.arange(level)*4].values 34 | ask_size_book = df_book[1+np.arange(level)*4].values 35 | bid_book = df_book[2+np.arange(level)*4].values 36 | bid_size_book = df_book[3+np.arange(level)*4].values 37 | book = np.concatenate([tmp[:,:,np.newaxis] for tmp in [bid_book, bid_size_book, ask_book, ask_size_book]], axis=2) 38 | 39 | for idx, message in mq.iterate_queue(): 40 | if idx == 0: 41 | ask_book_init = ask_book[0] 42 | ask_size_book_init = ask_size_book[0] 43 | bid_book_init = bid_book[0] 44 | bid_size_book_init = bid_size_book[0] 45 | lob = Limit_Order_book(bid_book_init, bid_size_book_init, ask_book_init, ask_size_book_init) 46 | snap_shot_book = book[idx] 47 | assert(np.sum(snap_shot_book != lob.display_book(level))==0) 48 | print('Initialize LOB [SUCCESS]') 49 | else: 50 | lob.process(**message) 51 | snap_shot_book = book[idx] 52 | try: 53 | assert(np.sum(snap_shot_book != lob.display_book(level))==0) 54 | print_info(idx, msg=message, status='[SUCCESS]') 55 | except: 56 | print_info(idx, msg=message, status='[FAIL]') 57 | 58 | 59 | if idx == 64: 60 | print('\nFinished! 
Unable to compare due to invisible order in the book!') 61 | print('Current LOB: ') 62 | print(lob.display_book(16)) 63 | break 64 | 65 | ### test own order 66 | assert(lob.own_earlier_orders == np.sum(lob.ask_size[:-1])) 67 | print('Initial Own Order [SUCCESS]') 68 | 69 | lob.update_own_order(5800100) 70 | assert(lob.own_earlier_orders == 3237) 71 | assert(lob.own_amount_to_trade == 100) 72 | print('Update Price To Price On LOB [SUCCESS]') 73 | 74 | lob.update_own_order(5802000) 75 | assert(lob.own_earlier_orders == 3247) 76 | assert(lob.own_amount_to_trade == 100) 77 | print('Update Price To Price Not On LOB [SUCCESS]') 78 | 79 | lob.update_own_order(5797000) 80 | assert(lob.own_earlier_orders == 0) 81 | assert(lob.own_amount_to_trade == 100) 82 | print('Update Price To Best Ask On LOB [SUCCESS]') 83 | 84 | lob.process(1, 10, 5796900, -1) 85 | assert(lob.own_earlier_orders == 10) 86 | assert(lob.own_amount_to_trade == 100) 87 | print('Insert An Sell Order With Better Ask On LOB [SUCCESS]') 88 | 89 | lob.process(1, 11, 5797000, 1) 90 | assert(lob.own_earlier_orders == 0) 91 | assert(lob.own_amount_to_trade == 99) 92 | print('Execute An Buy Order With Same Ask [SUCCESS]') 93 | 94 | lob.update_own_order(5791900) 95 | assert(lob.own_earlier_orders == 0) 96 | assert(lob.own_amount_to_trade == 24) 97 | print('Update Price To Best Bid [SUCCESS]') 98 | 99 | lob.process(1, 1, 5791600, 1) 100 | lob.process(1, 1, 5791700, 1) 101 | lob.process(1, 1, 5791800, 1) 102 | assert(lob.own_earlier_orders == 0) 103 | assert(lob.own_amount_to_trade == 24) 104 | print('Insert Small Buy Orders With Better Bid On LOB [SUCCESS]') 105 | 106 | lob.update_own_order(5791500) 107 | assert(lob.own_earlier_orders == 0) 108 | assert(lob.own_amount_to_trade == 21) 109 | print('Update Price To Execute 3 Small Buy Orders [SUCCESS]') 110 | 111 | lob.process(1, 30, 5791500, -1) 112 | assert(lob.own_earlier_orders == 0) 113 | assert(lob.own_amount_to_trade == 21) 114 | print('Execute An Sell Order With Same Ask [SUCCESS]') 115 | 116 | lob.process(3, 15, 5791500, -1) 117 | assert(lob.own_earlier_orders == 0) 118 | assert(lob.own_amount_to_trade == 21) 119 | print('Cancel Half Of Previous Order [SUCCESS]') 120 | 121 | lob.update_own_order(5797900) 122 | assert(lob.own_earlier_orders == 225) 123 | assert(lob.own_amount_to_trade == 21) 124 | print('Update Price To Second Best Ask On LOB [SUCCESS]') 125 | 126 | 127 | lob.process(1, 9, 5797900, -1) 128 | assert(lob.own_earlier_orders == 225) 129 | assert(lob.own_amount_to_trade == 21) 130 | print('Insert An Sell Order With Second Best Ask On LOB [SUCCESS]') 131 | 132 | lob.process(3, 215, 5797900, -1) 133 | assert(lob.own_earlier_orders == 15) 134 | assert(lob.own_amount_to_trade == 21) 135 | print('Cancel Sell Order With Second Best Ask On LOB [SUCCESS]') 136 | 137 | lob.process(1, 45, 5799500, 1) 138 | assert(lob.own_earlier_orders == 0) 139 | assert(lob.own_amount_to_trade == 0) 140 | print('Insert Buy Orders [SUCCESS]') -------------------------------------------------------------------------------- /src/baseline_SnL.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import copy 3 | import numpy as np 4 | 5 | from limit_order_book import Limit_Order_book 6 | from message_queue import Message_Queue 7 | 8 | parser = argparse.ArgumentParser(description='Dynamic Programming Solution') 9 | parser.add_argument('--tic', default= 'GOOG', help='Company Ticker') 10 | parser.add_argument('--order_size', default=1200, help='Order 
Size', type=int) 11 | parser.add_argument('--order_direction', default=1, help='Buy 1, Sell -1', type=int) 12 | parser.add_argument('--train_start', default=34201, help='Train Start Time', type=float) 13 | parser.add_argument('--train_end', default=46800, help='Train End Time', type=float) 14 | parser.add_argument('--test_start', default=46800, help='Test End Time', type=float) 15 | parser.add_argument('--test_end', default=57600, help='Test End Time', type=float) 16 | parser.add_argument('--H', default=600, help='Horizon', type=float) 17 | parser.add_argument('--base_point', default=1000, help='Base Point', type=int) 18 | parser.add_argument('--adj_freq', default=100, help='Adjustment Frequency', type=float) 19 | parser.add_argument('--tol', default=100, help='Remaining Time To Submit Market Order', type=float) 20 | parser.add_argument('--num', default= 10, help= 'The number of base points to go', type= int) 21 | args = parser.parse_args() 22 | # Use the train_start and train_end to find the best num. H: the total amount of time to execute the orders. 23 | file_msg = '../datasets/%s_2012-06-21_34200000_57600000_message_10.csv' % (args.tic) 24 | 25 | mq = Message_Queue(file_msg) 26 | lob = Limit_Order_book(own_amount_to_trade=args.order_size, 27 | own_init_price=-args.order_direction*Limit_Order_book._DUMMY_VARIABLE, 28 | own_trade_type=args.order_direction) 29 | for idx, message in mq.pop_to_next_time(args.train_start): 30 | lob.process(**message) 31 | 32 | current_mid_price = lob.bid[0] + (lob.ask[0] - lob.bid[0]) // 2 33 | init_price = np.arange(current_mid_price-args.num*args.base_point, current_mid_price+args.num*args.base_point, args.base_point) 34 | init_price = init_price[init_price > 0] 35 | 36 | reward = np.zeros(init_price.shape) 37 | 38 | for i in range(len(init_price)): 39 | real_time= args.train_start 40 | # print ('I am at %d now'%(i)) 41 | lob_copy = copy.deepcopy(lob) 42 | lob_copy.update_own_order(init_price[i]) 43 | mq_copy = copy.deepcopy(mq) 44 | num_count= 0 45 | while real_time+args.H 0 and args.order_direction==1: 65 | current_reward = Limit_Order_book._DUMMY_VARIABLE * (-1) 66 | print ('I have ever been here') 67 | else: 68 | current_reward = lob_copy.own_reward 69 | # print (current_reward) 70 | reward[i]= num_count/(num_count+1)*reward[i]+ 1/(num_count+1)*current_reward 71 | num_count= num_count+1 72 | if args.order_direction==1: 73 | best_index= np.argmax(reward) 74 | else: 75 | best_index= np.argmax(reward) 76 | print (reward) 77 | # Now go to evaluate the test mode 78 | mq = Message_Queue(file_msg) 79 | lob = Limit_Order_book(own_amount_to_trade=args.order_size, 80 | own_init_price=-args.order_direction*Limit_Order_book._DUMMY_VARIABLE, 81 | own_trade_type=args.order_direction) 82 | for idx, message in mq.pop_to_next_time(args.test_start): 83 | lob.process(**message) 84 | 85 | current_mid_price = lob.bid[0] + (lob.ask[0] - lob.bid[0]) // 2 86 | init_price= current_mid_price- args.num*args.base_point+ best_index*args.base_point 87 | if init_price<0: 88 | print ('This does not make sense at all') 89 | 90 | reward_test= 0 91 | real_time= args.test_start 92 | num_count= 0 93 | while real_time+args.H 0 and args.order_direction== 1: 110 | current_reward = Limit_Order_book._DUMMY_VARIABLE * (-1) 111 | print ('Have I ever been here') 112 | else: 113 | current_reward = lob_copy.own_reward 114 | print (current_reward) 115 | reward_test= num_count/(num_count+1)*reward_test+ 1/(num_count+1)*current_reward 116 | num_count= num_count+1 117 | print (reward_test) 118 | print 
(best_index) 119 | exit() 120 | 121 | 122 | 123 | 124 | # for i in range(len(init_price)): 125 | # lob_copy = copy.deepcopy(lob) 126 | # lob_copy.update_own_order(init_price[i]) 127 | # mq_copy = copy.deepcopy(mq) 128 | 129 | # for idx, message in mq_copy.pop_to_next_time(args.end-args.tol): 130 | # lob_copy.process(**message) 131 | # if lob_copy.own_amount_to_trade == 0: 132 | # break 133 | 134 | # if lob_copy.own_amount_to_trade == 0: 135 | # reward[i] = lob_copy.own_reward 136 | # else: 137 | # lob_copy.update_own_order(args.order_direction*Limit_Order_book._DUMMY_VARIABLE) 138 | # for idx, message in mq_copy.pop_to_next_time(args.end): 139 | # lob_copy.process(**message) 140 | # if lob_copy.own_amount_to_trade == 0: 141 | # break 142 | # if lob_copy.own_amount_to_trade > 0: 143 | # reward[i] = Limit_Order_book._DUMMY_VARIABLE * args.order_direction 144 | # else: 145 | # reward[i] = lob_copy.own_reward 146 | 147 | # print(init_price) 148 | # print(max(reward)) -------------------------------------------------------------------------------- /src/dqn_model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import tensorflow as tf 4 | import tensorflow.contrib.layers as layers 5 | import os 6 | 7 | from config_AMZN import Config 8 | from model_base import model 9 | 10 | class DQN(model): 11 | """ 12 | Implement Neural Network with Tensorflow 13 | """ 14 | def add_placeholders_op(self): 15 | state_shape = self._config.state_shape 16 | state_history = self._config.state_history 17 | # a state shape is (depth, 4) 18 | 19 | # self.s_book: batch of book states, type = float32 20 | # self.s_it: batch of inventory and time states, type = float32 21 | # self.a: batch of actions, type = int32 22 | # self.r: batch of rewards, type = float32 23 | # self.sp_book: batch of next book states, type = float32 24 | # self.sp_it: batch of next inventory and time states, type = float32 25 | # self.done_mask: bath of done, type = bool 26 | # self.lr: learning rate, type = float32 27 | 28 | self.s_book = tf.placeholder(dtype=tf.float32, shape=[None, state_shape[0], state_shape[1], state_history]) 29 | self.s_it = tf.placeholder(dtype=tf.float32, shape=[None, 2]) 30 | self.a = tf.placeholder(dtype=tf.int32, shape=[None]) 31 | self.r = tf.placeholder(dtype=tf.float32, shape=[None]) 32 | self.sp_book = tf.placeholder(dtype=tf.float32, shape=[None, state_shape[0], state_shape[1], state_history]) 33 | self.sp_it = tf.placeholder(dtype=tf.float32, shape=[None, 2]) 34 | self.done_mask = tf.placeholder(dtype=tf.bool, shape=[None]) 35 | self.lr = tf.placeholder(dtype=tf.float32, shape=[]) 36 | 37 | def get_q_values_op(self, state, scope, reuse=False): 38 | ### Implement a Fully-Connected Network, replace with CNN later 39 | num_actions = self._config.L + 1 # 1 for market order 40 | state_book, state_it = state 41 | with tf.variable_scope(scope, reuse=reuse): 42 | state_book_flattened = layers.flatten(state_book) 43 | state_out = tf.concat([state_book_flattened, state_it], axis=1) 44 | 45 | out = layers.fully_connected(state_out, num_actions, activation_fn=None) 46 | return out 47 | 48 | def add_update_target_op(self, q_scope, target_q_scope): 49 | q_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=q_scope) 50 | target_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=target_q_scope) 51 | self.update_target_op = tf.group(*[tf.assign(var1, var2) for var1, var2 in zip(target_var, q_var)]) 52 | 53 | def add_loss_op(self, 
q, target_q): 54 | num_actions = self._config.L + 1 # 1 for market order 55 | Q_samp = self.r + self._config.gamma * tf.reduce_max(target_q, axis=1) * (1 - tf.cast(self.done_mask, tf.float32)) 56 | Q_s_a = tf.reduce_sum(tf.one_hot(self.a, num_actions) * q, axis=1) 57 | self.loss = tf.reduce_mean(tf.square(Q_samp - Q_s_a)) 58 | 59 | def add_optimizer_op(self, scope): 60 | optimizer = tf.train.AdamOptimizer(learning_rate=self.lr) 61 | var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope) 62 | grads = optimizer.compute_gradients(self.loss, var_list=var_list) 63 | if self._config.grad_clip: 64 | grads = [(tf.clip_by_norm(grad, self._config.clip_val), var) for grad, var in grads] 65 | self.train_op = optimizer.apply_gradients(grads) 66 | self.grad_norm = tf.global_norm([grad for grad, _ in grads]) 67 | 68 | def build(self): 69 | self.add_placeholders_op() 70 | 71 | q_state = (self.s_book, self.s_it) 72 | self.q = self.get_q_values_op(q_state, scope='q', reuse=False) 73 | 74 | target_q_state = (self.sp_book, self.sp_it) 75 | self.target_q = self.get_q_values_op(target_q_state, scope='target_q', reuse=False) 76 | 77 | self.add_update_target_op('q','target_q') 78 | 79 | self.add_loss_op(self.q, self.target_q) 80 | 81 | self.add_optimizer_op('q') 82 | 83 | def initialize(self): 84 | self.sess = tf.Session() 85 | self.saver = tf.train.Saver() 86 | if self._config.mode == 'train': 87 | self.sess.run(tf.global_variables_initializer()) 88 | print('running training mode') 89 | elif self._config.mode == 'test': 90 | self.saver.restore(self.sess, tf.train.latest_checkpoint(self._config.model_output)) 91 | print('running test mode') 92 | self.sess.run(self.update_target_op) 93 | 94 | 95 | def train(self): 96 | self.sampling_buffer() 97 | 98 | t = 0 99 | 100 | total_loss = 0 101 | while t < self._config.nsteps_train: 102 | t += 1 103 | self._lr_schedule.update(t) 104 | self._eps_schedule.update(t) 105 | loss_t = self.train_step(t, self._config.batch_size, self._lr_schedule.get_epsilon()) 106 | total_loss += loss_t 107 | if t % self._config.print_freq == 0: 108 | sys.stdout.write('Iter {} \t Loss {} \n'.format(t, total_loss / t)) 109 | sys.stdout.flush() 110 | 111 | def train_step(self, t, batch_size, lr): 112 | states, it, states_p, it_p, actions, rewards, done_mask = self._bf.sample(batch_size) 113 | feed_dict = {self.s_book:states, self.s_it:it, self.sp_book:states_p, self.sp_it:it_p, 114 | self.a:actions, self.r:rewards, self.done_mask:done_mask, self.lr:lr} 115 | loss_eval, _ = self.sess.run([self.loss, self.train_op], feed_dict=feed_dict) 116 | 117 | if t % self._config.target_update_freq == 0: 118 | self.sess.run(self.update_target_op) 119 | if t % self._config.saving_freq == 0: 120 | print(self._config.model_output) 121 | if not os.path.exists(self._config.model_output): 122 | os.makedirs(self._config.model_output) 123 | self.saver.save(self.sess, save_path=os.path.join(self._config.model_output, 'model')) 124 | if t % self._config.simulation_freq == 0: 125 | self.sampling_buffer() 126 | 127 | return loss_eval 128 | 129 | def get_random_action(self, state): 130 | action = np.random.randint(self._config.L) 131 | q = self.get_q_values(state)[0] 132 | q_value = q_value = q[action] 133 | return (action, q_value) 134 | 135 | def get_best_action(self, state): 136 | q = self.get_q_values(state)[0] 137 | action = np.argmax(q[:-1]) 138 | q_value = q[action] 139 | return (action, q_value) 140 | 141 | def get_q_values(self, state): 142 | state_book, state_it = state 143 | q, = 
self.sess.run([self.q], feed_dict={self.s_book:state_book, self.s_it:state_it}) 144 | return q 145 | 146 | def get_best_action_fn(self): 147 | print('get best action fn') 148 | def action_fn(t, amount, state, mid_price): 149 | action = self.get_best_action(state)[0] 150 | price = (action-self._config.L//2) * self._config.base_point + mid_price 151 | return (price, action) 152 | return action_fn 153 | 154 | if __name__ == '__main__': 155 | config = Config() 156 | model = DQN(config) 157 | model.initialize() 158 | model.train() 159 | -------------------------------------------------------------------------------- /src/model_base.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import numpy as np 4 | 5 | from config import Config 6 | from replay_buffer import ReplayBuffer 7 | from schedule import LinearSchedule 8 | from message_queue import Message_Queue 9 | from order_queue import Order_Queue 10 | from limit_order_book import Limit_Order_book 11 | 12 | class model(object): 13 | def __init__(self, config): 14 | self._config = config 15 | self._eps_schedule = LinearSchedule( 16 | self._config.eps_begin, 17 | self._config.eps_end, 18 | self._config.nsteps) 19 | self._lr_schedule = LinearSchedule( 20 | self._config.lr_begin, 21 | self._config.lr_end, 22 | self._config.lr_nsteps) 23 | self._oq = Order_Queue(self._config.order_path) 24 | self._mq = Message_Queue(self._config.message_path) 25 | self._bf = ReplayBuffer(1000000, config) 26 | 27 | self._action_fn = self.get_action_fn() 28 | 29 | self.build() 30 | 31 | def build(self): 32 | pass 33 | 34 | def initialize(self): 35 | pass 36 | 37 | def get_random_action(self, state): 38 | pass 39 | 40 | def get_best_action(self, state): 41 | ### return action, q value 42 | pass 43 | 44 | def get_action(self, state): 45 | if np.random.random() < self._eps_schedule.get_epsilon(): 46 | return self.get_random_action(state)[0] 47 | else: 48 | return self.get_best_action(state)[0] 49 | 50 | def get_random_action_fn(self): 51 | def random_action_fn(t, amount, state, mid_price): 52 | action = np.random.randint(self._config.L) # action = L for market order 53 | price = (action-self._config.L//2) * self._config.base_point + mid_price 54 | return (price, action) 55 | return random_action_fn 56 | 57 | def get_action_fn(self): 58 | def action_fn(t, amount, state, mid_price): 59 | action = self.get_action(state) 60 | price = (action-self._config.L//2) * self._config.base_point + mid_price 61 | return (price, action) 62 | return action_fn 63 | 64 | def pad_state(self, states, state_history): 65 | tmp_states, tmp_its = zip(*states) 66 | tmp_state = np.concatenate([np.expand_dims(state, -1) for state in tmp_states], axis=-1) 67 | tmp_state = np.pad(tmp_state, ((0,0),(0,0),(state_history-tmp_state.shape[-1],0)), 'constant', constant_values=0) 68 | tmp_it = tmp_its[-1] 69 | return ([tmp_state], [tmp_it]) 70 | 71 | def simulate_an_episode(self, amount, T, H, start_time, order_direction, action_fn, depth): 72 | dH = H // T 73 | self._mq.reset() 74 | lob_data = self._oq.create_orderbook_time(start_time, self._mq) 75 | lob = Limit_Order_book(**lob_data, own_amount_to_trade=0, 76 | own_init_price=-order_direction*Limit_Order_book._DUMMY_VARIABLE, 77 | own_trade_type=order_direction) 78 | rewards = [] 79 | states = [] 80 | actions = [] 81 | done_mask = [] 82 | 83 | amount_remain = amount 84 | cum_reward = 0 85 | 86 | for t in range(start_time, start_time+H-dH, dH): 87 | tmp1 = 1.0 * amount_remain / amount # 
fraction of amount remaining
88 | tmp2 = 1.0 * (start_time + H - t) / H # fraction of time remaining
89 | state = (lob.display_book(depth), np.array([tmp1, tmp2], dtype=float))
90 | state = self.process_state(state)
91 | states.append(state)
92 | 
93 | mid_price = lob.get_mid_price()
94 | state_input = self.pad_state(states[-self._config.state_history:], self._config.state_history)
95 | price, action = action_fn(start_time+H-t, amount_remain, state_input, mid_price)
96 | actions.append(action)
97 | done_mask.append(False)
98 | 
99 | lob.update_own_order(price, amount_remain)
100 | 
101 | for idx, message in self._mq.pop_to_next_time(t+dH):
102 | lob.process(**message)
103 | if lob.own_amount_to_trade == 0:
104 | done_mask.append(True)
105 | state = (lob.display_book(depth), np.array([0, 1.0*(start_time+H-self._mq._time)/H], dtype=float))
106 | state = self.process_state(state)
107 | states.append(state)
108 | rewards.append(lob.own_reward - cum_reward)
109 | break
110 | if done_mask[-1]:
111 | break
112 | else:
113 | # order not fully filled this period: record the incremental reward and carry the remaining amount forward
114 | rewards.append(lob.own_reward - cum_reward)
115 | cum_reward = lob.own_reward
116 | amount_remain = lob.own_amount_to_trade
117 | 
118 | if not done_mask[-1]:
119 | tmp1 = 1.0 * amount_remain / amount
120 | tmp2 = 1.0 * (start_time + H - t - dH) / H
121 | state = (lob.display_book(depth), np.array([tmp1, tmp2], dtype=float))
122 | state = self.process_state(state)
123 | states.append(state)
124 | done_mask.append(False)
125 | 
126 | lob.update_own_order(lob.own_trade_type*Limit_Order_book._DUMMY_VARIABLE) # submit the remainder as a market order at the end of the horizon
127 | if lob.own_amount_to_trade == 0:
128 | rewards.append(lob.own_reward - cum_reward)
129 | else:
130 | rewards.append(-Limit_Order_book._DUMMY_VARIABLE)
131 | tmp1 = 1.0 * lob.own_amount_to_trade / amount
132 | state = (lob.display_book(depth), np.array([tmp1, 0], dtype=float))
133 | state = self.process_state(state)
134 | states.append(state)
135 | actions.append(self._config.L)
136 | done_mask.append(True)
137 | return (states, rewards, actions, done_mask[1:])
138 | 
139 | def sampling_buffer(self):
140 | for start_time in range(self._config.train_start, self._config.train_end, self._config.H):
141 | states, rewards, actions, done_mask = self.simulate_an_episode(
142 | self._config.I, self._config.T, self._config.H, start_time,
143 | self._config.direction, self._action_fn, self._config.depth)
144 | self._bf.store(states, actions, rewards, done_mask)
145 | 
146 | def process_state(self, state):
147 | state_book, state_it = state
148 | state_book = state_book.astype('float32')
149 | state_book[:,0] /= 1.e6
150 | state_book[:,1] /= 1.e2
151 | state_book[:,2] /= 1.e6
152 | state_book[:,3] /= 1.e2
153 | return (state_book, state_it)
154 | 
155 | if __name__ == '__main__':
156 | config = Config()
157 | m = model(config)
158 | states, rewards, actions, done_mask = m.simulate_an_episode(m._config.I, m._config.T,
159 | m._config.H, m._config.train_start, m._config.direction,
160 | m.get_random_action_fn(), m._config.depth)
161 | 
162 | print(states)
163 | print(rewards)
164 | print(actions)
165 | print(done_mask)
166 | 
167 | bf = ReplayBuffer(3, config)
168 | bf.store(states, actions, rewards, done_mask)
169 | print(bf.states_stack)
170 | print(bf.its)
171 | print(bf.actions)
172 | print(bf.rewards)
173 | print(bf.done_mask)
174 | 
175 | # states, it, states_p, it_p, actions, rewards, done_mask = bf.sample(2)
176 | # print(states)
177 | # print(it)
178 | # print(states_p)
179 | # print(it_p)
180 | # print(actions)
181 | # print(rewards)
182 | # print(done_mask)
183 | 
184 | 
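For reference, the loss built in DQN.add_loss_op above is the standard one-step Bellman regression target, Q_samp = r + gamma * max_a' Q_target(s', a') * (1 - done), compared against Q(s, a) for the action actually taken. Below is a minimal NumPy sketch of that batch computation; all numbers are made up for illustration, only the arithmetic mirrors the TensorFlow code.

import numpy as np

gamma = 0.99
r = np.array([1.0, 0.5])                       # batch of rewards
done = np.array([False, True])                 # batch of done flags
q = np.array([[0.2, 0.7, 0.1],                 # Q(s, .) from the online network
              [0.4, 0.3, 0.9]])
target_q = np.array([[0.5, 0.6, 0.2],          # Q(s', .) from the target network
                     [0.1, 0.8, 0.3]])
a = np.array([1, 2])                           # actions taken in the batch

q_samp = r + gamma * target_q.max(axis=1) * (1.0 - done.astype(float))
q_s_a = q[np.arange(len(a)), a]                # Q(s, a) for the taken actions
loss = np.mean((q_samp - q_s_a) ** 2)          # mean squared Bellman error

The same loss is minimised with Adam (optionally with gradient clipping, per the config), and the target network is copied from the online network every target_update_freq steps.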
-------------------------------------------------------------------------------- /src/dp_policy.py: --------------------------------------------------------------------------------
1 | import argparse
2 | import numpy as np
3 | import copy
4 | 
5 | from limit_order_book import Limit_Order_book
6 | from message_queue import Message_Queue
7 | from order_queue import Order_Queue
8 | from evaluate_policy import evaluate_policy
9 | 
10 | n_state = 2
11 | states_len = [2, 3]
12 | 
13 | parser = argparse.ArgumentParser(description='Dynamic Programming Algorithm')
14 | parser.add_argument('--tic', default='GOOG', help='Company Ticker')
15 | parser.add_argument('--base_point', default=100, help='Base Point', type=int)
16 | parser.add_argument('--order_direction', default=-1, help='Buy 1, Sell -1', type=int)
17 | parser.add_argument('--spread_cutoff', default=10.0, help='Cutoff separating low from high bid-ask spread', type=float)
18 | parser.add_argument('--train_start', default=34200, help='Train Start Time', type=float)
19 | parser.add_argument('--train_end', default=46800, help='Train End Time', type=float)
20 | parser.add_argument('--test_start', default=46800, help='Test Start Time', type=float)
21 | parser.add_argument('--test_end', default=57600, help='Test End Time', type=float)
22 | parser.add_argument('--H', default=600, help='Horizon', type=int)
23 | parser.add_argument('--T', default=20, help='Time steps', type=int)
24 | parser.add_argument('--V', default=100, help='Amount to trade', type=int)
25 | parser.add_argument('--I', default=10, help='Inventory Length', type=int)
26 | parser.add_argument('--L', default=10, help='Action Length', type=int)
27 | parser.add_argument('--mode', default='train', help='Mode: train or test')
28 | args = parser.parse_args()
29 | 
30 | file_msg = '../datasets/%s_2012-06-21_34200000_57600000_message_10.csv' % (args.tic)
31 | file_order = '../datasets/%s_2012-06-21_34200000_57600000_orderbook_10.csv' % (args.tic)
32 | 
33 | def Calculate_Q(V, H, T, I, L, oq, mq):
34 | """
35 | Q is indexed by state and action, where the state consists of the time step t
36 | (computed for 0 to T; the T+1 slice is left as zeros), the inventory level I, and
37 | the limit order book states. The action axis has length L.
38 | V is the total number of shares, I is the number of inventory units.
39 | H is the total time left to sell all of the inventory. One period is H/T.
40 | """
41 | Q = np.zeros((T + 2, I, states_len[0], states_len[1], L))
42 | Q_counter = np.zeros((T + 2, I, states_len[0], states_len[1], L))
43 | for t in np.arange(T, -1, -1):
44 | time = H*(t/T)
45 | next_time = time + H/T
46 | """
47 | load_episodes will load the current orderbook at time H*(t/T)
48 | and the orderbook at next time step H*(t+1)/T
49 | """
50 | episodes, real_times = load_episodes(time, next_time, H, V, oq, mq)
51 | for k in range(len(episodes)):
52 | episode = episodes[k]
53 | real_time = real_times[k]
54 | print(real_time)
55 | episode_states = get_state(episode[0])
56 | prices = generate_prices(episode[0], L)
57 | for i in range(I):
58 | for a in range(L):
59 | a_price = prices[a]
60 | if t == T:
61 | episode_next_state = get_state(episode[0])
62 | episode_next_i, im_reward = simulate(episode[0], int((i+1)*V/I), a_price, real_time[0], real_time[0], mq)
63 | else:
64 | episode_next_state = get_state(episode[1])
65 | episode_next_i, im_reward = simulate(episode[0], int((i+1)*V/I), a_price, real_time[0], real_time[1], mq)
66 | 
67 | episode_next_i = int(episode_next_i/V*I)-1 # Have to change new order_size into inventory units.
68 | max_Q = np.amax(Q[t+1, episode_next_i, episode_next_state[0], episode_next_state[1], :]) 69 | n = Q_counter[t, i, episode_states[0], episode_states[1], a] 70 | Q_counter[t, i, episode_states[0], episode_states[1], a] += 1 71 | Q[t, i, episode_states[0], episode_states[1], a] = n/(n+1) * Q[t, i, episode_states[0], episode_states[1], a] + 1/(n+1)*(im_reward+max_Q) 72 | return Q 73 | 74 | 75 | def Optimal_strategy(Q): 76 | """ 77 | return argmax of each Q along the last axis (action) 78 | """ 79 | return np.argmax(Q, axis=len(Q.shape)-1) 80 | 81 | def Optimal_action(remaining_time, amount, lob_copy): 82 | t = int((args.H - remaining_time) / args.H * args.T) 83 | i = int(amount / args.V * args.I)-1 84 | states = get_state(lob_copy) 85 | action = Optimal_Q[t,i,states[0],states[1]] 86 | prices = generate_prices(lob_copy, args.L) 87 | return prices[action] 88 | 89 | 90 | def load_episodes(time, next_time, H, V, oq, mq): 91 | lob1_data, time_1 = read_order_book(time, H, oq, mq) 92 | lob1 = [Limit_Order_book(**lob_data, own_amount_to_trade = 0, 93 | own_init_price=-args.order_direction*Limit_Order_book._DUMMY_VARIABLE, 94 | own_trade_type=args.order_direction) for lob_data in lob1_data] 95 | 96 | lob2_data, time_2 = read_order_book(next_time, H, oq, mq) 97 | lob2 = [Limit_Order_book(**lob_data, own_amount_to_trade = 0, 98 | own_init_price=-args.order_direction*Limit_Order_book._DUMMY_VARIABLE, 99 | own_trade_type=args.order_direction) for lob_data in lob2_data] 100 | return list(zip(lob1, lob2)), list(zip(time_1, time_2)) 101 | 102 | 103 | def read_order_book(time, H, oq, mq): 104 | """ 105 | read the initial limit order book states from the file 106 | """ 107 | output = [] 108 | time_output = [] 109 | real_time = args.train_start + time 110 | while real_time < args.train_end: 111 | mq.reset() 112 | output.append(oq.create_orderbook_time(real_time, mq)) 113 | time_output.append(real_time) 114 | real_time= real_time + H 115 | return output, time_output 116 | 117 | 118 | def generate_prices(lob, L): 119 | """ 120 | generate a list of action prices based on current lob info 121 | """ 122 | if len(lob.ask) == 0: 123 | current_mid_price = lob.bid[0] 124 | elif len(lob.bid) == 0: 125 | current_mid_price = lob.ask[0] 126 | else: 127 | current_mid_price = lob.bid[0] + (lob.ask[0] - lob.bid[0]) // 2 128 | return np.arange(current_mid_price-(L//2)*args.base_point, current_mid_price+(L-L//2)*args.base_point, args.base_point) 129 | 130 | def get_state(lob): 131 | """ 132 | calculate states based on the limit order book 133 | State 1: bid-ask spread 134 | State 2: bid-ask volume misbalance 135 | """ 136 | if len(lob.ask) == 0: 137 | return [1, 1] 138 | elif len(lob.bid) == 0: 139 | return [1, -1] 140 | else: 141 | spread = (lob.ask[0] - lob.bid[0])/100.0 142 | state1 = 0 if spread < args.spread_cutoff else 1 143 | state2 = np.sign(lob.ask_size[0] - lob.bid_size[0]) 144 | return [state1, state2] 145 | 146 | def simulate(lob, amount, a_price, time, next_time, mq): 147 | """ 148 | simulate to next state, we need to calculate the remaining inventory given the current i and price a, and the immediate reward 149 | (revenue from the executed orders) 150 | """ 151 | mq.reset() 152 | mq.jump_to_time(time) 153 | 154 | lob_copy = copy.deepcopy(lob) 155 | lob_copy.update_own_order(a_price, amount) 156 | 157 | for idx, message in mq.pop_to_next_time(next_time): 158 | lob_copy.process(**message) 159 | if lob_copy.own_amount_to_trade == 0: 160 | break 161 | 162 | return [lob_copy.own_amount_to_trade, lob_copy.own_reward] 
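# Note on the update rule in Calculate_Q above: each (t, i, state, action) cell of Q holds a running
# average of the sampled returns im_reward + max_Q, with Q_counter tracking how many samples have been
# folded into that cell. The incremental-mean form Q <- n/(n+1)*Q + 1/(n+1)*x gives the same result as
# averaging all samples, without storing them. Purely illustrative numbers (not from the data): if a
# cell holds Q = 10.0 after n = 4 samples and a new episode yields im_reward + max_Q = 20.0, the cell
# becomes 4/5*10.0 + 1/5*20.0 = 12.0 and its counter becomes 5.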
163 | 164 | path_target = '../data/%s_Q_dp_%s.npy' % (args.tic,args.V) 165 | oq = Order_Queue(file_order) 166 | mq = Message_Queue(file_msg) 167 | 168 | if args.mode == 'train': 169 | np.save(path_target, Calculate_Q(args.V, args.H, args.T, args.I, args.L,oq,mq)) 170 | elif args.mode == 'test': 171 | Q = np.load(path_target) 172 | Optimal_Q = Optimal_strategy(Q) 173 | rewards = evaluate_policy(args.test_start, args.test_end, args.order_direction, args.V, args.H, args.T, oq, mq, Optimal_action) 174 | print(rewards) 175 | print(np.mean(rewards)) 176 | 177 | 178 | 179 | 180 | 181 | -------------------------------------------------------------------------------- /src/limit_order_book.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Limit_Order_book(object): 4 | """ 5 | Abstract Class for Limit Order Book 6 | 7 | Initialize with the following information: 8 | 1) Initial state of the Limit Order Book, including: 9 | Bid prices (Descending order) and the corresponding sizes 10 | Ask prices (Ascending order) and the corresponding sizes 11 | 2) Depth of the Limit order book 12 | 3) Dummy prices to reach depth, dummy for ask price, negative for bid 13 | 4) Our own amount of the stocks to trade 14 | 5) The initial limit order price 15 | 6) Our own trade type: -1 for sell, 1 for buy 16 | 17 | When we need to update our own limit order, use update_own_order(price) 18 | 19 | When get a new limit order from the message, use process(type, size, price, direction) 20 | """ 21 | 22 | _DUMMY_VARIABLE = 9999999999 23 | 24 | def __init__(self, bid=np.empty((0,), dtype=int), 25 | bid_size=np.empty((0,), dtype=int), 26 | ask=np.empty((0,), dtype=int), 27 | ask_size=np.empty((0,), dtype=int), 28 | own_amount_to_trade=100, 29 | own_init_price=9999999999, 30 | own_trade_type=-1): 31 | """ 32 | Initializer for LOB 33 | """ 34 | 35 | assert(len(bid)==len(bid_size)) 36 | self.bid = bid 37 | self.bid_size = bid_size 38 | 39 | assert(len(ask)==len(ask_size)) 40 | self.ask = ask 41 | self.ask_size = ask_size 42 | 43 | #Initialize own order info 44 | self.init_own_order(own_amount_to_trade, own_init_price, own_trade_type) 45 | 46 | 47 | def init_own_order(self, own_amount_to_trade, own_init_price, own_trade_type): 48 | """ 49 | Initializer for own order info 50 | """ 51 | self.own_price = own_init_price 52 | self.own_amount_to_trade = own_amount_to_trade 53 | self.own_trade_type = own_trade_type 54 | 55 | self.own_reward = 0.0 56 | self.own_earlier_orders = 0 #Total number of limit orders before us, including same price but earlier orders 57 | 58 | #Add our own limit order to the LOB 59 | if self.own_amount_to_trade > 0: 60 | self.add_order(self.own_amount_to_trade, self.own_price, self.own_trade_type, own=True) 61 | 62 | 63 | def update_own_order(self, price, amount = None): 64 | """ 65 | Helper to update our own order info, only need the new price 66 | """ 67 | 68 | if price != self.own_price or ((amount is not None) and (amount !=self.own_amount_to_trade)): #Only need to update if different price 69 | if self.own_amount_to_trade > 0: 70 | self.delete_order(self.own_amount_to_trade, self.own_price, self.own_trade_type, own=True) 71 | self.own_price = price 72 | if amount is not None: 73 | self.own_amount_to_trade = amount 74 | if self.own_amount_to_trade > 0: 75 | self.add_order(self.own_amount_to_trade, self.own_price, self.own_trade_type, own=True) 76 | 77 | 78 | def process(self, type, size, price, direction): 79 | """ 80 | Process other limit 
order messages 81 | Type 1: new limit order 82 | Type 2 or 3: cancellation or deletion of a limit order, here we assume deleting the earliest ones of the same price 83 | Type 4: Execution of a visible limit order, equivalent to adding a new limit order of the opposite direction, and 84 | then execute the matching 85 | Type 5: Execution of a hidden limit order, ignored since unobservable 86 | """ 87 | if type == 1: 88 | self.add_order(size, price, direction, own=False) 89 | elif type == 2 or type == 3: 90 | self.delete_order(size, price, direction, own=False) 91 | elif type == 4: 92 | self.add_order(size, price, -direction, own=False) 93 | 94 | 95 | def add_order(self, size, price, direction, own=False): 96 | """ 97 | Execute the matching first, and then insert the remaining ones 98 | """ 99 | executed = self.partial_execution(size, price, direction, own) 100 | if executed < size: 101 | self.insert_order(size - executed, price, direction, own) 102 | 103 | def delete_order(self, size, price, direction, own=False, cancel=True): 104 | """ 105 | Delete order from the LOB and update number of orders before our own order 106 | """ 107 | if size <= 0: 108 | return 0 109 | 110 | if direction == -1: #delete sell order, check ask 111 | index = np.searchsorted(self.ask, price) #self.ask is in ascending order 112 | if cancel and not own: 113 | try: 114 | assert(self.ask[index] == price) 115 | if self.own_price == price and direction == self.own_trade_type: 116 | assert(self.ask_size[index] - self.own_amount_to_trade >= size) 117 | else: 118 | assert(self.ask_size[index] >= size) 119 | except: 120 | return 0 121 | else: 122 | assert(self.ask[index] == price) 123 | assert(self.ask_size[index] >= size) 124 | 125 | 126 | if self.ask_size[index] == size: # have to remove the entry and add dummy if remove whole order 127 | self.ask = np.delete(self.ask, index) 128 | self.ask_size = np.delete(self.ask_size, index) 129 | else: 130 | self.ask_size[index] -= size 131 | 132 | if price < self.own_price: 133 | self.own_earlier_orders -= size 134 | elif price == self.own_price: 135 | if not own: 136 | #if same price as our own order, only remove the earlier ones 137 | else_executed = min(size, self.own_earlier_orders - np.sum(self.ask_size[:index])) 138 | self.own_earlier_orders -= else_executed 139 | if not cancel: 140 | own_executed = min(size - else_executed, self.own_amount_to_trade) 141 | self.own_amount_to_trade -= own_executed 142 | return own_executed 143 | 144 | elif direction == 1: #delete buy order, check bid 145 | index = self.bid.size - np.searchsorted(self.bid[::-1], price, side='right') #self.bid is in descending order 146 | if cancel and not own: 147 | try: 148 | assert(self.bid[index] == price) 149 | if self.own_price == price and direction == self.own_trade_type: 150 | assert(self.bid_size[index] - self.own_amount_to_trade >= size) 151 | else: 152 | assert(self.bid_size[index] >= size) 153 | except: 154 | return 0 155 | else: 156 | assert(self.bid[index] == price) 157 | assert(self.bid_size[index] >= size) 158 | 159 | if self.bid_size[index] == size: 160 | self.bid = np.delete(self.bid, index) 161 | self.bid_size = np.delete(self.bid_size, index) 162 | else: 163 | self.bid_size[index] -= size 164 | 165 | if price > self.own_price: 166 | self.own_earlier_orders -= size 167 | elif price == self.own_price: 168 | if not own: 169 | else_executed = min(size, self.own_earlier_orders - np.sum(self.bid_size[:index])) 170 | self.own_earlier_orders -= else_executed 171 | if not cancel: 172 | own_executed = 
min(size - else_executed, self.own_amount_to_trade) 173 | self.own_amount_to_trade -= own_executed 174 | return own_executed 175 | return 0 176 | 177 | def insert_order(self, size, price, direction, own=False): 178 | """ 179 | Insert order to the LOB and update number of orders before our own order 180 | """ 181 | if direction == -1: #insert sell order, check ask 182 | index = np.searchsorted(self.ask, price) 183 | extra = 0 #track number of existing same price ones 184 | if index == len(self.ask) or self.ask[index] != price: #need to insert new entry 185 | self.ask = np.insert(self.ask, index, price) 186 | self.ask_size = np.insert(self.ask_size, index, size) 187 | else: 188 | extra = self.ask_size[index] 189 | self.ask_size[index] += size 190 | 191 | if not own: #update number of earlier orders 192 | if direction == self.own_trade_type and price < self.own_price: 193 | self.own_earlier_orders += size 194 | else: #calculate number of earlier orders 195 | self.own_earlier_orders = np.sum(self.ask_size[:index]) + extra 196 | 197 | elif direction == 1: #insert buy order, check bid 198 | index = self.bid.size - np.searchsorted(self.bid[::-1], price, side='right') 199 | extra = 0 200 | if index == len(self.bid) or self.bid[index] != price: 201 | self.bid = np.insert(self.bid, index, price) 202 | self.bid_size = np.insert(self.bid_size, index, size) 203 | else: 204 | extra = self.bid_size[index] 205 | self.bid_size[index] += size 206 | 207 | if not own: 208 | if direction == self.own_trade_type and price > self.own_price: 209 | self.own_earlier_orders += size 210 | else: 211 | self.own_earlier_orders = np.sum(self.bid_size[:index]) + extra 212 | 213 | 214 | def partial_execution(self, size, price, direction, own=False): 215 | """ 216 | Match the new order to the LOB and return executed number of orders 217 | """ 218 | remaining = size #remaining number of orders to execute 219 | order_reward = 0.0 #reward from executing this order 220 | own_executed = 0 221 | 222 | if direction == -1: #sell order, check bid 223 | while remaining > 0 and len(self.bid) > 0 and self.bid[0] >= price: 224 | to_execute = min(remaining, self.bid_size[0]) 225 | order_reward += to_execute * self.bid[0] 226 | own_executed += self.delete_order(to_execute, self.bid[0], 1, own=own, cancel=False) #remove matched order 227 | remaining -= to_execute 228 | 229 | elif direction == 1: #buy order, check ask 230 | while remaining > 0 and len(self.ask) > 0 and self.ask[0] <= price: 231 | to_execute = min(remaining, self.ask_size[0]) 232 | order_reward -= to_execute * self.ask[0] 233 | own_executed += self.delete_order(to_execute, self.ask[0], -1, own=own, cancel=False) 234 | remaining -= to_execute 235 | 236 | executed = size - remaining 237 | 238 | if own: 239 | self.own_amount_to_trade -= executed 240 | self.own_reward += order_reward 241 | elif direction != self.own_trade_type and executed > self.own_earlier_orders: 242 | #Calculate number of our own limit order that got executed 243 | self.own_reward += self.own_price * own_executed * direction 244 | 245 | return executed 246 | 247 | def get_mid_price(self): 248 | if len(self.bid) == 0: 249 | if len(self.ask) == 0: 250 | return 0 251 | else: 252 | return self.ask[0] 253 | elif len(self.ask) == 0: 254 | return self.bid[0] 255 | 256 | if self.bid[0] < self._DUMMY_VARIABLE and self.ask[0] > -self._DUMMY_VARIABLE: 257 | return (self.ask[0] - self.bid[0]) // 2 + self.bid[0] 258 | if self.bid[0] < self._DUMMY_VARIABLE: 259 | return self.bid[0] 260 | else: 261 | return self.ask[0] 262 
| 263 | def display_book(self, level): 264 | bid = np.pad(self.bid, [0, max(0,level-self.bid.size)], 'constant', constant_values=-self._DUMMY_VARIABLE)[:level][:,np.newaxis] 265 | bid_size = np.pad(self.bid_size, [0, max(0,level-self.bid_size.size)], 'constant', constant_values=0)[:level][:,np.newaxis] 266 | ask = np.pad(self.ask, [0, max(0,level-self.ask.size)], 'constant', constant_values=self._DUMMY_VARIABLE)[:level][:,np.newaxis] 267 | ask_size = np.pad(self.ask_size, [0, max(0,level-self.ask_size.size)], 'constant', constant_values=0)[:level][:,np.newaxis] 268 | return np.concatenate([bid, bid_size, ask, ask_size], axis=1) --------------------------------------------------------------------------------
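To make the book mechanics above concrete, here is a minimal usage sketch of Limit_Order_book. The prices and sizes are invented for illustration (using the same integer price convention as the rest of the code); it relies only on the constructor and the process / update_own_order / get_mid_price / display_book methods shown above.

import numpy as np
from limit_order_book import Limit_Order_book

# A tiny book: two bid levels and two ask levels, plus our own resting sell order
# for 30 shares joining the best ask at 10100.
lob = Limit_Order_book(
    bid=np.array([10000, 9900]), bid_size=np.array([50, 80]),
    ask=np.array([10100, 10200]), ask_size=np.array([60, 40]),
    own_amount_to_trade=30,
    own_init_price=10100,
    own_trade_type=-1)            # -1 = sell

print(lob.get_mid_price())        # midpoint of best bid and best ask

# An incoming buy limit order (type 1, direction 1) priced at 10100 crosses the ask side.
# It first consumes the 60 shares queued ahead of us at that price, then fills our 30 shares,
# and the 10 shares left over are inserted on the bid side.
lob.process(type=1, size=100, price=10100, direction=1)

print(lob.own_amount_to_trade)    # 0 once our order is fully executed
print(lob.own_reward)             # sale proceeds credited at our limit price
print(lob.display_book(2))        # top 2 levels as [bid, bid_size, ask, ask_size]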