├── README.md
└── DQN_Trading
    ├── outputs
    │   └── TradingSystem_v0
    │       ├── k=5
    │       │   ├── results
    │       │   │   ├── test.jpg
    │       │   │   └── train.jpg
    │       │   └── models
    │       │       └── dqn_checkpoint.pth
    │       ├── k=10
    │       │   ├── results
    │       │   │   ├── test.jpg
    │       │   │   └── train.jpg
    │       │   └── models
    │       │       └── dqn_checkpoint.pth
    │       ├── k=20
    │       │   ├── results
    │       │   │   ├── test.jpg
    │       │   │   └── train.jpg
    │       │   └── models
    │       │       └── dqn_checkpoint.pth
    │       └── k=50
    │           ├── results
    │           │   ├── test.jpg
    │           │   └── train.jpg
    │           └── models
    │               └── dqn_checkpoint.pth
    ├── trading_env.py
    ├── dqn.py
    └── main.py

/README.md:
--------------------------------------------------------------------------------
# Algorithmic Trading with DQN

This repository provides code for implementing a DQN model for algorithmic trading.
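
The agent interacts with a simple trading environment (`trading_env.py`): the state is a sliding
window of the last k daily returns, the three actions map to a short/neutral/long position, and the
per-step reward is the position times the next day's return. A minimal sketch of these conventions
with made-up numbers (illustrative only, not part of the scripts):

```python
returns = [0.010, -0.005, 0.002, 0.007, -0.003]   # toy daily returns of one stock
k = 3                                             # window length (state_space_dim in main.py)

state = tuple(returns[:k])            # state = the last k returns
action = 2                            # 0 = short, 1 = neutral, 2 = long
position = action - 1                 # mapped to {-1, 0, +1}
reward = position * returns[k]        # position times the next day's return -> 0.007
next_state = tuple(returns[1:k + 1])  # the window slides forward by one day
```

`main.py` downloads daily prices with `yfinance`, trains the DQN defined in `dqn.py` on the training
tickers, evaluates the learned policy against a buy-and-hold baseline on the test tickers, and writes
the plots and the model checkpoint to `outputs/TradingSystem_v0/`.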
--------------------------------------------------------------------------------
/DQN_Trading/outputs/TradingSystem_v0/k=5/results/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Murraaa11/Algorithmic-Trading-with-DQN/HEAD/DQN_Trading/outputs/TradingSystem_v0/k=5/results/test.jpg
--------------------------------------------------------------------------------
/DQN_Trading/outputs/TradingSystem_v0/k=10/results/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Murraaa11/Algorithmic-Trading-with-DQN/HEAD/DQN_Trading/outputs/TradingSystem_v0/k=10/results/test.jpg
--------------------------------------------------------------------------------
/DQN_Trading/outputs/TradingSystem_v0/k=10/results/train.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Murraaa11/Algorithmic-Trading-with-DQN/HEAD/DQN_Trading/outputs/TradingSystem_v0/k=10/results/train.jpg
--------------------------------------------------------------------------------
/DQN_Trading/outputs/TradingSystem_v0/k=20/results/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Murraaa11/Algorithmic-Trading-with-DQN/HEAD/DQN_Trading/outputs/TradingSystem_v0/k=20/results/test.jpg
--------------------------------------------------------------------------------
/DQN_Trading/outputs/TradingSystem_v0/k=20/results/train.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Murraaa11/Algorithmic-Trading-with-DQN/HEAD/DQN_Trading/outputs/TradingSystem_v0/k=20/results/train.jpg
--------------------------------------------------------------------------------
/DQN_Trading/outputs/TradingSystem_v0/k=5/results/train.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Murraaa11/Algorithmic-Trading-with-DQN/HEAD/DQN_Trading/outputs/TradingSystem_v0/k=5/results/train.jpg
--------------------------------------------------------------------------------
/DQN_Trading/outputs/TradingSystem_v0/k=50/results/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Murraaa11/Algorithmic-Trading-with-DQN/HEAD/DQN_Trading/outputs/TradingSystem_v0/k=50/results/test.jpg
--------------------------------------------------------------------------------
/DQN_Trading/outputs/TradingSystem_v0/k=50/results/train.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Murraaa11/Algorithmic-Trading-with-DQN/HEAD/DQN_Trading/outputs/TradingSystem_v0/k=50/results/train.jpg
--------------------------------------------------------------------------------
/DQN_Trading/outputs/TradingSystem_v0/k=10/models/dqn_checkpoint.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Murraaa11/Algorithmic-Trading-with-DQN/HEAD/DQN_Trading/outputs/TradingSystem_v0/k=10/models/dqn_checkpoint.pth
--------------------------------------------------------------------------------
/DQN_Trading/outputs/TradingSystem_v0/k=20/models/dqn_checkpoint.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Murraaa11/Algorithmic-Trading-with-DQN/HEAD/DQN_Trading/outputs/TradingSystem_v0/k=20/models/dqn_checkpoint.pth
--------------------------------------------------------------------------------
/DQN_Trading/outputs/TradingSystem_v0/k=5/models/dqn_checkpoint.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Murraaa11/Algorithmic-Trading-with-DQN/HEAD/DQN_Trading/outputs/TradingSystem_v0/k=5/models/dqn_checkpoint.pth
--------------------------------------------------------------------------------
/DQN_Trading/outputs/TradingSystem_v0/k=50/models/dqn_checkpoint.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Murraaa11/Algorithmic-Trading-with-DQN/HEAD/DQN_Trading/outputs/TradingSystem_v0/k=50/models/dqn_checkpoint.pth
--------------------------------------------------------------------------------
/DQN_Trading/trading_env.py:
--------------------------------------------------------------------------------
import random

class TradingSystem_v0:
    def __init__(self, returns_data, k_value, mode):
        self.mode = mode  # 'train' or 'test'
        self.index = 0
        self.data = returns_data
        self.tickers = list(returns_data.keys())
        self.current_stock = self.tickers[self.index]
        self.r_ts = self.data[self.current_stock]
        self.k = k_value
        self.total_steps = len(self.r_ts) - self.k
        self.current_step = 0
        self.initial_state = tuple(self.r_ts[:self.k])  # use a tuple because it is immutable
        self.state = self.initial_state
        self.reward = 0.0
        self.is_terminal = False

    # step function that returns obs (next state), reward, is_done
    def step(self, action):
        self.current_step += 1
        if self.current_step == self.total_steps:
            self.is_terminal = True
        self.reward = (action - 1) * self.r_ts[self.current_step + self.k - 1]
        self.state = tuple(self.r_ts[self.current_step:(self.k + self.current_step)])
        return self.state, self.reward, self.is_terminal

    def reset(self):
        if self.mode == 'train':
            self.current_stock = random.choice(self.tickers)  # randomly pick a stock for every episode
        else:
            self.current_stock = self.tickers[self.index]
            self.index += 1
        self.r_ts = self.data[self.current_stock]
        self.total_steps = len(self.r_ts) - self.k
        self.current_step = 0
        self.initial_state = tuple(self.r_ts[:self.k])
        self.state = self.initial_state
        self.reward = 0.0
        self.is_terminal = False
        return self.state
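

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not required by main.py): roll a random policy
# through the environment on a made-up return series. main.py builds the real
# returns_data dict from yfinance 'Adj Close' percentage changes instead.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    toy_data = {'TOY': [0.01, -0.02, 0.005, 0.015, -0.01, 0.003, 0.008, -0.004]}
    env = TradingSystem_v0(toy_data, k_value=3, mode='train')
    state = env.reset()
    total_reward = 0.0
    while True:
        action = random.randrange(3)  # 0 = short, 1 = neutral, 2 = long
        state, reward, done = env.step(action)
        total_reward += reward
        if done:
            break
    print(f'random-policy episode reward: {total_reward:.4f}')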
--------------------------------------------------------------------------------
/DQN_Trading/dqn.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import math
import numpy as np


class MLP(nn.Module):
    def __init__(self, state_dim, action_dim, hidden_dim=128):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(state_dim, hidden_dim)   # input layer
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)  # hidden layer
        self.fc3 = nn.Linear(hidden_dim, action_dim)  # output layer

    def forward(self, x):
        # activation function
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)


class ReplayBuffer:
    def __init__(self, capacity):
        self.capacity = capacity  # capacity of the buffer
        self.buffer = []          # replay buffer
        self.position = 0

    def push(self, state, action, reward, next_state, done):
        ''' circular buffer: once full, the oldest transitions are overwritten (FIFO)
        '''
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)
        self.buffer[self.position] = (state, action, reward, next_state, done)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        batch = random.sample(self.buffer, batch_size)
        state, action, reward, next_state, done = zip(*batch)
        return state, action, reward, next_state, done

    def __len__(self):
        return len(self.buffer)


class DQN:
    def __init__(self, state_dim, action_dim, cfg):
        self.action_dim = action_dim
        self.device = cfg.device  # cpu or gpu
        self.gamma = cfg.gamma    # discount factor
        self.frame_idx = 0        # frame counter driving the epsilon decay
        self.epsilon = lambda frame_idx: cfg.epsilon_end + \
            (cfg.epsilon_start - cfg.epsilon_end) * \
            math.exp(-1. * frame_idx / cfg.epsilon_decay)
        self.batch_size = cfg.batch_size
        self.policy_net = MLP(state_dim, action_dim, hidden_dim=cfg.hidden_dim).to(self.device)
        self.target_net = MLP(state_dim, action_dim, hidden_dim=cfg.hidden_dim).to(self.device)
        for target_param, param in zip(self.target_net.parameters(),
                                       self.policy_net.parameters()):  # copy parameters to the target net
            target_param.data.copy_(param.data)
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr)  # optimizer
        self.memory = ReplayBuffer(cfg.memory_capacity)  # experience replay

    def choose_action(self, state):
        self.frame_idx += 1
        if random.random() > self.epsilon(self.frame_idx):
            with torch.no_grad():
                state = torch.tensor([state], device=self.device, dtype=torch.float32)
                q_values = self.policy_net(state)
                action = q_values.max(1)[1].item()  # choose the action with the maximum q-value
        else:
            action = random.randrange(self.action_dim)
        return action

    def update(self):
        if len(self.memory) < self.batch_size:
            return
        state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory.sample(
            self.batch_size)
        # convert to tensors
        state_batch = torch.tensor(state_batch, device=self.device, dtype=torch.float)
        action_batch = torch.tensor(action_batch, device=self.device).unsqueeze(1)
        reward_batch = torch.tensor(reward_batch, device=self.device, dtype=torch.float)
        next_state_batch = torch.tensor(next_state_batch, device=self.device, dtype=torch.float)
        done_batch = torch.tensor(np.float32(done_batch), device=self.device)
        q_values = self.policy_net(state_batch).gather(dim=1, index=action_batch)
        next_q_values = self.target_net(next_state_batch).max(1)[0].detach()
        # expected q-value; for a terminal transition done = 1, so the target reduces to the reward
        expected_q_values = reward_batch + self.gamma * next_q_values * (1 - done_batch)
        loss = nn.MSELoss()(q_values, expected_q_values.unsqueeze(1))
        # update the network
        self.optimizer.zero_grad()
        loss.backward()
        for param in self.policy_net.parameters():  # clip gradients to avoid gradient explosion
            param.grad.data.clamp_(-1, 1)
        self.optimizer.step()

    def save(self, path):
        torch.save(self.target_net.state_dict(), path + 'dqn_checkpoint.pth')

    def load(self, path):
        self.target_net.load_state_dict(torch.load(path + 'dqn_checkpoint.pth'))
        for target_param, param in zip(self.target_net.parameters(), self.policy_net.parameters()):
            param.data.copy_(target_param.data)
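

# ---------------------------------------------------------------------------
# Illustrative sketch (not used by main.py): the epsilon schedule applied in
# choose_action() and the TD target computed in update(), on toy values. The
# hyperparameter values below mirror the defaults in main.py's Config.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    # epsilon decays exponentially from epsilon_start towards epsilon_end
    epsilon_start, epsilon_end, epsilon_decay = 0.90, 0.01, 500
    for frame_idx in (1, 500, 2000):
        eps = epsilon_end + (epsilon_start - epsilon_end) * math.exp(-1. * frame_idx / epsilon_decay)
        print(f'frame {frame_idx:>4}: epsilon = {eps:.3f}')  # ~0.90, ~0.34, ~0.03

    # TD target: reward + gamma * max_a' Q_target(s', a'), zeroed for terminal transitions
    gamma = 0.95
    reward = torch.tensor([0.01, -0.02])
    done = torch.tensor([0.0, 1.0])           # the second transition is terminal
    next_q = torch.tensor([[0.3, 0.1, 0.2],   # toy Q_target(s', a) for the 3 actions
                           [0.0, 0.4, 0.1]])
    target = reward + gamma * next_q.max(1)[0] * (1 - done)
    print(target)  # -> [0.2950, -0.0200]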
--------------------------------------------------------------------------------
/DQN_Trading/main.py:
--------------------------------------------------------------------------------
import sys
import os
curr_path = os.path.dirname(__file__)

import gym
import torch
import numpy as np
import random
import yfinance as yf
import datetime as dt
import pandas as pd
import matplotlib.pyplot as plt
from dqn import DQN
from trading_env import TradingSystem_v0

curr_time = dt.datetime.now().strftime("%Y%m%d-%H%M%S")
random.seed(11)

# Define the tickers and the time range
start_date = dt.date(2020, 1, 1)
end_date = dt.datetime.today().strftime('%Y-%m-%d')
train_tickers = ['ZM', 'TWTR', 'FB', 'MTCH', 'GOOG', 'PINS', 'SNAP', 'ETSY']
test_tickers = ['IAC', 'ZNGA', 'BMBL', 'SOCL']


class Config:
    '''
    hyperparameters
    '''

    def __init__(self):
        ################################## env hyperparameters ###################################
        self.algo_name = 'DQN'  # algorithm name
        self.env_name = 'TradingSystem_v0'  # environment name
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")  # check for a GPU
        self.seed = 11  # random seed
        self.train_eps = 200  # training episodes
        self.state_space_dim = 50  # state space size (the k-value, i.e. window length)
        self.action_space_dim = 3  # action space size (short: 0, neutral: 1, long: 2)
        ###########################################################################################

        ################################## algo hyperparameters ##################################
        self.gamma = 0.95  # discount factor
        self.epsilon_start = 0.90  # start epsilon of the e-greedy policy
        self.epsilon_end = 0.01  # end epsilon of the e-greedy policy
        self.epsilon_decay = 500  # decay rate of epsilon in the e-greedy policy
        self.lr = 0.0001  # learning rate
        self.memory_capacity = 1000  # capacity of the experience replay buffer
        self.batch_size = 64  # size of mini-batch SGD
        self.target_update = 4  # update frequency of the target network (in episodes)
        self.hidden_dim = 128  # dimension of the hidden layers
        ###########################################################################################

        ################################# save path ###############################################
        self.result_path = curr_path + "/outputs/" + self.env_name + \
            '/' + curr_time + '/results/'
        self.model_path = curr_path + "/outputs/" + self.env_name + \
            '/' + curr_time + '/models/'
        self.save = True  # whether to save the figures
        ###########################################################################################


def env_agent_config(data, cfg, mode):
    ''' create the environment and the agent
    '''
    env = TradingSystem_v0(data, cfg.state_space_dim, mode)
    agent = DQN(cfg.state_space_dim, cfg.action_space_dim, cfg)
    if cfg.seed != 0:  # set random seeds
        torch.manual_seed(cfg.seed)
        np.random.seed(cfg.seed)
    return env, agent


def train(cfg, env, agent):
    ''' training
    '''
    print('Start Training!')
    print(f'Environment:{cfg.env_name}, Algorithm:{cfg.algo_name}, Device:{cfg.device}')
    rewards = []  # record total rewards
    ma_rewards = []  # record moving-average total rewards
    for i_ep in range(cfg.train_eps):
        ep_reward = 0
        state = env.reset()
        while True:
            action = agent.choose_action(state)
            next_state, reward, done = env.step(action)
            agent.memory.push(state, action, reward, next_state, done)  # save the transition
            state = next_state
            agent.update()
            ep_reward += reward
            if done:
                break
        if (i_ep + 1) % cfg.target_update == 0:  # update the target network
            agent.target_net.load_state_dict(agent.policy_net.state_dict())
        rewards.append(ep_reward)
        if ma_rewards:
            ma_rewards.append(0.9 * ma_rewards[-1] + 0.1 * ep_reward)
        else:
            ma_rewards.append(ep_reward)
        if (i_ep + 1) % 10 == 0:
            print('Episode:{}/{}, Reward:{}'.format(i_ep + 1, cfg.train_eps, ep_reward))
    print('Finish Training!')
    return rewards, ma_rewards


def test(cfg, env, agent):
    print('Start Testing!')
    print(f'Environment:{cfg.env_name}, Algorithm:{cfg.algo_name}, Device:{cfg.device}')
    ############ Testing does not use the e-greedy policy, so epsilon is set to 0 ############
    cfg.epsilon_start = 0.0
    cfg.epsilon_end = 0.0
    ###########################################################################################
    stocks = env.tickers
    rewards = []  # record total rewards
    for i_ep in range(len(stocks)):
        ep_reward = 0
        state = env.reset()
        while True:
            action = agent.choose_action(state)
            next_state, reward, done = env.step(action)
            state = next_state
            ep_reward += reward
            if done:
                break
        rewards.append(ep_reward)
        print(f"Episode:{i_ep + 1}/{len(stocks)}, Reward:{ep_reward:.1f}")
    print('Finish Testing!')
    return stocks, rewards


if __name__ == "__main__":

    # download stock data from Yahoo Finance
    train_data = {}
    for ticker in train_tickers:
        data = yf.download(ticker, start_date, end_date)
        returns = data['Adj Close'].pct_change()[1:]
        train_data[ticker] = returns

    test_data = {}
    for ticker in test_tickers:
        data = yf.download(ticker, start_date, end_date)
        returns = data['Adj Close'].pct_change()[1:]
        test_data[ticker] = returns

    cfg = Config()

    # training
    env, agent = env_agent_config(train_data, cfg, 'train')
    rewards, ma_rewards = train(cfg, env, agent)
    os.makedirs(cfg.result_path)  # create the output folders
    os.makedirs(cfg.model_path)
    agent.save(path=cfg.model_path)  # save the model
    fig, ax = plt.subplots(1, 1, figsize=(10, 7))  # plot the training result
    ax.plot(list(range(1, cfg.train_eps + 1)), rewards, color='blue', label='rewards')
    ax.plot(list(range(1, cfg.train_eps + 1)), ma_rewards, color='green', label='ma_rewards')
    ax.legend()
    ax.set_xlabel('Episode')
    plt.savefig(cfg.result_path + 'train.jpg')

    # testing
    all_data = {**train_data, **test_data}
    env, agent = env_agent_config(all_data, cfg, 'test')
    agent.load(path=cfg.model_path)  # load the model
    stocks, rewards = test(cfg, env, agent)
    buy_and_hold_rewards = [sum(all_data[stock]) for stock in stocks]
    fig, ax = plt.subplots(1, 1, figsize=(10, 7))  # plot the test result
    width = 0.3
    x = np.arange(len(stocks))
    ax.bar(x, rewards, width=width, color='salmon', label='DQN')
    ax.bar(x + width, buy_and_hold_rewards, width=width, color='orchid', label='Buy and Hold')
    ax.set_xticks(x + width / 2)
    ax.set_xticklabels(stocks, fontsize=12)
    ax.legend()
    plt.savefig(cfg.result_path + 'test.jpg')
--------------------------------------------------------------------------------
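
A note on reusing the shipped checkpoints (a sketch, not part of the original scripts): each
outputs/TradingSystem_v0/k=*/models/ folder above contains a dqn_checkpoint.pth trained with the
corresponding window length k, so a saved agent can be evaluated without rerunning train(). The
snippet below assumes the working directory is DQN_Trading/, that yfinance behaves as it does in
main.py, and that the tickers and dates shown are only placeholders:

    import yfinance as yf
    import datetime as dt
    from main import Config, env_agent_config, test

    # build a small test set of daily returns, mirroring the __main__ block of main.py
    data = {t: yf.download(t, dt.date(2020, 1, 1), '2022-01-01')['Adj Close'].pct_change()[1:]
            for t in ['IAC', 'SOCL']}

    cfg = Config()
    cfg.state_space_dim = 50                                  # must match the k of the chosen checkpoint
    env, agent = env_agent_config(data, cfg, 'test')
    agent.load(path='outputs/TradingSystem_v0/k=50/models/')  # reads dqn_checkpoint.pth from that folder
    stocks, rewards = test(cfg, env, agent)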