├── .gitignore
├── Bot_code_and_models
    ├── .gitignore
    ├── Agent.py
    ├── Environment.py
    ├── Readme.md
    ├── hourly_aggregated_dataset.csv
    ├── main.py
    ├── models.py
    ├── profit_reward_double_ddqn_model
    ├── profit_reward_double_dqn_model
    ├── profit_reward_dqn_model
    ├── sr_reward_double_ddqn_model
    ├── sr_reward_double_dqn_model
    ├── sr_reward_dqn_model
    ├── train_test.py
    └── utils.py
├── LICENSE
├── Readme.md
└── report.pdf


/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | __pycache__/
3 | venv/
4 | 


--------------------------------------------------------------------------------
/Bot_code_and_models/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | __pycache__/
3 | venv/
4 | 


--------------------------------------------------------------------------------
/Bot_code_and_models/Agent.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.optim as optim
  3 | import torch.nn.functional as F
  4 | from models import ConvDQN, ConvDuelingDQN
  5 | from utils import ReplayMemory
  6 | from utils import Transition
  7 | import random
  8 | from tqdm import tqdm
  9 | import re
 10 | import os
 11 | 
 12 | 
 13 | class Agent:
 14 |     """Definition of the Agent that will interact with the environment.
 15 | 
 16 |     Attributes:
 17 |         REPLAY_MEM_SIZE (:obj:`int`): max capacity of Replay Memory
 18 | 
 19 |         BATCH_SIZE (:obj:`int`): Batch size. Default is 40 as specified in the paper.
 20 | 
 21 |         GAMMA (:obj:`float`): The discount, should be a constant between 0 and 1
 22 |             that ensures the sum converges. It also controls the importance of future
 23 |             expected reward.
 24 | 
 25 |         EPS_START(:obj:`float`): initial value for epsilon of the e-greedy action
 26 |             selection
 27 | 
 28 |         EPS_END(:obj:`float`): final value for epsilon of the e-greedy action
 29 |             selection
 30 | 
 31 |         LEARNING_RATE(:obj:`float`): learning rate of the optimizer
 32 |             (Adam)
 33 | 
 34 |         INPUT_DIM (:obj:`int`): input dimentionality withut considering batch size.
 35 | 
 36 |         HIDDEN_DIM (:obj:`int`): hidden layer dimentionality (for Linear models only)
 37 | 
 38 |         ACTION_NUMBER (:obj:`int`): dimentionality of output layer of the Q network
 39 | 
 40 |         TARGET_UPDATE (:obj:`int`): period of Q target network updates
 41 | 
 42 |         MODEL (:obj:`string`): type of the model.
 43 | 
 44 |         DOUBLE (:obj:`bool`): Type of Q function computation.
 45 |     """
 46 | 
 47 |     def __init__(self,
 48 |                  REPLAY_MEM_SIZE=10000,
 49 |                  BATCH_SIZE=40,
 50 |                  GAMMA=0.98,
 51 |                  EPS_START=1,
 52 |                  EPS_END=0.12,
 53 |                  EPS_STEPS=300,
 54 |                  LEARNING_RATE=0.001,
 55 |                  INPUT_DIM=24,
 56 |                  HIDDEN_DIM=120,
 57 |                  ACTION_NUMBER=3,
 58 |                  TARGET_UPDATE=10,
 59 |                  MODEL='ddqn',
 60 |                  DOUBLE=True):
 61 | 
 62 |         self.REPLAY_MEM_SIZE = REPLAY_MEM_SIZE
 63 |         self.BATCH_SIZE = BATCH_SIZE
 64 |         self.GAMMA = GAMMA
 65 |         self.EPS_START = EPS_START
 66 |         self.EPS_END = EPS_END
 67 |         self.EPS_STEPS = EPS_STEPS
 68 |         self.LEARNING_RATE = LEARNING_RATE
 69 |         self.INPUT_DIM = INPUT_DIM
 70 |         self.HIDDEN_DIM = HIDDEN_DIM
 71 |         self.ACTION_NUMBER = ACTION_NUMBER
 72 |         self.TARGET_UPDATE = TARGET_UPDATE
 73 |         self.MODEL = MODEL  # deep q network (dqn) or Dueling deep q network (ddqn)
 74 |         self.DOUBLE = DOUBLE  # to understand if use or do not use a 'Double' model (regularization)
 75 |         self.TRAINING = True  # to do not pick random actions during testing
 76 |         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 77 |         print("Agent is using device:\t" + str(self.device))
 78 |         '''elif self.MODEL == 'lin_ddqn':
 79 |             self.policy_net = DuelingDQN(self.INPUT_DIM, self.HIDDEN_DIM, self.ACTION_NUMBER).to(self.device)
 80 |             self.target_net = DuelingDQN(self.INPUT_DIM, self.HIDDEN_DIM, self.ACTION_NUMBER).to(self.device)
 81 |         elif self.MODEL == 'lin_dqn':
 82 |             self.policy_net = DQN(self.INPUT_DIM, self.HIDDEN_DIM, self.ACTION_NUMBER).to(self.device)
 83 |             self.target_net = DQN(self.INPUT_DIM, self.HIDDEN_DIM, self.ACTION_NUMBER).to(self.device)
 84 |         '''
 85 | 
 86 |         if self.MODEL == 'ddqn':
 87 |             self.policy_net = ConvDuelingDQN(self.INPUT_DIM, self.ACTION_NUMBER).to(self.device)
 88 |             self.target_net = ConvDuelingDQN(self.INPUT_DIM, self.ACTION_NUMBER).to(self.device)
 89 |         elif self.MODEL == 'dqn':
 90 |             self.policy_net = ConvDQN(self.INPUT_DIM, self.ACTION_NUMBER).to(self.device)
 91 |             self.target_net = ConvDQN(self.INPUT_DIM, self.ACTION_NUMBER).to(self.device)
 92 | 
 93 |         self.target_net.load_state_dict(self.policy_net.state_dict())
 94 |         self.target_net.eval()
 95 | 
 96 |         self.optimizer = optim.Adam(self.policy_net.parameters(), lr=self.LEARNING_RATE)
 97 |         self.memory = ReplayMemory(self.REPLAY_MEM_SIZE)
 98 |         self.steps_done = 0
 99 |         self.training_cumulative_reward = []
100 | 
101 |     def select_action(self, state):
102 |         """ the epsilon-greedy action selection"""
103 |         state = state.unsqueeze(0).unsqueeze(1)
104 |         sample = random.random()
105 |         if self.TRAINING:
106 |             if self.steps_done > self.EPS_STEPS:
107 |                 eps_threshold = self.EPS_END
108 |             else:
109 |                 eps_threshold = self.EPS_START
110 |         else:
111 |             eps_threshold = self.EPS_END
112 | 
113 |         self.steps_done += 1
114 |         # [Exploitation] pick the best action according to current Q approx.
115 |         if sample > eps_threshold:
116 |             with torch.no_grad():
117 |                 # Return the number of the action with highest non normalized probability
118 |                 # TODO: decide if diverge from paper and normalize probabilities with
119 |                 # softmax or at least compare the architectures
120 |                 return torch.tensor([self.policy_net(state).argmax()], device=self.device, dtype=torch.long)
121 | 
122 |         # [Exploration]  pick a random action from the action space
123 |         else:
124 |             return torch.tensor([random.randrange(self.ACTION_NUMBER)], device=self.device, dtype=torch.long)
125 | 
126 |     def optimize_model(self):
127 |         if len(self.memory) < self.BATCH_SIZE:
128 |             # it will return without doing nothing if we have not enough data to sample
129 |             return
130 |         transitions = self.memory.sample(self.BATCH_SIZE)
131 |         # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
132 |         # detailed explanation). This converts batch-array of Transitions
133 |         # to Transition of batch-arrays.
134 |         # Transition is the named tuple defined above.
135 |         batch = Transition(*zip(*transitions))
136 | 
137 |         # Compute a mask of non-final states and concatenate the batch elements
138 |         # (a final state would've been the one after which simulation ended)
139 |         #
140 |         # non_final_mask is a column vector telling wich state of the sampled is final
141 |         # non_final_next_states contains all the non-final states sampled
142 |         non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, batch.next_state)), device=self.device,
143 |                                       dtype=torch.bool)
144 |         nfns = [s for s in batch.next_state if s is not None]
145 |         non_final_next_states = torch.cat(nfns).view(len(nfns), -1)
146 |         non_final_next_states = non_final_next_states.unsqueeze(1)
147 | 
148 |         state_batch = torch.cat(batch.state).view(self.BATCH_SIZE, -1)
149 |         state_batch = state_batch.unsqueeze(1)
150 |         action_batch = torch.cat(batch.action).view(self.BATCH_SIZE, -1)
151 |         reward_batch = torch.cat(batch.reward).view(self.BATCH_SIZE, -1)
152 | 
153 |         # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
154 |         # columns of actions taken. These are the actions which would've been taken
155 |         # for each batch state according to policy_net
156 |         state_action_values = self.policy_net(state_batch).gather(1, action_batch)
157 | 
158 |         # Compute V(s_{t+1}) for all next states.
159 |         # Expected values of actions for non_final_next_states are computed based
160 |         # on the "older" target_net; selecting their best reward with max(1)[0].
161 |         # This is merged based on the mask, such that we'll have either the expected
162 |         # state value or 0 in case the state was final.
163 |         # detach removes the tensor from the graph -> no gradient computation is
164 |         # required
165 |         next_state_values = torch.zeros(self.BATCH_SIZE, device=self.device)
166 |         next_state_values[non_final_mask] = self.target_net(non_final_next_states).max(1)[0].detach()
167 |         next_state_values = next_state_values.view(self.BATCH_SIZE, -1)
168 | 
169 |         # Compute the expected Q values
170 |         expected_state_action_values = (next_state_values * self.GAMMA) + reward_batch
171 |         # print("expected_state_action_values.shape:\t%s"%str(expected_state_action_values.shape))
172 | 
173 |         # Compute MSE loss
174 |         loss = F.mse_loss(state_action_values,
175 |                           expected_state_action_values)  # expected_state_action_values.unsqueeze(1)
176 | 
177 |         # Optimize the model
178 |         self.optimizer.zero_grad()
179 |         loss.backward()
180 |         for param in self.policy_net.parameters():
181 |             param.grad.data.clamp_(-1, 1)
182 |         self.optimizer.step()
183 | 
184 |     def optimize_double_dqn_model(self):
185 |         if len(self.memory) < self.BATCH_SIZE:
186 |             # it will return without doing nothing if we have not enough data to sample
187 |             return
188 |         transitions = self.memory.sample(self.BATCH_SIZE)
189 |         # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
190 |         # detailed explanation). This converts batch-array of Transitions
191 |         # to Transition of batch-arrays.
192 |         # Transition is the named tuple defined above.
193 |         batch = Transition(*zip(*transitions))
194 | 
195 |         # Compute a mask of non-final states and concatenate the batch elements
196 |         # (a final state would've been the one after which simulation ended)
197 |         #
198 |         # non_final_mask is a column vector telling wich state of the sampled is final
199 |         # non_final_next_states contains all the non-final states sampled
200 |         non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, batch.next_state)), device=self.device,
201 |                                       dtype=torch.bool)
202 |         nfns = [s for s in batch.next_state if s is not None]
203 |         non_final_next_states = torch.cat(nfns).view(len(nfns), -1)
204 |         non_final_next_states = non_final_next_states.unsqueeze(1)
205 | 
206 |         state_batch = torch.cat(batch.state).view(self.BATCH_SIZE, -1)
207 |         state_batch = state_batch.unsqueeze(1)
208 |         action_batch = torch.cat(batch.action).view(self.BATCH_SIZE, -1)
209 |         reward_batch = torch.cat(batch.reward).view(self.BATCH_SIZE, -1)
210 |         # print("state_batch shape: %s\nstate_batch[0]:%s\nactionbatch shape: %s\nreward_batch shape: %s"%(str(state_batch.view(40,-1).shape),str(state_batch.view(40,-1)[0]),str(action_batch.shape),str(reward_batch.shape)))
211 | 
212 |         # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
213 |         # columns of actions taken. These are the actions which would've been taken
214 |         # for each batch state according to policy_net
215 |         state_action_values = self.policy_net(state_batch).gather(1, action_batch)
216 | 
217 |         # ---------- D-DQN Extra Line---------------
218 |         _, next_state_action = self.policy_net(state_batch).max(1, keepdim=True)
219 | 
220 |         # Compute V(s_{t+1}) for all next states.
221 |         # Expected values of actions for non_final_next_states are computed based
222 |         # on the actions given by policynet.
223 |         # This is merged based on the mask, such that we'll have either the expected
224 |         # state value or 0 in case the state was final.
225 |         # detach removes the tensor from the graph -> no gradient computation is
226 |         # required
227 |         next_state_values = torch.zeros(self.BATCH_SIZE, device=self.device).view(self.BATCH_SIZE, -1)
228 | 
229 |         out = self.target_net(non_final_next_states)
230 |         next_state_values[non_final_mask] = out.gather(1, next_state_action[non_final_mask])
231 |         # next_state_values = next_state_values.view(self.BATCH_SIZE, -1)
232 |         # Compute the expected Q values
233 |         expected_state_action_values = (next_state_values * self.GAMMA) + reward_batch
234 | 
235 |         # Compute MSE loss
236 |         loss = F.mse_loss(state_action_values, expected_state_action_values)
237 | 
238 |         # Optimize the model
239 |         self.optimizer.zero_grad()
240 |         loss.backward()
241 |         for param in self.policy_net.parameters():
242 |             param.grad.data.clamp_(-1, 1)
243 |         self.optimizer.step()
244 | 
245 |     def train(self, env, path, num_episodes=40):
246 |         self.TRAINING = True
247 |         cumulative_reward = [0 for t in range(num_episodes)]
248 |         print("Training:")
249 |         for i_episode in tqdm(range(num_episodes)):
250 |             # Initialize the environment and state
251 |             env.reset()  # reset the env st it is set at the beginning of the time serie
252 |             self.steps_done = 0
253 |             state = env.get_state()
254 |             for t in range(len(env.data)):  # while not env.done
255 | 
256 |                 # Select and perform an action
257 |                 action = self.select_action(state)
258 |                 reward, done, _ = env.step(action)
259 | 
260 |                 cumulative_reward[i_episode] += reward.item()
261 | 
262 |                 # Observe new state: it will be None if env.done = True. It is the next
263 |                 # state since env.step() has been called two rows above.
264 |                 next_state = env.get_state()
265 | 
266 |                 # Store the transition in memory
267 |                 self.memory.push(state, action, next_state, reward)
268 | 
269 |                 # Move to the next state
270 |                 state = next_state
271 | 
272 |                 # Perform one step of the optimization (on the policy network): note that
273 |                 # it will return without doing nothing if we have not enough data to sample
274 | 
275 |                 if self.DOUBLE:
276 |                     self.optimize_double_dqn_model()
277 |                 else:
278 |                     self.optimize_model()
279 | 
280 |                 if done:
281 |                     break
282 | 
283 |             # Update the target network, copying all weights and biases of policy_net
284 |             if i_episode % self.TARGET_UPDATE == 0:
285 |                 self.target_net.load_state_dict(self.policy_net.state_dict())
286 | 
287 |         # save the model
288 |         if self.DOUBLE:
289 |             model_name = env.reward_f + '_reward_double_' + self.MODEL + '_model'
290 |             count = 0
291 |             while os.path.exists(path + model_name):  # avoid overrinding models
292 |                 count += 1
293 |                 model_name = model_name + "_" + str(count)
294 | 
295 |         else:
296 |             model_name = env.reward_f + '_reward_' + self.MODEL + '_model'
297 |             count = 0
298 |             while os.path.exists(path + model_name):  # avoid overrinding models
299 |                 count += 1
300 |                 model_name = model_name + "_" + str(count)
301 | 
302 |         torch.save(self.policy_net.state_dict(), path + model_name)
303 | 
304 |         return cumulative_reward
305 | 
306 |     def test(self, env_test, model_name=None, path=None):
307 |         self.TRAINING = False
308 |         cumulative_reward = [0 for t in range(len(env_test.data))]
309 |         reward_list = [0 for t in range(len(env_test.data))]
310 | 
311 |         if model_name is None:
312 |             pass
313 |         elif path is not None:
314 |             if re.match(".*_dqn_.*", model_name):
315 |                 self.policy_net = ConvDQN(self.INPUT_DIM, self.ACTION_NUMBER).to(self.device)
316 |                 if str(self.device) == "cuda":
317 |                     self.policy_net.load_state_dict(torch.load(path + model_name))
318 |                 else:
319 |                     self.policy_net.load_state_dict(torch.load(path + model_name, map_location=torch.device('cpu')))
320 |             elif re.match(".*_ddqn_.*", model_name):
321 |                 self.policy_net = ConvDuelingDQN(self.INPUT_DIM, self.ACTION_NUMBER).to(self.device)
322 |                 if str(self.device) == "cuda":
323 |                     self.policy_net.load_state_dict(torch.load(path + model_name))
324 |                 else:
325 |                     self.policy_net.load_state_dict(torch.load(path + model_name, map_location=torch.device('cpu')))
326 |             else:
327 |                 raise RuntimeError("Please Provide a valid model name or valid path.")
328 |         else:
329 |             raise RuntimeError('Path can not be None if model Name is not None.')
330 | 
331 |         env_test.reset()  # reset the env st it is set at the beginning of the time serie
332 |         state = env_test.get_state()
333 |         for t in tqdm(range(len(env_test.data))):  # while not env.done
334 | 
335 |             # Select and perform an action
336 |             action = self.select_action(state)
337 | 
338 |             reward, done, _ = env_test.step(action)
339 | 
340 |             cumulative_reward[t] += reward.item() + cumulative_reward[t - 1 if t - 1 > 0 else 0]
341 |             reward_list[t] = reward
342 | 
343 |             # Observe new state: it will be None if env.done = True. It is the next
344 |             # state since env.step() has been called two rows above.
345 |             next_state = env_test.get_state()
346 | 
347 |             # Move to the next state
348 |             state = next_state
349 | 
350 |             if done:
351 |                 break
352 | 
353 |         return cumulative_reward, reward_list
354 | 


--------------------------------------------------------------------------------
/Bot_code_and_models/Environment.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | 
  4 | 
  5 | # TODO: modify the reward st. we can choose between sharpe ratio reward or profit
  6 | # reward as shown in the paper.
  7 | class Environment:
  8 |     """Definition of the trading environment for the DQN-Agent.
  9 | 
 10 |     Attributes:
 11 |         data (pandas.DataFrame): Time serie to be considered within the environment.
 12 | 
 13 |         t (:obj:`int`): Current time instant we are considering.
 14 | 
 15 |         profits (:obj:`float`): profit of the agent at time self.t
 16 | 
 17 |         agent_positions(:obj:`list` :obj:`float`): list of the positions
 18 |            currently owned by the agent.
 19 | 
 20 |         agent_position_value(:obj:`float`): current value of open positions
 21 |            (positions in self.agent_positions)
 22 | 
 23 |         cumulative_return(:obj:`list` :obj:`float`): econometric measure of profit
 24 |             during time
 25 | 
 26 |         init_price(:obj:`float`): the price of stocks at the beginning of trading
 27 |             period.
 28 |     """
 29 | 
 30 |     def __init__(self, data, reward):
 31 |         """
 32 |         Creates the environment. Note: Before using the environment you must call
 33 |         the Environment.reset() method.
 34 | 
 35 |         Args:
 36 |            data (:obj:`pd.DataFrane`): Time serie to be initialize the environment.
 37 |            reward (:obj:`str`): Type of reward function to use, either sharpe ratio
 38 |               "sr" or profit function "profit"
 39 |         """
 40 |         self.data = data
 41 |         self.reward_f = reward if reward == "sr" else "profit"
 42 |         self.reset()
 43 | 
 44 |     def reset(self):
 45 |         """
 46 |         Reset the environment or makes a further step of initialization if called
 47 |         on an environment never used before. It must always be called before .step()
 48 |         method to avoid errors.
 49 |         """
 50 |         self.t = 23
 51 |         self.done = False
 52 |         self.profits = [0 for e in range(len(self.data))]
 53 |         self.agent_positions = []
 54 |         self.agent_open_position_value = 0
 55 | 
 56 |         self.cumulative_return = [0 for e in range(len(self.data))]
 57 |         self.init_price = self.data.iloc[0, :]['Close']
 58 | 
 59 |     def get_state(self):
 60 |         """
 61 |             Return the current state of the environment. NOTE: if called after
 62 |             Environment.step() it will return the next state.
 63 |         """
 64 | 
 65 |         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 66 |         if not self.done:
 67 |             return torch.tensor([el for el in self.data.iloc[self.t - 23:self.t + 1, :]['Close']], device=device,
 68 |                                 dtype=torch.float)
 69 |         else:
 70 |             return None
 71 | 
 72 |     def step(self, act):
 73 |         """
 74 |         Perform the action of the Agent on the environment, computes the reward
 75 |         and update some datastructures to keep track of some econometric indexes
 76 |         during time.
 77 | 
 78 |         Args:
 79 |            act (:obj:`int`): Action to be performed on the environment.
 80 | 
 81 |         Returns:
 82 |             reward (:obj:`torch.tensor` :dtype:`torch.float`): the reward of
 83 |                 performing the action on the current env state.
 84 |             self.done (:obj:`bool`): A boolean flag telling if we are in a final
 85 |                 state
 86 |             current_state (:obj:`torch.tensor` :dtype:`torch.float`):
 87 |                 the state of the environment after the action execution.
 88 |         """
 89 |         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 90 | 
 91 |         reward = 0
 92 |         # GET CURRENT STATE
 93 |         state = self.data.iloc[self.t, :]['Close']
 94 | 
 95 |         # EXECUTE THE ACTION (act = 0: stay, 1: buy, 2: sell)
 96 |         if act == 0:  # Do Nothing
 97 |             pass
 98 | 
 99 |         if act == 1:  # Buy
100 |             self.agent_positions.append(self.data.iloc[self.t, :]['Close'])
101 | 
102 |         sell_nothing = False
103 |         if act == 2:  # Sell
104 |             profits = 0
105 |             if len(self.agent_positions) < 1:
106 |                 sell_nothing = True
107 |             for position in self.agent_positions:
108 |                 profits += (self.data.iloc[self.t, :][
109 |                                 'Close'] - position)  # profit = close - my_position for each my_position "p"
110 | 
111 |             self.profits[self.t] = profits
112 |             self.agent_positions = []
113 |             # reward += profits
114 | 
115 |         self.agent_open_position_value = 0
116 |         for position in self.agent_positions:
117 |             self.agent_open_position_value += (self.data.iloc[self.t, :]['Close'] - position)
118 |             # TO CHECK if the calculus is correct according to the definition
119 |             self.cumulative_return[self.t] += (position - self.init_price) / self.init_price
120 | 
121 |         # COLLECT THE REWARD
122 |         reward = 0
123 |         if self.reward_f == "sr":
124 |             sr = self.agent_open_position_value / np.std(np.array(self.data.iloc[0:self.t]['Close'])) if np.std(
125 |                 np.array(self.data.iloc[0:self.t]['Close'])) != 0 else 0
126 |             # sr = self.profits[self.t] / np.std(np.array(self.profits))
127 |             if sr <= -4:
128 |                 reward = -10
129 |             elif sr < -1:
130 |                 reward = -4
131 |             elif sr < 0:
132 |                 reward = -1
133 |             elif sr == 0:
134 |                 reward = 0
135 |             elif sr <= 1:
136 |                 reward = 1
137 |             elif sr < 4:
138 |                 reward = 4
139 |             else:
140 |                 reward = 10
141 | 
142 |         if self.reward_f == "profit":
143 |             p = self.profits[self.t]
144 |             if p > 0:
145 |                 reward = 1
146 |             elif p < 0:
147 |                 reward = -1
148 |             elif p == 0:
149 |                 reward = 0
150 | 
151 |         if sell_nothing and (reward > -5):
152 |             reward = -5
153 | 
154 |         # UPDATE THE STATE
155 |         self.t += 1
156 | 
157 |         if (self.t == len(self.data) - 1):
158 |             self.done = True
159 | 
160 |         return torch.tensor([reward], device=device, dtype=torch.float), self.done, torch.tensor([state],
161 |                                                                                                  dtype=torch.float)  # reward, done, current_state


--------------------------------------------------------------------------------
/Bot_code_and_models/Readme.md:
--------------------------------------------------------------------------------
 1 | # Dependencies 
 2 | Dependecies:    
 3 | * prettytable (`pip install prettytable`)  
 4 | * PyTorch
 5 | * tqdm
 6 | * pandas
 7 | * numpy
 8 | * matplotlib
 9 |     
10 | To Run Pretrained Models:   
11 | * `python main.py`  
12 | 
13 | To Run Train and Test:  
14 | * `python train_test.py`
15 | 
16 | 


--------------------------------------------------------------------------------
/Bot_code_and_models/main.py:
--------------------------------------------------------------------------------
  1 | from prettytable import PrettyTable as PrettyTable
  2 | from utils import load_data, print_stats, plot_multiple_conf_interval
  3 | import random
  4 | import os
  5 | #from google.colab import drive
  6 | #drive.mount('/content/drive')
  7 | from Environment import Environment
  8 | from Agent import Agent
  9 | 
 10 | 
 11 | 
 12 | def main():
 13 |     #----------------------------- LOAD DATA ---------------------------------------------------------------------------
 14 |     path = ''
 15 |     df = load_data(path)
 16 | 
 17 | 
 18 |     # ----------------------------- AGENTS COMPARISON --------------------------------
 19 |     REPLAY_MEM_SIZE = 10000
 20 |     BATCH_SIZE = 40
 21 |     GAMMA = 0.98
 22 |     EPS_START = 1
 23 |     EPS_END = 0.12
 24 |     EPS_STEPS = 300
 25 |     LEARNING_RATE = 0.001
 26 |     INPUT_DIM = 24
 27 |     HIDDEN_DIM = 120
 28 |     ACTION_NUMBER = 3
 29 |     TARGET_UPDATE = 10
 30 |     N_TEST = 10
 31 |     TRADING_PERIOD = 4000
 32 |     index = random.randrange(len(df) - TRADING_PERIOD - 1)
 33 | 
 34 |     dqn_agent = Agent(REPLAY_MEM_SIZE,
 35 |                       BATCH_SIZE,
 36 |                       GAMMA,
 37 |                       EPS_START,
 38 |                       EPS_END,
 39 |                       EPS_STEPS,
 40 |                       LEARNING_RATE,
 41 |                       INPUT_DIM,
 42 |                       HIDDEN_DIM,
 43 |                       ACTION_NUMBER,
 44 |                       TARGET_UPDATE,
 45 |                       MODEL='dqn',
 46 |                       DOUBLE=False)
 47 | 
 48 |     double_dqn_agent = Agent(REPLAY_MEM_SIZE,
 49 |                              BATCH_SIZE,
 50 |                              GAMMA,
 51 |                              EPS_START,
 52 |                              EPS_END,
 53 |                              EPS_STEPS,
 54 |                              LEARNING_RATE,
 55 |                              INPUT_DIM,
 56 |                              HIDDEN_DIM,
 57 |                              ACTION_NUMBER,
 58 |                              TARGET_UPDATE,
 59 |                              MODEL='dqn',
 60 |                              DOUBLE=True)
 61 | 
 62 |     dueling_double_dqn_agent = Agent(REPLAY_MEM_SIZE,
 63 |                                      BATCH_SIZE,
 64 |                                      GAMMA,
 65 |                                      EPS_START,
 66 |                                      EPS_END,
 67 |                                      EPS_STEPS,
 68 |                                      LEARNING_RATE,
 69 |                                      INPUT_DIM,
 70 |                                      HIDDEN_DIM,
 71 |                                      ACTION_NUMBER,
 72 |                                      TARGET_UPDATE,
 73 |                                      MODEL='ddqn',
 74 |                                      DOUBLE=True)
 75 | 
 76 |     train_size = int(TRADING_PERIOD * 0.8)
 77 |     profit_dqn_return = []
 78 |     sharpe_dqn_return = []
 79 |     profit_ddqn_return = []
 80 |     sharpe_ddqn_return = []
 81 |     profit_dueling_ddqn_return = []
 82 |     sharpe_dueling_ddqn_return = []
 83 | 
 84 |     #profit_train_env = Environment(df[index:index + train_size], "profit")
 85 |     sharpe_train_env = Environment(df[index:index + train_size], "sr")
 86 | 
 87 |     # ProfitDQN
 88 |     #cr_profit_dqn = dqn_agent.train(profit_train_env, path)
 89 |     #profit_train_env.reset()
 90 | 
 91 |     # Profit Double DQN
 92 |     #cr_profit_ddqn = double_dqn_agent.train(profit_train_env, path)
 93 |     #profit_train_env.reset()
 94 | 
 95 |     # Profit Dueling Double DQN
 96 |     #cr_profit_dueling_ddqn = dueling_double_dqn_agent.train(profit_train_env, path)
 97 |     #profit_train_env.reset()
 98 | 
 99 |     i = 0
100 |     while i < N_TEST:
101 |         print("Test nr. %s" % str(i+1))
102 |         index = random.randrange(len(df) - TRADING_PERIOD - 1)
103 | 
104 |         profit_test_env = Environment(df[index + train_size:index + TRADING_PERIOD], "profit")
105 | 
106 |         # ProfitDQN
107 |         cr_profit_dqn_test, _ = dqn_agent.test(profit_test_env, model_name="profit_reward_dqn_model" , path=path)
108 |         profit_dqn_return.append(profit_test_env.cumulative_return)
109 |         profit_test_env.reset()
110 | 
111 |         # Profit Double DQN
112 |         cr_profit_ddqn_test, _ = double_dqn_agent.test(profit_test_env, model_name="profit_reward_double_dqn_model" , path=path)
113 |         profit_ddqn_return.append(profit_test_env.cumulative_return)
114 |         profit_test_env.reset()
115 | 
116 |         # Profit Dueling Double DQN
117 |         cr_profit_dueling_ddqn_test, _ = dueling_double_dqn_agent.test(profit_test_env, model_name="profit_reward_double_ddqn_model" , path=path)
118 |         profit_dueling_ddqn_return.append(profit_test_env.cumulative_return)
119 |         profit_test_env.reset()
120 | 
121 |         i += 1
122 | 
123 |     dqn_agent = Agent(REPLAY_MEM_SIZE,
124 |                       BATCH_SIZE,
125 |                       GAMMA,
126 |                       EPS_START,
127 |                       EPS_END,
128 |                       EPS_STEPS,
129 |                       LEARNING_RATE,
130 |                       INPUT_DIM,
131 |                       HIDDEN_DIM,
132 |                       ACTION_NUMBER,
133 |                       TARGET_UPDATE,
134 |                       MODEL='dqn',
135 |                       DOUBLE=False)
136 | 
137 |     double_dqn_agent = Agent(REPLAY_MEM_SIZE,
138 |                              BATCH_SIZE,
139 |                              GAMMA,
140 |                              EPS_START,
141 |                              EPS_END,
142 |                              EPS_STEPS,
143 |                              LEARNING_RATE,
144 |                              INPUT_DIM,
145 |                              HIDDEN_DIM,
146 |                              ACTION_NUMBER,
147 |                              TARGET_UPDATE,
148 |                              MODEL='dqn',
149 |                              DOUBLE=True)
150 | 
151 |     dueling_double_dqn_agent = Agent(REPLAY_MEM_SIZE,
152 |                                      BATCH_SIZE,
153 |                                      GAMMA,
154 |                                      EPS_START,
155 |                                      EPS_END,
156 |                                      EPS_STEPS,
157 |                                      LEARNING_RATE,
158 |                                      INPUT_DIM,
159 |                                      HIDDEN_DIM,
160 |                                      ACTION_NUMBER,
161 |                                      TARGET_UPDATE,
162 |                                      MODEL='ddqn',
163 |                                      DOUBLE=True)
164 | 
165 |     # SharpeDQN
166 |     #cr_sharpe_dqn = dqn_agent.train(sharpe_train_env, path)
167 |     #sharpe_train_env.reset()
168 | 
169 |     # Sharpe Double DQN
170 |     #cr_sharpe_ddqn = double_dqn_agent.train(sharpe_train_env, path)
171 |     #sharpe_train_env.reset()
172 | 
173 |     # Sharpe Dueling Double DQN
174 |     #cr_sharpe_dueling_ddqn = dueling_double_dqn_agent.train(sharpe_train_env, path)
175 |     #sharpe_train_env.reset()
176 | 
177 |     i = 0
178 |     while i < N_TEST:
179 |         print("Test nr. %s"%str(i+1))
180 |         index = random.randrange(len(df) - TRADING_PERIOD - 1)
181 | 
182 |         sharpe_test_env = Environment(df[index + train_size:index + TRADING_PERIOD], "sr")
183 | 
184 |         # SharpeDQN
185 |         cr_sharpe_dqn_test, _ = dqn_agent.test(sharpe_test_env, model_name="sr_reward_dqn_model", path=path)
186 |         sharpe_dqn_return.append(sharpe_test_env.cumulative_return)
187 |         sharpe_test_env.reset()
188 | 
189 |         # Sharpe Double DQN
190 |         cr_sharpe_ddqn_test, _ = double_dqn_agent.test(sharpe_test_env, model_name="sr_reward_double_dqn_model"  , path=path)
191 |         sharpe_ddqn_return.append(sharpe_test_env.cumulative_return)
192 |         sharpe_test_env.reset()
193 | 
194 |         # Sharpe Dueling Double DQN
195 |         cr_sharpe_dueling_ddqn_test, _ = dueling_double_dqn_agent.test(sharpe_test_env, model_name="sr_reward_double_ddqn_model" , path=path)
196 |         sharpe_dueling_ddqn_return.append(sharpe_test_env.cumulative_return)
197 |         sharpe_test_env.reset()
198 | 
199 |         i += 1
200 | 
201 |     #--------------------------------------- Print Test Stats ---------------------------------------------------------
202 |     t = PrettyTable(["Trading System", "Avg. Return (%)", "Max Return (%)", "Min Return (%)", "Std. Dev."])
203 |     print_stats("ProfitDQN", profit_dqn_return, t)
204 |     print_stats("SharpeDQN", sharpe_dqn_return, t)
205 |     print_stats("ProfitDDQN", profit_ddqn_return, t)
206 |     print_stats("SharpeDDQN", sharpe_ddqn_return, t)
207 |     print_stats("ProfitD-DDQN", profit_dueling_ddqn_return, t)
208 |     print_stats("SharpeD-DDQN", sharpe_dueling_ddqn_return, t)
209 | 
210 |     print(t)
211 |     plot_multiple_conf_interval(["ProfitDQN", "SharpeDQN", "ProfitDDQN","SharpeDDQN","ProfitD-DDQN","SharpeD-DDQN"],
212 |                                 [profit_dqn_return,sharpe_dqn_return,profit_ddqn_return,sharpe_ddqn_return,
213 |                                  profit_dueling_ddqn_return,sharpe_dueling_ddqn_return])
214 | 
215 | 
216 | 
217 | if __name__ == "__main__":
218 |     main()


--------------------------------------------------------------------------------
/Bot_code_and_models/models.py:
--------------------------------------------------------------------------------
  1 | from torch import nn
  2 | 
  3 | # Definition of the netwroks
  4 | class DQN(nn.Module):
  5 |     # Deep Q Network
  6 |     def __init__(self, obs_len, hidden_size, actions_n):
  7 |         super(DQN, self).__init__()
  8 |         # we might want Conv1d ?
  9 |         self.fc_val = nn.Sequential(
 10 |             nn.Linear(obs_len, hidden_size),
 11 |             nn.LeakyReLU(),
 12 |             nn.Linear(hidden_size, hidden_size),
 13 |             nn.LeakyReLU(),
 14 |             nn.Linear(hidden_size, actions_n)
 15 |         )
 16 | 
 17 |     def forward(self, x):
 18 |         h = self.fc_val(x)
 19 |         return h
 20 | 
 21 | 
 22 | 
 23 | class DuelingDQN(nn.Module):
 24 |     # Linear Dueling Deep Q Network
 25 |     def __init__(self, obs_len, hidden_size, actions_n):
 26 |         super(DuelingDQN, self).__init__()
 27 | 
 28 |         self.feauture_layer = nn.Sequential(
 29 |             nn.Linear(obs_len, hidden_size),
 30 |             nn.LeakyReLU(),
 31 |             nn.Linear(hidden_size, hidden_size),
 32 |             nn.LeakyReLU(),
 33 |         )
 34 | 
 35 |         self.value_stream = nn.Sequential(
 36 |             nn.Linear(hidden_size, hidden_size),
 37 |             nn.LeakyReLU(),
 38 |             nn.Linear(hidden_size, 1),
 39 |         )
 40 | 
 41 |         self.advantage_stream = nn.Sequential(
 42 |             nn.Linear(hidden_size, hidden_size),
 43 |             nn.LeakyReLU(),
 44 |             nn.Linear(hidden_size, actions_n)
 45 |         )
 46 | 
 47 |     def forward(self, state):
 48 |         features = self.feauture_layer(state)
 49 |         values = self.value_stream(features)
 50 |         advantages = self.advantage_stream(features)
 51 |         qvals = values + (advantages - advantages.mean())
 52 | 
 53 |         return qvals
 54 | 
 55 | 
 56 | # Convolutional DQN
 57 | class ConvDQN(nn.Module):
 58 |     def __init__(self, seq_len_in, actions_n, kernel_size=8):
 59 |         super(ConvDQN, self).__init__()
 60 |         n_filters = 64
 61 |         max_pool_kernel = 2
 62 |         self.conv1 = nn.Conv1d(1, n_filters, kernel_size)
 63 |         self.maxPool = nn.MaxPool1d(max_pool_kernel, stride=1)
 64 |         self.LRelu = nn.LeakyReLU()
 65 |         self.conv2 = nn.Conv1d(n_filters, n_filters, kernel_size // 2)
 66 | 
 67 |         self.hidden_dim = n_filters * ((((
 68 |                                                      seq_len_in - kernel_size + 1) - max_pool_kernel + 1) - kernel_size // 2 + 1) - max_pool_kernel + 1)
 69 | 
 70 |         self.out_layer = nn.Linear(self.hidden_dim, actions_n)
 71 | 
 72 |     def forward(self, x):
 73 |         c1_out = self.conv1(x)
 74 |         max_pool_1 = self.maxPool(self.LRelu(c1_out))
 75 |         c2_out = self.conv2(max_pool_1)
 76 |         max_pool_2 = self.maxPool(self.LRelu(c2_out))
 77 |         #    print("c1_out:\t%s"%str(c1_out.shape))
 78 |         #    print("max_pool_1:\t%s"%str(max_pool_1.shape))
 79 |         #    print("c2_out:\t%s"%str(c2_out.shape))
 80 |         #    print("max_pool_2:\t%s"%str(max_pool_2.shape))
 81 | 
 82 |         max_pool_2 = max_pool_2.view(-1, self.hidden_dim)
 83 |         #    print("max_pool_2_view:\t%s"%str(max_pool_2.shape))
 84 | 
 85 |         return self.LRelu(self.out_layer(max_pool_2))
 86 | 
 87 | 
 88 | # Convolutional Dueling DQN
 89 | class ConvDuelingDQN(nn.Module):
 90 |     def __init__(self, seq_len_in, actions_n, kernel_size=8):
 91 |         super(ConvDuelingDQN, self).__init__()
 92 |         n_filters = 64
 93 |         max_pool_kernel = 2
 94 |         self.conv1 = nn.Conv1d(1, n_filters, kernel_size)
 95 |         self.maxPool = nn.MaxPool1d(max_pool_kernel, stride=1)
 96 |         self.LRelu = nn.LeakyReLU()
 97 |         self.conv2 = nn.Conv1d(n_filters, n_filters, kernel_size // 2)
 98 |         self.hidden_dim = n_filters * ((((
 99 |                                                      seq_len_in - kernel_size + 1) - max_pool_kernel + 1) - kernel_size // 2 + 1) - max_pool_kernel + 1)
100 |         paper_hidden_dim = 120
101 |         self.split_layer = nn.Linear(self.hidden_dim, paper_hidden_dim)
102 | 
103 |         self.value_stream = nn.Sequential(
104 |             nn.Linear(paper_hidden_dim, paper_hidden_dim),
105 |             nn.LeakyReLU(),
106 |             nn.Linear(paper_hidden_dim, 1),
107 |         )
108 | 
109 |         self.advantage_stream = nn.Sequential(
110 |             nn.Linear(paper_hidden_dim, paper_hidden_dim),
111 |             nn.LeakyReLU(),
112 |             nn.Linear(paper_hidden_dim, actions_n)
113 |         )
114 | 
115 |     def forward(self, x):
116 |         c1_out = self.conv1(x)
117 |         max_pool_1 = self.maxPool(self.LRelu(c1_out))
118 |         c2_out = self.conv2(max_pool_1)
119 |         max_pool_2 = self.maxPool(self.LRelu(c2_out))
120 |         # DEBUG code:
121 |         #    print("c1_out:\t%s"%str(c1_out.shape))
122 |         #    print("max_pool_1:\t%s"%str(max_pool_1.shape))
123 |         #    print("c2_out:\t%s"%str(c2_out.shape))
124 |         #    print("max_pool_2:\t%s"%str(max_pool_2.shape))
125 | 
126 |         max_pool_2 = max_pool_2.view(-1, self.hidden_dim)
127 |         #    print("max_pool_2_view:\t%s"%str(max_pool_2.shape))
128 | 
129 |         split = self.split_layer(max_pool_2)
130 |         values = self.value_stream(split)
131 |         advantages = self.advantage_stream(split)
132 |         qvals = values + (advantages - advantages.mean())
133 |         return qvals
134 | 
135 | 


--------------------------------------------------------------------------------
/Bot_code_and_models/profit_reward_double_ddqn_model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicoDs96/Trading-Bot---Deep-Reinforcement-Learning/818fafc5e836e85011889ee04c5b8b1b804c5b22/Bot_code_and_models/profit_reward_double_ddqn_model


--------------------------------------------------------------------------------
/Bot_code_and_models/profit_reward_double_dqn_model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicoDs96/Trading-Bot---Deep-Reinforcement-Learning/818fafc5e836e85011889ee04c5b8b1b804c5b22/Bot_code_and_models/profit_reward_double_dqn_model


--------------------------------------------------------------------------------
/Bot_code_and_models/profit_reward_dqn_model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicoDs96/Trading-Bot---Deep-Reinforcement-Learning/818fafc5e836e85011889ee04c5b8b1b804c5b22/Bot_code_and_models/profit_reward_dqn_model


--------------------------------------------------------------------------------
/Bot_code_and_models/sr_reward_double_ddqn_model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicoDs96/Trading-Bot---Deep-Reinforcement-Learning/818fafc5e836e85011889ee04c5b8b1b804c5b22/Bot_code_and_models/sr_reward_double_ddqn_model


--------------------------------------------------------------------------------
/Bot_code_and_models/sr_reward_double_dqn_model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicoDs96/Trading-Bot---Deep-Reinforcement-Learning/818fafc5e836e85011889ee04c5b8b1b804c5b22/Bot_code_and_models/sr_reward_double_dqn_model


--------------------------------------------------------------------------------
/Bot_code_and_models/sr_reward_dqn_model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicoDs96/Trading-Bot---Deep-Reinforcement-Learning/818fafc5e836e85011889ee04c5b8b1b804c5b22/Bot_code_and_models/sr_reward_dqn_model


--------------------------------------------------------------------------------
/Bot_code_and_models/train_test.py:
--------------------------------------------------------------------------------
  1 | from prettytable import PrettyTable as PrettyTable
  2 | from utils import load_data, print_stats, plot_multiple_conf_interval
  3 | import random
  4 | import warnings
  5 | # from google.colab import drive
  6 | # drive.mount('/content/drive')
  7 | from Environment import Environment
  8 | from Agent import Agent
  9 | 
 10 | 
 11 | def main():
 12 |     # ----------------------------- LOAD DATA ---------------------------------------------------------------------------
 13 |     path = ''
 14 |     df = load_data(path)
 15 | 
 16 |     # ----------------------------- AGENTS COMPARISON --------------------------------
 17 |     REPLAY_MEM_SIZE = 10000
 18 |     BATCH_SIZE = 40
 19 |     GAMMA = 0.98
 20 |     EPS_START = 1
 21 |     EPS_END = 0.12
 22 |     EPS_STEPS = 300
 23 |     LEARNING_RATE = 0.001
 24 |     INPUT_DIM = 24
 25 |     HIDDEN_DIM = 120
 26 |     ACTION_NUMBER = 3
 27 |     TARGET_UPDATE = 10
 28 |     N_TEST = 10
 29 |     TRADING_PERIOD = 4000
 30 |     index = random.randrange(len(df) - TRADING_PERIOD - 1)
 31 | 
 32 |     dqn_agent = Agent(REPLAY_MEM_SIZE,
 33 |                       BATCH_SIZE,
 34 |                       GAMMA,
 35 |                       EPS_START,
 36 |                       EPS_END,
 37 |                       EPS_STEPS,
 38 |                       LEARNING_RATE,
 39 |                       INPUT_DIM,
 40 |                       HIDDEN_DIM,
 41 |                       ACTION_NUMBER,
 42 |                       TARGET_UPDATE,
 43 |                       MODEL='dqn',
 44 |                       DOUBLE=False)
 45 |     if str(dqn_agent.device) == "cpu":
 46 |         warnings.warn("Device is set to CPU. This will lead to a very slow training. Consider to run pretained models by"
 47 |                       "executing main.py script instead of train_test.py!")
 48 | 
 49 | 
 50 |     double_dqn_agent = Agent(REPLAY_MEM_SIZE,
 51 |                              BATCH_SIZE,
 52 |                              GAMMA,
 53 |                              EPS_START,
 54 |                              EPS_END,
 55 |                              EPS_STEPS,
 56 |                              LEARNING_RATE,
 57 |                              INPUT_DIM,
 58 |                              HIDDEN_DIM,
 59 |                              ACTION_NUMBER,
 60 |                              TARGET_UPDATE,
 61 |                              MODEL='dqn',
 62 |                              DOUBLE=True)
 63 | 
 64 |     dueling_double_dqn_agent = Agent(REPLAY_MEM_SIZE,
 65 |                                      BATCH_SIZE,
 66 |                                      GAMMA,
 67 |                                      EPS_START,
 68 |                                      EPS_END,
 69 |                                      EPS_STEPS,
 70 |                                      LEARNING_RATE,
 71 |                                      INPUT_DIM,
 72 |                                      HIDDEN_DIM,
 73 |                                      ACTION_NUMBER,
 74 |                                      TARGET_UPDATE,
 75 |                                      MODEL='ddqn',
 76 |                                      DOUBLE=True)
 77 | 
 78 |     train_size = int(TRADING_PERIOD * 0.8)
 79 |     profit_dqn_return = []
 80 |     sharpe_dqn_return = []
 81 |     profit_ddqn_return = []
 82 |     sharpe_ddqn_return = []
 83 |     profit_dueling_ddqn_return = []
 84 |     sharpe_dueling_ddqn_return = []
 85 | 
 86 |     profit_train_env = Environment(df[index:index + train_size], "profit")
 87 |     sharpe_train_env = Environment(df[index:index + train_size], "sr")
 88 | 
 89 |     # ProfitDQN
 90 |     cr_profit_dqn = dqn_agent.train(profit_train_env, path)
 91 |     profit_train_env.reset()
 92 | 
 93 |     # Profit Double DQN
 94 |     cr_profit_ddqn = double_dqn_agent.train(profit_train_env, path)
 95 |     profit_train_env.reset()
 96 | 
 97 |     # Profit Dueling Double DQN
 98 |     cr_profit_dueling_ddqn = dueling_double_dqn_agent.train(profit_train_env, path)
 99 |     profit_train_env.reset()
100 | 
101 |     i = 0
102 |     while i < N_TEST:
103 |         print("Test nr. %s" % str(i + 1))
104 |         index = random.randrange(len(df) - TRADING_PERIOD - 1)
105 | 
106 |         profit_test_env = Environment(df[index + train_size:index + TRADING_PERIOD], "profit")
107 | 
108 |         # ProfitDQN
109 |         cr_profit_dqn_test, _ = dqn_agent.test(profit_test_env)
110 |         profit_dqn_return.append(profit_test_env.cumulative_return)
111 |         profit_test_env.reset()
112 | 
113 |         # Profit Double DQN
114 |         cr_profit_ddqn_test, _ = double_dqn_agent.test(profit_test_env)
115 |         profit_ddqn_return.append(profit_test_env.cumulative_return)
116 |         profit_test_env.reset()
117 | 
118 |         # Profit Dueling Double DQN
119 |         cr_profit_dueling_ddqn_test, _ = dueling_double_dqn_agent.test(profit_test_env)
120 |         profit_dueling_ddqn_return.append(profit_test_env.cumulative_return)
121 |         profit_test_env.reset()
122 | 
123 |         i += 1
124 | 
125 |     dqn_agent = Agent(REPLAY_MEM_SIZE,
126 |                       BATCH_SIZE,
127 |                       GAMMA,
128 |                       EPS_START,
129 |                       EPS_END,
130 |                       EPS_STEPS,
131 |                       LEARNING_RATE,
132 |                       INPUT_DIM,
133 |                       HIDDEN_DIM,
134 |                       ACTION_NUMBER,
135 |                       TARGET_UPDATE,
136 |                       MODEL='dqn',
137 |                       DOUBLE=False)
138 | 
139 |     double_dqn_agent = Agent(REPLAY_MEM_SIZE,
140 |                              BATCH_SIZE,
141 |                              GAMMA,
142 |                              EPS_START,
143 |                              EPS_END,
144 |                              EPS_STEPS,
145 |                              LEARNING_RATE,
146 |                              INPUT_DIM,
147 |                              HIDDEN_DIM,
148 |                              ACTION_NUMBER,
149 |                              TARGET_UPDATE,
150 |                              MODEL='dqn',
151 |                              DOUBLE=True)
152 | 
153 |     dueling_double_dqn_agent = Agent(REPLAY_MEM_SIZE,
154 |                                      BATCH_SIZE,
155 |                                      GAMMA,
156 |                                      EPS_START,
157 |                                      EPS_END,
158 |                                      EPS_STEPS,
159 |                                      LEARNING_RATE,
160 |                                      INPUT_DIM,
161 |                                      HIDDEN_DIM,
162 |                                      ACTION_NUMBER,
163 |                                      TARGET_UPDATE,
164 |                                      MODEL='ddqn',
165 |                                      DOUBLE=True)
166 | 
167 |     # SharpeDQN
168 |     cr_sharpe_dqn = dqn_agent.train(sharpe_train_env, path)
169 |     sharpe_train_env.reset()
170 | 
171 |     # Sharpe Double DQN
172 |     cr_sharpe_ddqn = double_dqn_agent.train(sharpe_train_env, path)
173 |     sharpe_train_env.reset()
174 | 
175 |     # Sharpe Dueling Double DQN
176 |     cr_sharpe_dueling_ddqn = dueling_double_dqn_agent.train(sharpe_train_env, path)
177 |     sharpe_train_env.reset()
178 | 
179 |     i = 0
180 |     while i < N_TEST:
181 |         print("Test nr. %s" % str(i + 1))
182 |         index = random.randrange(len(df) - TRADING_PERIOD - 1)
183 | 
184 |         sharpe_test_env = Environment(df[index + train_size:index + TRADING_PERIOD], "sr")
185 | 
186 |         # SharpeDQN
187 |         cr_sharpe_dqn_test, _ = dqn_agent.test(sharpe_test_env)
188 |         sharpe_dqn_return.append(sharpe_test_env.cumulative_return)
189 |         sharpe_test_env.reset()
190 | 
191 |         # Sharpe Double DQN
192 |         cr_sharpe_ddqn_test, _ = double_dqn_agent.test(sharpe_test_env)
193 |         sharpe_ddqn_return.append(sharpe_test_env.cumulative_return)
194 |         sharpe_test_env.reset()
195 | 
196 |         # Sharpe Dueling Double DQN
197 |         cr_sharpe_dueling_ddqn_test, _ = dueling_double_dqn_agent.test(sharpe_test_env)
198 |         sharpe_dueling_ddqn_return.append(sharpe_test_env.cumulative_return)
199 |         sharpe_test_env.reset()
200 | 
201 |         i += 1
202 | 
203 |     # --------------------------------------- Print Test Stats ---------------------------------------------------------
204 |     t = PrettyTable(["Trading System", "Avg. Return (%)", "Max Return (%)", "Min Return (%)", "Std. Dev."])
205 |     print_stats("ProfitDQN", profit_dqn_return, t)
206 |     print_stats("SharpeDQN", sharpe_dqn_return, t)
207 |     print_stats("ProfitDDQN", profit_ddqn_return, t)
208 |     print_stats("SharpeDDQN", sharpe_ddqn_return, t)
209 |     print_stats("ProfitD-DDQN", profit_dueling_ddqn_return, t)
210 |     print_stats("SharpeD-DDQN", sharpe_dueling_ddqn_return, t)
211 | 
212 |     print(t)
213 |     plot_multiple_conf_interval(["ProfitDQN", "SharpeDQN", "ProfitDDQN", "SharpeDDQN", "ProfitD-DDQN", "SharpeD-DDQN"],
214 |                                 [profit_dqn_return, sharpe_dqn_return, profit_ddqn_return, sharpe_ddqn_return,
215 |                                  profit_dueling_ddqn_return, sharpe_dueling_ddqn_return])
216 | 
217 | 
218 | if __name__ == "__main__":
219 |     main()


--------------------------------------------------------------------------------
/Bot_code_and_models/utils.py:
--------------------------------------------------------------------------------
  1 | #  It essentially maps (state, action) pairs to their (next_state, reward) result,
  2 | #  with the state being the current stock price
  3 | from collections import namedtuple
  4 | import random
  5 | import pandas as pd
  6 | import numpy as np
  7 | import matplotlib.pyplot as plt
  8 | import os
  9 | 
 10 | Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward') )
 11 | 
 12 | 
 13 | class ReplayMemory(object):
 14 | 
 15 |     def __init__(self, capacity):
 16 |         self.capacity = capacity
 17 |         self.memory = []
 18 |         self.position = 0
 19 | 
 20 |     def push(self, *args):
 21 |         """Saves a transition."""
 22 |         if len(self.memory) < self.capacity:
 23 |             self.memory.append(None)
 24 |         self.memory[self.position] = Transition(*args)
 25 |         self.position = (self.position + 1) % self.capacity
 26 | 
 27 |     def sample(self, batch_size):
 28 |         return random.sample(self.memory, batch_size)
 29 | 
 30 |     def __len__(self):
 31 |         return len(self.memory)
 32 | 
 33 | 
 34 | 
 35 | def print_stats(model, c_return, t):
 36 |     c_return = np.array(c_return).flatten()
 37 |     t.add_row([str(model), "%.2f" % np.mean(c_return), "%.2f" % np.amax(c_return), "%.2f" % np.amin(c_return),
 38 |                "%.2f" % np.std(c_return)])
 39 | 
 40 | 
 41 | def plot_conf_interval(name, cum_returns ):
 42 |     """ NB. cum_returns must be 2-dim """
 43 |     # Mean
 44 |     M = np.mean(np.array(cum_returns), axis=0)
 45 |     # std dev
 46 |     S = np.std(np.array(cum_returns), axis=0)
 47 |     # upper and lower limit of confidence intervals
 48 |     LL = M - 0.95 * S
 49 |     UL = M + 0.95 * S
 50 | 
 51 |     plt.figure(figsize=(20, 5))
 52 |     plt.xlabel("Trading Instant (h)")
 53 |     plt.ylabel(name)
 54 |     plt.legend(['Cumulative Averadge Return (%)'], loc='upper left')
 55 |     plt.grid(True)
 56 |     plt.ylim(-5, 15)
 57 |     plt.plot(range(len(M)), M, linewidth=2)  # mean curve.
 58 |     plt.fill_between(range(len(M)), LL, UL, color='b', alpha=.2)  # std curves.
 59 |     plt.show()
 60 | 
 61 | def plot_multiple_conf_interval(names, cum_returns_list ):
 62 |     """ NB. cum_returns[i] must be 2-dim """
 63 |     i = 1
 64 | 
 65 |     for cr in cum_returns_list:
 66 |         plt.subplot(len(cum_returns_list), 2, i)
 67 |         # Mean
 68 |         M = np.mean(np.array(cr), axis=0)
 69 |         # std dev
 70 |         S = np.std(np.array(cr), axis=0)
 71 |         # upper and lower limit of confidence intervals
 72 |         LL = M - 0.95 * S
 73 |         UL = M + 0.95 * S
 74 | 
 75 |         plt.xlabel("Trading Instant (h)")
 76 |         plt.ylabel(names[i-1])
 77 |         plt.title('Cumulative Averadge Return (%)')
 78 |         plt.grid(True)
 79 |         plt.plot(range(len(M)), M, linewidth=2)  # mean curve.
 80 |         plt.fill_between(range(len(M)), LL, UL, color='b', alpha=.2)  # std curves.
 81 |         i += 1
 82 | 
 83 |     plt.show()
 84 | 
 85 | 
 86 | 
 87 | 
 88 | def load_data(path):
 89 |     if os.path.isfile(path + 'hourly_aggregated_dataset.csv'):
 90 |         df = pd.read_csv(path + 'hourly_aggregated_dataset.csv')
 91 |     else:
 92 |         # Aggregate the dataset hourly by picking the value at first row for Open,
 93 |         # the max within an hour for High, the minimum for Low, the last value for Close
 94 | 
 95 |         df = pd.read_csv(path + 'coinbaseUSD_1-min_data_2014-12-01_to_2019-01-09.csv')
 96 |         df_hourly_aggregated = pd.DataFrame()
 97 | 
 98 |         for count in range(0, len(df) - 60, 60):
 99 |             hour_interval = pd.DataFrame(df.iloc[count:count + 60])
100 |             df_hourly_aggregated = df_hourly_aggregated.append(pd.DataFrame([[hour_interval['Open'].iloc[0],
101 |                                                                               hour_interval['High'].max(),
102 |                                                                               hour_interval['Low'].min(),
103 |                                                                               hour_interval['Close'].iloc[
104 |                                                                                   len(hour_interval) - 1]]]))
105 | 
106 |         df_hourly_aggregated.columns = ['Open', 'High', 'Low', 'Close']
107 |         df_hourly_aggregated.index = np.arange(1, len(df_hourly_aggregated) + 1)
108 |         df_hourly_aggregated.interpolate(inplace=True)
109 |         df_hourly_aggregated.fillna(method='bfill', axis=0, inplace=True)
110 |         df_hourly_aggregated.to_csv(path + 'hourly_aggregated_dataset.csv', index=False)
111 |         df = df_hourly_aggregated
112 |         del df_hourly_aggregated
113 |     return df
114 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | This is free and unencumbered software released into the public domain.
 2 | 
 3 | Anyone is free to copy, modify, publish, use, compile, sell, or
 4 | distribute this software, either in source code form or as a compiled
 5 | binary, for any purpose, commercial or non-commercial, and by any
 6 | means.
 7 | 
 8 | In jurisdictions that recognize copyright laws, the author or authors
 9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | For more information, please refer to <https://unlicense.org>
25 | 


--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
 1 | # A Deep Reinforcement Learning Trading Bot
 2 | 
 3 | This job is mine personal implementation of an existing paper.
 4 | For a detailed description and references to the original work check the report.pdf file. For a short introduction check [here](https://nicods96.github.io/hi//designing-a-pytorch-deep-reinforcement-learning-trading-bot/).
 5 | 
 6 | - Note 1: Since I am not an expert of financial markets, the environment definition is just a sketch. To train the model for a real use the environment must be redefined. I am open to any suggestion on how to implement a real simulation of a financial market. 
 7 | - Note 2: This job is just an implementation, with some modifications, of an existing work. The model's parameters are not tuned, they just replicate the paper setting. In my opinion, as stated and motivated in the [report.pdf](https://github.com/nicoDs96/Trading-Bot---Deep-Reinforcement-Learning/blob/master/report.pdf), those parameters are not valid for a well trained Deep Reinforcement Learning agent.
 8 | - Note 3: DRL techniques available in the model: Deep Reinforcement Learning, Double Deep Reinforcement Learning, Dueling Deep Reinforcement Learning, Dueling Double Deep Reinforcement Learning. 
 9 | 
10 | ## To Do:
11 | - Add command line arguments to run custom training/test
12 | - Implement real market simulation
13 | 


--------------------------------------------------------------------------------
/report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicoDs96/Trading-Bot---Deep-Reinforcement-Learning/818fafc5e836e85011889ee04c5b8b1b804c5b22/report.pdf


--------------------------------------------------------------------------------