├── LICENSE ├── README.md ├── backtest.py ├── ex1-self_learning_quant.py ├── ex2-self_learning_quant.py └── ex3-self_learning_quant.py /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 danielzak 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sl-quant 2 | Companion code for the "Self Learning Quant" medium post, read the entire post here: https://medium.com/@danielzakrisson/the-self-learning-quant-d3329fcc9915#.3b4ghaoa7 3 | -------------------------------------------------------------------------------- /backtest.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------- 2 | # Name: backtest 3 | # Purpose: perform routine backtesting tasks. 4 | # This module should be useable as a stand-alone library outide of the TWP package. 5 | # 6 | # Author: Jev Kuznetsov 7 | # 8 | # Created: 03/07/2014 9 | # Copyright: (c) Jev Kuznetsov 2013 10 | # Licence: BSD 11 | #------------------------------------------------------------------------------- 12 | 13 | import pandas as pd 14 | import matplotlib.pyplot as plt 15 | import sys 16 | import numpy as np 17 | 18 | 19 | 20 | 21 | def tradeBracket(price,entryBar,upper=None, lower=None, timeout=None): 22 | ''' 23 | trade a bracket on price series, return price delta and exit bar # 24 | Input 25 | ------ 26 | price : numpy array of price values 27 | entryBar: entry bar number, *determines entry price* 28 | upper : high stop 29 | lower : low stop 30 | timeout : max number of periods to hold 31 | 32 | Returns exit price and number of bars held 33 | 34 | ''' 35 | assert isinstance(price, np.ndarray) , 'price must be a numpy array' 36 | 37 | 38 | # create list of exit indices and add max trade duration. 
Exits are relative to entry bar 39 | if timeout: # set trade length to timeout or series length 40 | exits = [min(timeout,len(price)-entryBar-1)] 41 | else: 42 | exits = [len(price)-entryBar-1] 43 | 44 | p = price[entryBar:entryBar+exits[0]+1] # subseries of price 45 | 46 | # extend exits list with conditional exits 47 | # check upper bracket 48 | if upper: 49 | assert upper>p[0] , 'Upper bracket must be higher than entry price ' 50 | idx = np.where(p>upper)[0] # find where price is higher than the upper bracket 51 | if idx.any(): 52 | exits.append(idx[0]) # append first occurrence 53 | # same for lower bracket 54 | if lower: 55 | assert lower
0].index , 144 | # 'ko':self.trades[self.trades == 0].index, 145 | # 'rv':self.trades[self.trades < 0].index} 146 | # 147 | # 148 | # for style, idx in indices.iteritems(): 149 | # if len(idx) > 0: 150 | # p[idx].plot(style=style) 151 | 152 | # --- plot trades 153 | #colored line for long positions 154 | idx = (self.data['shares'] > 0) | (self.data['shares'] > 0).shift(1) 155 | if idx.any(): 156 | p[idx].plot(style='go') 157 | l.append('long') 158 | 159 | #colored line for short positions 160 | idx = (self.data['shares'] < 0) | (self.data['shares'] < 0).shift(1) 161 | if idx.any(): 162 | p[idx].plot(style='ro') 163 | l.append('short') 164 | 165 | plt.xlim([p.index[0],p.index[-1]]) # show full axis 166 | 167 | plt.legend(l,loc='best') 168 | plt.title('trades') 169 | 170 | 171 | class ProgressBar: 172 | def __init__(self, iterations): 173 | self.iterations = iterations 174 | self.prog_bar = '[]' 175 | self.fill_char = '*' 176 | self.width = 50 177 | self.__update_amount(0) 178 | 179 | def animate(self, iteration): 180 | print '\r',self, 181 | sys.stdout.flush() 182 | self.update_iteration(iteration + 1) 183 | 184 | def update_iteration(self, elapsed_iter): 185 | self.__update_amount((elapsed_iter / float(self.iterations)) * 100.0) 186 | self.prog_bar += ' %d of %s complete' % (elapsed_iter, self.iterations) 187 | 188 | def __update_amount(self, new_amount): 189 | percent_done = int(round((new_amount / 100.0) * 100.0)) 190 | all_full = self.width - 2 191 | num_hashes = int(round((percent_done / 100.0) * all_full)) 192 | self.prog_bar = '[' + self.fill_char * num_hashes + ' ' * (all_full - num_hashes) + ']' 193 | pct_place = (len(self.prog_bar) // 2) - len(str(percent_done)) 194 | pct_string = '%d%%' % percent_done 195 | self.prog_bar = self.prog_bar[0:pct_place] + \ 196 | (pct_string + self.prog_bar[pct_place + len(pct_string):]) 197 | def __str__(self): 198 | return str(self.prog_bar) 199 | 200 | def sharpe(pnl): 201 | return np.sqrt(250)*pnl.mean()/pnl.std() 202 | 203 | -------------------------------------------------------------------------------- /ex1-self_learning_quant.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | np.random.seed(1335) # for reproducibility 5 | np.set_printoptions(precision=5, suppress=True, linewidth=150) 6 | 7 | import pandas as pd 8 | import backtest as twp 9 | from matplotlib import pyplot as plt 10 | 11 | from sklearn import metrics, preprocessing 12 | 13 | ''' 14 | Name: The Self Learning Quant, Example 1 15 | 16 | Author: Daniel Zakrisson 17 | 18 | Created: 30/03/2016 19 | Copyright: (c) Daniel Zakrisson 2016 20 | Licence: BSD 21 | 22 | Requirements: 23 | Numpy 24 | Pandas 25 | MatplotLib 26 | scikit-learn 27 | Keras, https://keras.io/ 28 | backtest.py from the TWP library. 
Download backtest.py and put in the same folder 29 | 30 | /plt create a subfolder in the same directory where plot files will be saved 31 | 32 | ''' 33 | 34 | #Load data 35 | def load_data(): 36 | price = np.arange(200/10.0) #linearly increasing prices 37 | return price 38 | 39 | #Initialize first state, all items are placed deterministically 40 | def init_state(data): 41 | 42 | close = data 43 | diff = np.diff(data) 44 | diff = np.insert(diff, 0, 0) 45 | 46 | #--- Preprocess data 47 | xdata = np.column_stack((close, diff)) 48 | xdata = np.nan_to_num(xdata) 49 | scaler = preprocessing.StandardScaler() 50 | xdata = scaler.fit_transform(xdata) 51 | 52 | state = xdata[0:1, :] 53 | return state, xdata 54 | 55 | #Take Action 56 | def take_action(state, xdata, action, signal, time_step): 57 | #this should generate a list of trade signals that at evaluation time are fed to the backtester 58 | #the backtester should get a list of trade signals and a list of price data for the assett 59 | 60 | #make necessary adjustments to state and then return it 61 | time_step += 1 62 | 63 | #if the current iteration is the last state ("terminal state") then set terminal_state to 1 64 | if time_step == xdata.shape[0]: 65 | state = xdata[time_step-1:time_step, :] 66 | terminal_state = 1 67 | signal.loc[time_step] = 0 68 | return state, time_step, signal, terminal_state 69 | 70 | #move the market data window one step forward 71 | state = xdata[time_step-1:time_step, :] 72 | #take action 73 | if action != 0: 74 | if action == 1: 75 | signal.loc[time_step] = 100 76 | elif action == 2: 77 | signal.loc[time_step] = -100 78 | elif action == 3: 79 | signal.loc[time_step] = 0 80 | terminal_state = 0 81 | 82 | return state, time_step, signal, terminal_state 83 | 84 | #Get Reward, the reward is returned at the end of an episode 85 | def get_reward(new_state, time_step, action, xdata, signal, terminal_state, epoch=0): 86 | reward = 0 87 | signal.fillna(value=0, inplace=True) 88 | if terminal_state == 0: 89 | #get reward for the most current action 90 | if signal[time_step] != signal[time_step-1] and terminal_state == 0: 91 | i=1 92 | while signal[time_step-i] == signal[time_step-1-i] and time_step - 1 - i > 0: 93 | i += 1 94 | reward = (xdata[time_step-1, 0] - xdata[time_step - i-1, 0]) * signal[time_step - 1]*-100 + i*np.abs(signal[time_step - 1])/10.0 95 | if signal[time_step] == 0 and signal[time_step - 1] == 0: 96 | reward -= 10 97 | 98 | #calculate the reward for all actions if the last iteration in set 99 | if terminal_state == 1: 100 | #run backtest, send list of trade signals and asset data to backtest function 101 | bt = twp.Backtest(pd.Series(data=[x[0] for x in xdata]), signal, signalType='shares') 102 | reward = bt.pnl.iloc[-1] 103 | 104 | return reward 105 | 106 | def evaluate_Q(eval_data, eval_model): 107 | #This function is used to evaluate the perofrmance of the system each epoch, without the influence of epsilon and random actions 108 | signal = pd.Series(index=np.arange(len(eval_data))) 109 | state, xdata = init_state(eval_data) 110 | status = 1 111 | terminal_state = 0 112 | time_step = 1 113 | while(status == 1): 114 | #We start in state S 115 | #Run the Q function on S to get predicted reward values on all the possible actions 116 | qval = eval_model.predict(state.reshape(1,2), batch_size=1) 117 | action = (np.argmax(qval)) 118 | #Take action, observe new state S' 119 | new_state, time_step, signal, terminal_state = take_action(state, xdata, action, signal, time_step) 120 | #Observe reward 121 | 
eval_reward = get_reward(new_state, time_step, action, xdata, signal, terminal_state, i) 122 | state = new_state 123 | if terminal_state == 1: #terminal state 124 | status = 0 125 | return eval_reward 126 | 127 | #This neural network is the the Q-function, run it like this: 128 | #model.predict(state.reshape(1,64), batch_size=1) 129 | 130 | from keras.models import Sequential 131 | from keras.layers.core import Dense, Dropout, Activation 132 | from keras.optimizers import RMSprop 133 | 134 | model = Sequential() 135 | model.add(Dense(4, init='lecun_uniform', input_shape=(2,))) 136 | model.add(Activation('relu')) 137 | #model.add(Dropout(0.2)) I'm not using dropout in this example 138 | 139 | model.add(Dense(4, init='lecun_uniform')) 140 | model.add(Activation('relu')) 141 | #model.add(Dropout(0.2)) 142 | 143 | model.add(Dense(4, init='lecun_uniform')) 144 | model.add(Activation('linear')) #linear output so we can have range of real-valued outputs 145 | 146 | rms = RMSprop() 147 | model.compile(loss='mse', optimizer=rms) 148 | 149 | 150 | import random, timeit 151 | 152 | start_time = timeit.default_timer() 153 | 154 | indata = load_data() 155 | epochs = 10 156 | gamma = 0.9 #a high gamma makes a long term reward more valuable 157 | epsilon = 1 158 | learning_progress = [] 159 | #stores tuples of (S, A, R, S') 160 | h = 0 161 | signal = pd.Series(index=np.arange(len(indata))) 162 | for i in range(epochs): 163 | 164 | state, xdata = init_state(indata) 165 | status = 1 166 | terminal_state = 0 167 | time_step = 1 168 | #while learning is still in progress 169 | while(status == 1): 170 | #We start in state S 171 | #Run the Q function on S to get predicted reward values on all the possible actions 172 | qval = model.predict(state.reshape(1,2), batch_size=1) 173 | if (random.random() < epsilon) and i != epochs - 1: #maybe choose random action if not the last epoch 174 | action = np.random.randint(0,4) #assumes 4 different actions 175 | else: #choose best action from Q(s,a) values 176 | action = (np.argmax(qval)) 177 | #Take action, observe new state S' 178 | new_state, time_step, signal, terminal_state = take_action(state, xdata, action, signal, time_step) 179 | #Observe reward 180 | reward = get_reward(new_state, time_step, action, xdata, signal, terminal_state, i) 181 | #Get max_Q(S',a) 182 | newQ = model.predict(new_state.reshape(1,2), batch_size=1) 183 | maxQ = np.max(newQ) 184 | y = np.zeros((1,4)) 185 | y[:] = qval[:] 186 | if terminal_state == 0: #non-terminal state 187 | update = (reward + (gamma * maxQ)) 188 | else: #terminal state (means that it is the last state) 189 | update = reward 190 | y[0][action] = update #target output 191 | model.fit(state.reshape(1,2), y, batch_size=1, nb_epoch=1, verbose=0) 192 | state = new_state 193 | if terminal_state == 1: #terminal state 194 | status = 0 195 | eval_reward = evaluate_Q(indata, model) 196 | print("Epoch #: %s Reward: %f Epsilon: %f" % (i,eval_reward, epsilon)) 197 | learning_progress.append((eval_reward)) 198 | if epsilon > 0.1: 199 | epsilon -= (1.0/epochs) 200 | 201 | elapsed = np.round(timeit.default_timer() - start_time, decimals=2) 202 | print("Completed in %f" % (elapsed,)) 203 | 204 | #plot results 205 | bt = twp.Backtest(pd.Series(data=[x[0] for x in xdata]), signal, signalType='shares') 206 | bt.data['delta'] = bt.data['shares'].diff().fillna(0) 207 | 208 | print(bt.data) 209 | 210 | plt.figure() 211 | bt.plotTrades() 212 | plt.suptitle('epoch' + str(i)) 213 | plt.savefig('plt/final_trades'+'.png', bbox_inches='tight', 
pad_inches=1, dpi=72) #assumes there is a ./plt dir 214 | plt.close('all') 215 | 216 | plt.figure() 217 | plt.subplot(3,1,1) 218 | bt.plotTrades() 219 | plt.subplot(3,1,2) 220 | bt.pnl.plot(style='x-') 221 | plt.subplot(3,1,3) 222 | plt.plot(learning_progress) 223 | 224 | plt.show() 225 | 226 | 227 | -------------------------------------------------------------------------------- /ex2-self_learning_quant.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | np.random.seed(1335) # for reproducibility 5 | np.set_printoptions(precision=5, suppress=True, linewidth=150) 6 | 7 | import pandas as pd 8 | import backtest as twp 9 | from matplotlib import pyplot as plt 10 | 11 | from sklearn import metrics, preprocessing 12 | 13 | ''' 14 | Name: The Self Learning Quant, Example 2 15 | 16 | Author: Daniel Zakrisson 17 | 18 | Created: 30/03/2016 19 | Copyright: (c) Daniel Zakrisson 2016 20 | Licence: BSD 21 | 22 | Requirements: 23 | Numpy 24 | Pandas 25 | MatplotLib 26 | scikit-learn 27 | Keras, https://keras.io/ 28 | backtest.py from the TWP library. Download backtest.py and put in the same folder 29 | 30 | /plt create a subfolder in the same directory where plot files will be saved 31 | 32 | ''' 33 | 34 | #Load data 35 | def load_data(): 36 | price = np.sin(np.arange(200)/30.0) #sine prices 37 | return price 38 | 39 | #Initialize first state, all items are placed deterministically 40 | def init_state(data): 41 | 42 | close = data 43 | diff = np.diff(data) 44 | diff = np.insert(diff, 0, 0) 45 | 46 | #--- Preprocess data 47 | xdata = np.column_stack((close, diff)) 48 | xdata = np.nan_to_num(xdata) 49 | scaler = preprocessing.StandardScaler() 50 | xdata = scaler.fit_transform(xdata) 51 | 52 | state = xdata[0:1, :] 53 | return state, xdata 54 | 55 | #Take Action 56 | def take_action(state, xdata, action, signal, time_step): 57 | #this should generate a list of trade signals that at evaluation time are fed to the backtester 58 | #the backtester should get a list of trade signals and a list of price data for the assett 59 | 60 | #make necessary adjustments to state and then return it 61 | time_step += 1 62 | 63 | #if the current iteration is the last state ("terminal state") then set terminal_state to 1 64 | if time_step == xdata.shape[0]: 65 | state = xdata[time_step-1:time_step, :] 66 | terminal_state = 1 67 | signal.loc[time_step] = 0 68 | return state, time_step, signal, terminal_state 69 | 70 | #move the market data window one step forward 71 | state = xdata[time_step-1:time_step, :] 72 | #take action 73 | if action != 0: 74 | if action == 1: 75 | signal.loc[time_step] = 100 76 | elif action == 2: 77 | signal.loc[time_step] = -100 78 | elif action == 3: 79 | signal.loc[time_step] = 0 80 | terminal_state = 0 81 | 82 | return state, time_step, signal, terminal_state 83 | 84 | #Get Reward, the reward is returned at the end of an episode 85 | def get_reward(new_state, time_step, action, xdata, signal, terminal_state, epoch=0): 86 | reward = 0 87 | signal.fillna(value=0, inplace=True) 88 | if terminal_state == 0: 89 | #get reward for the most current action 90 | if signal[time_step] != signal[time_step-1] and terminal_state == 0: 91 | i=1 92 | while signal[time_step-i] == signal[time_step-1-i] and time_step - 1 - i > 0: 93 | i += 1 94 | reward = (xdata[time_step-1, 0] - xdata[time_step - i-1, 0]) * signal[time_step - 1]*-100 + i*np.abs(signal[time_step - 1])/10.0 95 | if signal[time_step] == 0 and 
signal[time_step - 1] == 0: 96 | reward -= 10 97 | 98 | #calculate the reward for all actions if the last iteration in set 99 | if terminal_state == 1: 100 | #run backtest, send list of trade signals and asset data to backtest function 101 | bt = twp.Backtest(pd.Series(data=[x[0] for x in xdata]), signal, signalType='shares') 102 | reward = bt.pnl.iloc[-1] 103 | 104 | return reward 105 | 106 | def evaluate_Q(eval_data, eval_model): 107 | #This function is used to evaluate the perofrmance of the system each epoch, without the influence of epsilon and random actions 108 | signal = pd.Series(index=np.arange(len(eval_data))) 109 | state, xdata = init_state(eval_data) 110 | status = 1 111 | terminal_state = 0 112 | time_step = 1 113 | while(status == 1): 114 | #We start in state S 115 | #Run the Q function on S to get predicted reward values on all the possible actions 116 | qval = eval_model.predict(state.reshape(1,2), batch_size=1) 117 | action = (np.argmax(qval)) 118 | #Take action, observe new state S' 119 | new_state, time_step, signal, terminal_state = take_action(state, xdata, action, signal, time_step) 120 | #Observe reward 121 | eval_reward = get_reward(new_state, time_step, action, xdata, signal, terminal_state, i) 122 | state = new_state 123 | if terminal_state == 1: #terminal state 124 | status = 0 125 | return eval_reward 126 | 127 | #This neural network is the the Q-function, run it like this: 128 | #model.predict(state.reshape(1,64), batch_size=1) 129 | 130 | from keras.models import Sequential 131 | from keras.layers.core import Dense, Dropout, Activation 132 | from keras.optimizers import RMSprop 133 | 134 | model = Sequential() 135 | model.add(Dense(4, init='lecun_uniform', input_shape=(2,))) 136 | model.add(Activation('relu')) 137 | #model.add(Dropout(0.2)) I'm not using dropout in this example 138 | 139 | model.add(Dense(4, init='lecun_uniform')) 140 | model.add(Activation('relu')) 141 | #model.add(Dropout(0.2)) 142 | 143 | model.add(Dense(4, init='lecun_uniform')) 144 | model.add(Activation('linear')) #linear output so we can have range of real-valued outputs 145 | 146 | rms = RMSprop() 147 | model.compile(loss='mse', optimizer=rms) 148 | 149 | 150 | import random, timeit 151 | 152 | start_time = timeit.default_timer() 153 | 154 | indata = load_data() 155 | epochs = 100 156 | gamma = 0.9 #a high gamma makes a long term reward more valuable 157 | epsilon = 1 158 | learning_progress = [] 159 | #stores tuples of (S, A, R, S') 160 | h = 0 161 | signal = pd.Series(index=np.arange(len(indata))) 162 | for i in range(epochs): 163 | 164 | state, xdata = init_state(indata) 165 | status = 1 166 | terminal_state = 0 167 | time_step = 1 168 | #while learning is still in progress 169 | while(status == 1): 170 | #We start in state S 171 | #Run the Q function on S to get predicted reward values on all the possible actions 172 | qval = model.predict(state.reshape(1,2), batch_size=1) 173 | if (random.random() < epsilon) and i != epochs - 1: #maybe choose random action if not the last epoch 174 | action = np.random.randint(0,4) #assumes 4 different actions 175 | else: #choose best action from Q(s,a) values 176 | action = (np.argmax(qval)) 177 | #Take action, observe new state S' 178 | new_state, time_step, signal, terminal_state = take_action(state, xdata, action, signal, time_step) 179 | #Observe reward 180 | reward = get_reward(new_state, time_step, action, xdata, signal, terminal_state, i) 181 | #Get max_Q(S',a) 182 | newQ = model.predict(new_state.reshape(1,2), batch_size=1) 183 | maxQ = 
np.max(newQ) 184 | y = np.zeros((1,4)) 185 | y[:] = qval[:] 186 | if terminal_state == 0: #non-terminal state 187 | update = (reward + (gamma * maxQ)) 188 | else: #terminal state (means that it is the last state) 189 | update = reward 190 | y[0][action] = update #target output 191 | model.fit(state.reshape(1,2), y, batch_size=1, nb_epoch=1, verbose=0) 192 | state = new_state 193 | if terminal_state == 1: #terminal state 194 | status = 0 195 | eval_reward = evaluate_Q(indata, model) 196 | print("Epoch #: %s Reward: %f Epsilon: %f" % (i,eval_reward, epsilon)) 197 | learning_progress.append((eval_reward)) 198 | if epsilon > 0.1: 199 | epsilon -= (1.0/epochs) 200 | 201 | elapsed = np.round(timeit.default_timer() - start_time, decimals=2) 202 | print("Completed in %f" % (elapsed,)) 203 | 204 | #plot results 205 | bt = twp.Backtest(pd.Series(data=[x[0] for x in xdata]), signal, signalType='shares') 206 | bt.data['delta'] = bt.data['shares'].diff().fillna(0) 207 | 208 | print(bt.data) 209 | 210 | plt.figure() 211 | bt.plotTrades() 212 | plt.suptitle('epoch' + str(i)) 213 | plt.savefig('plt/final_trades'+'.png', bbox_inches='tight', pad_inches=1, dpi=72) #assumes there is a ./plt dir 214 | plt.close('all') 215 | 216 | plt.figure() 217 | plt.subplot(3,1,1) 218 | bt.plotTrades() 219 | plt.subplot(3,1,2) 220 | bt.pnl.plot(style='x-') 221 | plt.subplot(3,1,3) 222 | plt.plot(learning_progress) 223 | 224 | plt.show() -------------------------------------------------------------------------------- /ex3-self_learning_quant.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | np.random.seed(1335) # for reproducibility 6 | np.set_printoptions(precision=5, suppress=True, linewidth=150) 7 | 8 | import pandas as pd 9 | import backtest as twp 10 | from matplotlib import pyplot as plt 11 | from sklearn import metrics, preprocessing 12 | from talib.abstract import * 13 | from sklearn.externals import joblib 14 | 15 | import Quandl 16 | 17 | ''' 18 | Name: The Self Learning Quant, Example 3 19 | 20 | Author: Daniel Zakrisson 21 | 22 | Created: 30/03/2016 23 | Copyright: (c) Daniel Zakrisson 2016 24 | Licence: BSD 25 | 26 | Requirements: 27 | Numpy 28 | Pandas 29 | MatplotLib 30 | scikit-learn 31 | TA-Lib, instructions at https://mrjbq7.github.io/ta-lib/install.html 32 | Keras, https://keras.io/ 33 | Quandl, https://www.quandl.com/tools/python 34 | backtest.py from the TWP library. 
Download backtest.py and put in the same folder 35 | 36 | /plt create a subfolder in the same directory where plot files will be saved 37 | 38 | ''' 39 | 40 | #Load data 41 | def read_convert_data(symbol='XBTEUR'): 42 | if symbol == 'XBTEUR': 43 | prices = Quandl.get("BCHARTS/KRAKENEUR") 44 | prices.to_pickle('data/XBTEUR_1day.pkl') # a /data folder must exist 45 | if symbol == 'EURUSD_1day': 46 | #prices = Quandl.get("ECB/EURUSD") 47 | prices = pd.read_csv('data/EURUSD_1day.csv',sep=",", skiprows=0, header=0, index_col=0, parse_dates=True, names=['ticker', 'date', 'time', 'open', 'low', 'high', 'close']) 48 | prices.to_pickle('data/EURUSD_1day.pkl') 49 | print(prices) 50 | return 51 | 52 | def load_data(test=False): 53 | #prices = pd.read_pickle('data/OILWTI_1day.pkl') 54 | #prices = pd.read_pickle('data/EURUSD_1day.pkl') 55 | #prices.rename(columns={'Value': 'close'}, inplace=True) 56 | prices = pd.read_pickle('data/XBTEUR_1day.pkl') 57 | prices.rename(columns={'Open': 'open', 'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume (BTC)': 'volume'}, inplace=True) 58 | print(prices) 59 | x_train = prices.iloc[-2000:-300,] 60 | x_test= prices.iloc[-2000:,] 61 | if test: 62 | return x_test 63 | else: 64 | return x_train 65 | 66 | #Initialize first state, all items are placed deterministically 67 | def init_state(indata, test=False): 68 | close = indata['close'].values 69 | diff = np.diff(close) 70 | diff = np.insert(diff, 0, 0) 71 | sma15 = SMA(indata, timeperiod=15) 72 | sma60 = SMA(indata, timeperiod=60) 73 | rsi = RSI(indata, timeperiod=14) 74 | atr = ATR(indata, timeperiod=14) 75 | 76 | #--- Preprocess data 77 | xdata = np.column_stack((close, diff, sma15, close-sma15, sma15-sma60, rsi, atr)) 78 | 79 | xdata = np.nan_to_num(xdata) 80 | if test == False: 81 | scaler = preprocessing.StandardScaler() 82 | xdata = np.expand_dims(scaler.fit_transform(xdata), axis=1) 83 | joblib.dump(scaler, 'data/scaler.pkl') 84 | elif test == True: 85 | scaler = joblib.load('data/scaler.pkl') 86 | xdata = np.expand_dims(scaler.fit_transform(xdata), axis=1) 87 | state = xdata[0:1, 0:1, :] 88 | 89 | return state, xdata, close 90 | 91 | #Take Action 92 | def take_action(state, xdata, action, signal, time_step): 93 | #this should generate a list of trade signals that at evaluation time are fed to the backtester 94 | #the backtester should get a list of trade signals and a list of price data for the assett 95 | 96 | #make necessary adjustments to state and then return it 97 | time_step += 1 98 | 99 | #if the current iteration is the last state ("terminal state") then set terminal_state to 1 100 | if time_step + 1 == xdata.shape[0]: 101 | state = xdata[time_step-1:time_step, 0:1, :] 102 | terminal_state = 1 103 | signal.loc[time_step] = 0 104 | 105 | return state, time_step, signal, terminal_state 106 | 107 | #move the market data window one step forward 108 | state = xdata[time_step-1:time_step, 0:1, :] 109 | #take action 110 | if action == 1: 111 | signal.loc[time_step] = 100 112 | elif action == 2: 113 | signal.loc[time_step] = -100 114 | else: 115 | signal.loc[time_step] = 0 116 | #print(state) 117 | terminal_state = 0 118 | #print(signal) 119 | 120 | return state, time_step, signal, terminal_state 121 | 122 | #Get Reward, the reward is returned at the end of an episode 123 | def get_reward(new_state, time_step, action, xdata, signal, terminal_state, eval=False, epoch=0): 124 | reward = 0 125 | signal.fillna(value=0, inplace=True) 126 | 127 | if eval == False: 128 | bt = twp.Backtest(pd.Series(data=[x for x 
in xdata[time_step-2:time_step]], index=signal[time_step-2:time_step].index.values), signal[time_step-2:time_step], signalType='shares') 129 | reward = ((bt.data['price'].iloc[-1] - bt.data['price'].iloc[-2])*bt.data['shares'].iloc[-1]) 130 | 131 | if terminal_state == 1 and eval == True: 132 | #save a figure of the test set 133 | bt = twp.Backtest(pd.Series(data=[x for x in xdata], index=signal.index.values), signal, signalType='shares') 134 | reward = bt.pnl.iloc[-1] 135 | plt.figure(figsize=(3,4)) 136 | bt.plotTrades() 137 | plt.axvline(x=400, color='black', linestyle='--') 138 | plt.text(250, 400, 'training data') 139 | plt.text(450, 400, 'test data') 140 | plt.suptitle(str(epoch)) 141 | plt.savefig('plt/'+str(epoch)+'.png', bbox_inches='tight', pad_inches=1, dpi=72) 142 | plt.close('all') 143 | #print(time_step, terminal_state, eval, reward) 144 | 145 | return reward 146 | 147 | def evaluate_Q(eval_data, eval_model, price_data, epoch=0): 148 | #This function is used to evaluate the performance of the system each epoch, without the influence of epsilon and random actions 149 | signal = pd.Series(index=np.arange(len(eval_data))) 150 | state, xdata, price_data = init_state(eval_data) 151 | status = 1 152 | terminal_state = 0 153 | time_step = 1 154 | while(status == 1): 155 | #We start in state S 156 | #Run the Q function on S to get predicted reward values on all the possible actions 157 | qval = eval_model.predict(state, batch_size=1) 158 | action = (np.argmax(qval)) 159 | #Take action, observe new state S' 160 | new_state, time_step, signal, terminal_state = take_action(state, xdata, action, signal, time_step) 161 | #Observe reward 162 | eval_reward = get_reward(new_state, time_step, action, price_data, signal, terminal_state, eval=True, epoch=epoch) 163 | state = new_state 164 | if terminal_state == 1: #terminal state 165 | status = 0 166 | 167 | return eval_reward 168 | 169 | #This neural network is the the Q-function, run it like this: 170 | #model.predict(state.reshape(1,64), batch_size=1) 171 | 172 | from keras.models import Sequential 173 | from keras.layers.core import Dense, Dropout, Activation 174 | from keras.layers.recurrent import LSTM 175 | from keras.optimizers import RMSprop, Adam 176 | 177 | tsteps = 1 178 | batch_size = 1 179 | num_features = 7 180 | 181 | model = Sequential() 182 | model.add(LSTM(64, 183 | input_shape=(1, num_features), 184 | return_sequences=True, 185 | stateful=False)) 186 | model.add(Dropout(0.5)) 187 | 188 | model.add(LSTM(64, 189 | input_shape=(1, num_features), 190 | return_sequences=False, 191 | stateful=False)) 192 | model.add(Dropout(0.5)) 193 | 194 | model.add(Dense(4, init='lecun_uniform')) 195 | model.add(Activation('linear')) #linear output so we can have range of real-valued outputs 196 | 197 | rms = RMSprop() 198 | adam = Adam() 199 | model.compile(loss='mse', optimizer=adam) 200 | 201 | 202 | import random, timeit 203 | 204 | start_time = timeit.default_timer() 205 | 206 | read_convert_data(symbol='XBTEUR') #run once to read indata, resample and convert to pickle 207 | indata = load_data() 208 | test_data = load_data(test=True) 209 | epochs = 100 210 | gamma = 0.95 #since the reward can be several time steps away, make gamma high 211 | epsilon = 1 212 | batchSize = 100 213 | buffer = 200 214 | replay = [] 215 | learning_progress = [] 216 | #stores tuples of (S, A, R, S') 217 | h = 0 218 | #signal = pd.Series(index=market_data.index) 219 | signal = pd.Series(index=np.arange(len(indata))) 220 | for i in range(epochs): 221 | if i == 
epochs-1: #the last epoch, use test data set 222 | indata = load_data(test=True) 223 | state, xdata, price_data = init_state(indata, test=True) 224 | else: 225 | state, xdata, price_data = init_state(indata) 226 | status = 1 227 | terminal_state = 0 228 | #time_step = market_data.index[0] + 64 #when using market_data 229 | time_step = 14 230 | #while game still in progress 231 | while(status == 1): 232 | #We are in state S 233 | #Let's run our Q function on S to get Q values for all possible actions 234 | qval = model.predict(state, batch_size=1) 235 | if (random.random() < epsilon): #choose random action 236 | action = np.random.randint(0,4) #assumes 4 different actions 237 | else: #choose best action from Q(s,a) values 238 | action = (np.argmax(qval)) 239 | #Take action, observe new state S' 240 | new_state, time_step, signal, terminal_state = take_action(state, xdata, action, signal, time_step) 241 | #Observe reward 242 | reward = get_reward(new_state, time_step, action, price_data, signal, terminal_state) 243 | 244 | #Experience replay storage 245 | if (len(replay) < buffer): #if buffer not filled, add to it 246 | replay.append((state, action, reward, new_state)) 247 | #print(time_step, reward, terminal_state) 248 | else: #if buffer full, overwrite old values 249 | if (h < (buffer-1)): 250 | h += 1 251 | else: 252 | h = 0 253 | replay[h] = (state, action, reward, new_state) 254 | #randomly sample our experience replay memory 255 | minibatch = random.sample(replay, batchSize) 256 | X_train = [] 257 | y_train = [] 258 | for memory in minibatch: 259 | #Get max_Q(S',a) 260 | old_state, action, reward, new_state = memory 261 | old_qval = model.predict(old_state, batch_size=1) 262 | newQ = model.predict(new_state, batch_size=1) 263 | maxQ = np.max(newQ) 264 | y = np.zeros((1,4)) 265 | y[:] = old_qval[:] 266 | if terminal_state == 0: #non-terminal state 267 | update = (reward + (gamma * maxQ)) 268 | else: #terminal state 269 | update = reward 270 | y[0][action] = update 271 | #print(time_step, reward, terminal_state) 272 | X_train.append(old_state) 273 | y_train.append(y.reshape(4,)) 274 | 275 | X_train = np.squeeze(np.array(X_train), axis=(1)) 276 | y_train = np.array(y_train) 277 | model.fit(X_train, y_train, batch_size=batchSize, nb_epoch=1, verbose=0) 278 | 279 | state = new_state 280 | if terminal_state == 1: #if reached terminal state, update epoch status 281 | status = 0 282 | eval_reward = evaluate_Q(test_data, model, price_data, i) 283 | learning_progress.append((eval_reward)) 284 | print("Epoch #: %s Reward: %f Epsilon: %f" % (i,eval_reward, epsilon)) 285 | #learning_progress.append((reward)) 286 | if epsilon > 0.1: #decrement epsilon over time 287 | epsilon -= (1.0/epochs) 288 | 289 | 290 | elapsed = np.round(timeit.default_timer() - start_time, decimals=2) 291 | print("Completed in %f" % (elapsed,)) 292 | 293 | bt = twp.Backtest(pd.Series(data=[x[0,0] for x in xdata]), signal, signalType='shares') 294 | bt.data['delta'] = bt.data['shares'].diff().fillna(0) 295 | 296 | print(bt.data) 297 | unique, counts = np.unique(filter(lambda v: v==v, signal.values), return_counts=True) 298 | print(np.asarray((unique, counts)).T) 299 | 300 | plt.figure() 301 | plt.subplot(3,1,1) 302 | bt.plotTrades() 303 | plt.subplot(3,1,2) 304 | bt.pnl.plot(style='x-') 305 | plt.subplot(3,1,3) 306 | plt.plot(learning_progress) 307 | 308 | plt.savefig('plt/summary'+'.png', bbox_inches='tight', pad_inches=1, dpi=72) 309 | #plt.show() 310 | 311 | 312 | 
--------------------------------------------------------------------------------
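The three training scripts above share the same core Q-learning update: at each step the network is fitted toward a target vector y equal to its current prediction, except that y[action] = reward + gamma * max_a Q(s', a) for non-terminal steps and y[action] = reward at the terminal step, with an epsilon-greedy policy whose epsilon decays by 1/epochs per epoch (ex3 additionally draws minibatches from an experience-replay buffer). The sketch below is a minimal, self-contained illustration of that target construction using only numpy; the lookup table standing in for the Keras model, the three-valued state discretisation (state_of), the action-to-position mapping (position_of), the 0.1 learning rate, and the one-bar P&L reward are simplifications introduced here for clarity and are not part of the repository's code.

# Minimal numpy-only sketch of the Q-learning target update used in ex1-ex3.
# Action codes mirror the examples: 0 = hold, 1 = go long 100, 2 = go short 100, 3 = go flat.
import numpy as np

np.random.seed(1335)

prices = np.sin(np.arange(200) / 30.0)   # sine price series, as in ex2
n_actions = 4
q_table = np.zeros((3, n_actions))       # 3 discrete states x 4 actions (stand-in for the network)

def state_of(t):
    # Discretise the market into falling / flat / rising (0 / 1 / 2).
    d = prices[t] - prices[t - 1]
    return 0 if d < 0 else (2 if d > 0 else 1)

def position_of(action, prev_position):
    # Map an action code to a target position in shares; 0 keeps the current position.
    if action == 1:
        return 100
    if action == 2:
        return -100
    if action == 3:
        return 0
    return prev_position

gamma, epsilon, epochs, alpha = 0.9, 1.0, 100, 0.1
for epoch in range(epochs):
    position = 0
    for t in range(1, len(prices) - 1):
        s = state_of(t)
        # epsilon-greedy action selection, as in the training loops above
        if np.random.rand() < epsilon:
            action = np.random.randint(n_actions)
        else:
            action = int(np.argmax(q_table[s]))
        position = position_of(action, position)
        # toy reward: P&L of holding the chosen position for one bar
        reward = position * (prices[t + 1] - prices[t])
        s_next = state_of(t + 1)
        terminal = (t == len(prices) - 2)
        # table analogue of model.fit(state, y) with y[action] = reward + gamma * maxQ
        target = reward if terminal else reward + gamma * np.max(q_table[s_next])
        q_table[s, action] += alpha * (target - q_table[s, action])
    if epsilon > 0.1:
        epsilon -= 1.0 / epochs

print(q_table)

In the repository itself the table is replaced by a small dense Keras network (ex1 and ex2) or an LSTM trained from an experience-replay buffer (ex3), the state is built from price, diff, and (in ex3) TA-Lib indicators, and the reward is taken from backtest.py's P&L rather than the one-bar toy reward used here.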