├── LICENSE ├── README.md ├── backtest.py ├── ex1-self_learning_quant.py ├── ex2-self_learning_quant.py └── ex3-self_learning_quant.py /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 danielzak 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sl-quant 2 | Companion code for the "Self Learning Quant" medium post, read the entire post here: https://medium.com/@danielzakrisson/the-self-learning-quant-d3329fcc9915#.3b4ghaoa7 3 | -------------------------------------------------------------------------------- /backtest.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------- 2 | # Name: backtest 3 | # Purpose: perform routine backtesting tasks. 4 | # This module should be useable as a stand-alone library outide of the TWP package. 5 | # 6 | # Author: Jev Kuznetsov 7 | # 8 | # Created: 03/07/2014 9 | # Copyright: (c) Jev Kuznetsov 2013 10 | # Licence: BSD 11 | #------------------------------------------------------------------------------- 12 | 13 | import pandas as pd 14 | import matplotlib.pyplot as plt 15 | import sys 16 | import numpy as np 17 | 18 | 19 | 20 | 21 | def tradeBracket(price,entryBar,upper=None, lower=None, timeout=None): 22 | ''' 23 | trade a bracket on price series, return price delta and exit bar # 24 | Input 25 | ------ 26 | price : numpy array of price values 27 | entryBar: entry bar number, *determines entry price* 28 | upper : high stop 29 | lower : low stop 30 | timeout : max number of periods to hold 31 | 32 | Returns exit price and number of bars held 33 | 34 | ''' 35 | assert isinstance(price, np.ndarray) , 'price must be a numpy array' 36 | 37 | 38 | # create list of exit indices and add max trade duration. 
Exits are relative to entry bar 39 | if timeout: # set trade length to timeout or series length 40 | exits = [min(timeout,len(price)-entryBar-1)] 41 | else: 42 | exits = [len(price)-entryBar-1] 43 | 44 | p = price[entryBar:entryBar+exits[0]+1] # subseries of price 45 | 46 | # extend exits list with conditional exits 47 | # check upper bracket 48 | if upper: 49 | assert upper>p[0] , 'Upper bracket must be higher than entry price ' 50 | idx = np.where(p>upper)[0] # find where price is higher than the upper bracket 51 | if idx.any(): 52 | exits.append(idx[0]) # append first occurrence 53 | # same for lower bracket 54 | if lower: 55 | assert lower
0].index , 144 | # 'ko':self.trades[self.trades == 0].index, 145 | # 'rv':self.trades[self.trades < 0].index} 146 | # 147 | # 148 | # for style, idx in indices.iteritems(): 149 | # if len(idx) > 0: 150 | # p[idx].plot(style=style) 151 | 152 | # --- plot trades 153 | #colored line for long positions 154 | idx = (self.data['shares'] > 0) | (self.data['shares'] > 0).shift(1) 155 | if idx.any(): 156 | p[idx].plot(style='go') 157 | l.append('long') 158 | 159 | #colored line for short positions 160 | idx = (self.data['shares'] < 0) | (self.data['shares'] < 0).shift(1) 161 | if idx.any(): 162 | p[idx].plot(style='ro') 163 | l.append('short') 164 | 165 | plt.xlim([p.index[0],p.index[-1]]) # show full axis 166 | 167 | plt.legend(l,loc='best') 168 | plt.title('trades') 169 | 170 | 171 | class ProgressBar: 172 | def __init__(self, iterations): 173 | self.iterations = iterations 174 | self.prog_bar = '[]' 175 | self.fill_char = '*' 176 | self.width = 50 177 | self.__update_amount(0) 178 | 179 | def animate(self, iteration): 180 | print '\r',self, 181 | sys.stdout.flush() 182 | self.update_iteration(iteration + 1) 183 | 184 | def update_iteration(self, elapsed_iter): 185 | self.__update_amount((elapsed_iter / float(self.iterations)) * 100.0) 186 | self.prog_bar += ' %d of %s complete' % (elapsed_iter, self.iterations) 187 | 188 | def __update_amount(self, new_amount): 189 | percent_done = int(round((new_amount / 100.0) * 100.0)) 190 | all_full = self.width - 2 191 | num_hashes = int(round((percent_done / 100.0) * all_full)) 192 | self.prog_bar = '[' + self.fill_char * num_hashes + ' ' * (all_full - num_hashes) + ']' 193 | pct_place = (len(self.prog_bar) // 2) - len(str(percent_done)) 194 | pct_string = '%d%%' % percent_done 195 | self.prog_bar = self.prog_bar[0:pct_place] + \ 196 | (pct_string + self.prog_bar[pct_place + len(pct_string):]) 197 | def __str__(self): 198 | return str(self.prog_bar) 199 | 200 | def sharpe(pnl): 201 | return np.sqrt(250)*pnl.mean()/pnl.std() 202 | 203 | -------------------------------------------------------------------------------- /ex1-self_learning_quant.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | np.random.seed(1335) # for reproducibility 5 | np.set_printoptions(precision=5, suppress=True, linewidth=150) 6 | 7 | import pandas as pd 8 | import backtest as twp 9 | from matplotlib import pyplot as plt 10 | 11 | from sklearn import metrics, preprocessing 12 | 13 | ''' 14 | Name: The Self Learning Quant, Example 1 15 | 16 | Author: Daniel Zakrisson 17 | 18 | Created: 30/03/2016 19 | Copyright: (c) Daniel Zakrisson 2016 20 | Licence: BSD 21 | 22 | Requirements: 23 | Numpy 24 | Pandas 25 | MatplotLib 26 | scikit-learn 27 | Keras, https://keras.io/ 28 | backtest.py from the TWP library. 
Download backtest.py and put in the same folder 29 | 30 | /plt create a subfolder in the same directory where plot files will be saved 31 | 32 | ''' 33 | 34 | #Load data 35 | def load_data(): 36 | price = np.arange(200/10.0) #linearly increasing prices 37 | return price 38 | 39 | #Initialize first state, all items are placed deterministically 40 | def init_state(data): 41 | 42 | close = data 43 | diff = np.diff(data) 44 | diff = np.insert(diff, 0, 0) 45 | 46 | #--- Preprocess data 47 | xdata = np.column_stack((close, diff)) 48 | xdata = np.nan_to_num(xdata) 49 | scaler = preprocessing.StandardScaler() 50 | xdata = scaler.fit_transform(xdata) 51 | 52 | state = xdata[0:1, :] 53 | return state, xdata 54 | 55 | #Take Action 56 | def take_action(state, xdata, action, signal, time_step): 57 | #this should generate a list of trade signals that at evaluation time are fed to the backtester 58 | #the backtester should get a list of trade signals and a list of price data for the assett 59 | 60 | #make necessary adjustments to state and then return it 61 | time_step += 1 62 | 63 | #if the current iteration is the last state ("terminal state") then set terminal_state to 1 64 | if time_step == xdata.shape[0]: 65 | state = xdata[time_step-1:time_step, :] 66 | terminal_state = 1 67 | signal.loc[time_step] = 0 68 | return state, time_step, signal, terminal_state 69 | 70 | #move the market data window one step forward 71 | state = xdata[time_step-1:time_step, :] 72 | #take action 73 | if action != 0: 74 | if action == 1: 75 | signal.loc[time_step] = 100 76 | elif action == 2: 77 | signal.loc[time_step] = -100 78 | elif action == 3: 79 | signal.loc[time_step] = 0 80 | terminal_state = 0 81 | 82 | return state, time_step, signal, terminal_state 83 | 84 | #Get Reward, the reward is returned at the end of an episode 85 | def get_reward(new_state, time_step, action, xdata, signal, terminal_state, epoch=0): 86 | reward = 0 87 | signal.fillna(value=0, inplace=True) 88 | if terminal_state == 0: 89 | #get reward for the most current action 90 | if signal[time_step] != signal[time_step-1] and terminal_state == 0: 91 | i=1 92 | while signal[time_step-i] == signal[time_step-1-i] and time_step - 1 - i > 0: 93 | i += 1 94 | reward = (xdata[time_step-1, 0] - xdata[time_step - i-1, 0]) * signal[time_step - 1]*-100 + i*np.abs(signal[time_step - 1])/10.0 95 | if signal[time_step] == 0 and signal[time_step - 1] == 0: 96 | reward -= 10 97 | 98 | #calculate the reward for all actions if the last iteration in set 99 | if terminal_state == 1: 100 | #run backtest, send list of trade signals and asset data to backtest function 101 | bt = twp.Backtest(pd.Series(data=[x[0] for x in xdata]), signal, signalType='shares') 102 | reward = bt.pnl.iloc[-1] 103 | 104 | return reward 105 | 106 | def evaluate_Q(eval_data, eval_model): 107 | #This function is used to evaluate the perofrmance of the system each epoch, without the influence of epsilon and random actions 108 | signal = pd.Series(index=np.arange(len(eval_data))) 109 | state, xdata = init_state(eval_data) 110 | status = 1 111 | terminal_state = 0 112 | time_step = 1 113 | while(status == 1): 114 | #We start in state S 115 | #Run the Q function on S to get predicted reward values on all the possible actions 116 | qval = eval_model.predict(state.reshape(1,2), batch_size=1) 117 | action = (np.argmax(qval)) 118 | #Take action, observe new state S' 119 | new_state, time_step, signal, terminal_state = take_action(state, xdata, action, signal, time_step) 120 | #Observe reward 121 | 
eval_reward = get_reward(new_state, time_step, action, xdata, signal, terminal_state, i) 122 | state = new_state 123 | if terminal_state == 1: #terminal state 124 | status = 0 125 | return eval_reward 126 | 127 | #This neural network is the the Q-function, run it like this: 128 | #model.predict(state.reshape(1,64), batch_size=1) 129 | 130 | from keras.models import Sequential 131 | from keras.layers.core import Dense, Dropout, Activation 132 | from keras.optimizers import RMSprop 133 | 134 | model = Sequential() 135 | model.add(Dense(4, init='lecun_uniform', input_shape=(2,))) 136 | model.add(Activation('relu')) 137 | #model.add(Dropout(0.2)) I'm not using dropout in this example 138 | 139 | model.add(Dense(4, init='lecun_uniform')) 140 | model.add(Activation('relu')) 141 | #model.add(Dropout(0.2)) 142 | 143 | model.add(Dense(4, init='lecun_uniform')) 144 | model.add(Activation('linear')) #linear output so we can have range of real-valued outputs 145 | 146 | rms = RMSprop() 147 | model.compile(loss='mse', optimizer=rms) 148 | 149 | 150 | import random, timeit 151 | 152 | start_time = timeit.default_timer() 153 | 154 | indata = load_data() 155 | epochs = 10 156 | gamma = 0.9 #a high gamma makes a long term reward more valuable 157 | epsilon = 1 158 | learning_progress = [] 159 | #stores tuples of (S, A, R, S') 160 | h = 0 161 | signal = pd.Series(index=np.arange(len(indata))) 162 | for i in range(epochs): 163 | 164 | state, xdata = init_state(indata) 165 | status = 1 166 | terminal_state = 0 167 | time_step = 1 168 | #while learning is still in progress 169 | while(status == 1): 170 | #We start in state S 171 | #Run the Q function on S to get predicted reward values on all the possible actions 172 | qval = model.predict(state.reshape(1,2), batch_size=1) 173 | if (random.random() < epsilon) and i != epochs - 1: #maybe choose random action if not the last epoch 174 | action = np.random.randint(0,4) #assumes 4 different actions 175 | else: #choose best action from Q(s,a) values 176 | action = (np.argmax(qval)) 177 | #Take action, observe new state S' 178 | new_state, time_step, signal, terminal_state = take_action(state, xdata, action, signal, time_step) 179 | #Observe reward 180 | reward = get_reward(new_state, time_step, action, xdata, signal, terminal_state, i) 181 | #Get max_Q(S',a) 182 | newQ = model.predict(new_state.reshape(1,2), batch_size=1) 183 | maxQ = np.max(newQ) 184 | y = np.zeros((1,4)) 185 | y[:] = qval[:] 186 | if terminal_state == 0: #non-terminal state 187 | update = (reward + (gamma * maxQ)) 188 | else: #terminal state (means that it is the last state) 189 | update = reward 190 | y[0][action] = update #target output 191 | model.fit(state.reshape(1,2), y, batch_size=1, nb_epoch=1, verbose=0) 192 | state = new_state 193 | if terminal_state == 1: #terminal state 194 | status = 0 195 | eval_reward = evaluate_Q(indata, model) 196 | print("Epoch #: %s Reward: %f Epsilon: %f" % (i,eval_reward, epsilon)) 197 | learning_progress.append((eval_reward)) 198 | if epsilon > 0.1: 199 | epsilon -= (1.0/epochs) 200 | 201 | elapsed = np.round(timeit.default_timer() - start_time, decimals=2) 202 | print("Completed in %f" % (elapsed,)) 203 | 204 | #plot results 205 | bt = twp.Backtest(pd.Series(data=[x[0] for x in xdata]), signal, signalType='shares') 206 | bt.data['delta'] = bt.data['shares'].diff().fillna(0) 207 | 208 | print(bt.data) 209 | 210 | plt.figure() 211 | bt.plotTrades() 212 | plt.suptitle('epoch' + str(i)) 213 | plt.savefig('plt/final_trades'+'.png', bbox_inches='tight', 
pad_inches=1, dpi=72) #assumes there is a ./plt dir 214 | plt.close('all') 215 | 216 | plt.figure() 217 | plt.subplot(3,1,1) 218 | bt.plotTrades() 219 | plt.subplot(3,1,2) 220 | bt.pnl.plot(style='x-') 221 | plt.subplot(3,1,3) 222 | plt.plot(learning_progress) 223 | 224 | plt.show() 225 | 226 | 227 | -------------------------------------------------------------------------------- /ex2-self_learning_quant.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | np.random.seed(1335) # for reproducibility 5 | np.set_printoptions(precision=5, suppress=True, linewidth=150) 6 | 7 | import pandas as pd 8 | import backtest as twp 9 | from matplotlib import pyplot as plt 10 | 11 | from sklearn import metrics, preprocessing 12 | 13 | ''' 14 | Name: The Self Learning Quant, Example 2 15 | 16 | Author: Daniel Zakrisson 17 | 18 | Created: 30/03/2016 19 | Copyright: (c) Daniel Zakrisson 2016 20 | Licence: BSD 21 | 22 | Requirements: 23 | Numpy 24 | Pandas 25 | MatplotLib 26 | scikit-learn 27 | Keras, https://keras.io/ 28 | backtest.py from the TWP library. Download backtest.py and put in the same folder 29 | 30 | /plt create a subfolder in the same directory where plot files will be saved 31 | 32 | ''' 33 | 34 | #Load data 35 | def load_data(): 36 | price = np.sin(np.arange(200)/30.0) #sine prices 37 | return price 38 | 39 | #Initialize first state, all items are placed deterministically 40 | def init_state(data): 41 | 42 | close = data 43 | diff = np.diff(data) 44 | diff = np.insert(diff, 0, 0) 45 | 46 | #--- Preprocess data 47 | xdata = np.column_stack((close, diff)) 48 | xdata = np.nan_to_num(xdata) 49 | scaler = preprocessing.StandardScaler() 50 | xdata = scaler.fit_transform(xdata) 51 | 52 | state = xdata[0:1, :] 53 | return state, xdata 54 | 55 | #Take Action 56 | def take_action(state, xdata, action, signal, time_step): 57 | #this should generate a list of trade signals that at evaluation time are fed to the backtester 58 | #the backtester should get a list of trade signals and a list of price data for the assett 59 | 60 | #make necessary adjustments to state and then return it 61 | time_step += 1 62 | 63 | #if the current iteration is the last state ("terminal state") then set terminal_state to 1 64 | if time_step == xdata.shape[0]: 65 | state = xdata[time_step-1:time_step, :] 66 | terminal_state = 1 67 | signal.loc[time_step] = 0 68 | return state, time_step, signal, terminal_state 69 | 70 | #move the market data window one step forward 71 | state = xdata[time_step-1:time_step, :] 72 | #take action 73 | if action != 0: 74 | if action == 1: 75 | signal.loc[time_step] = 100 76 | elif action == 2: 77 | signal.loc[time_step] = -100 78 | elif action == 3: 79 | signal.loc[time_step] = 0 80 | terminal_state = 0 81 | 82 | return state, time_step, signal, terminal_state 83 | 84 | #Get Reward, the reward is returned at the end of an episode 85 | def get_reward(new_state, time_step, action, xdata, signal, terminal_state, epoch=0): 86 | reward = 0 87 | signal.fillna(value=0, inplace=True) 88 | if terminal_state == 0: 89 | #get reward for the most current action 90 | if signal[time_step] != signal[time_step-1] and terminal_state == 0: 91 | i=1 92 | while signal[time_step-i] == signal[time_step-1-i] and time_step - 1 - i > 0: 93 | i += 1 94 | reward = (xdata[time_step-1, 0] - xdata[time_step - i-1, 0]) * signal[time_step - 1]*-100 + i*np.abs(signal[time_step - 1])/10.0 95 | if signal[time_step] == 0 and 
signal[time_step - 1] == 0: 96 | reward -= 10 97 | 98 | #calculate the reward for all actions if the last iteration in set 99 | if terminal_state == 1: 100 | #run backtest, send list of trade signals and asset data to backtest function 101 | bt = twp.Backtest(pd.Series(data=[x[0] for x in xdata]), signal, signalType='shares') 102 | reward = bt.pnl.iloc[-1] 103 | 104 | return reward 105 | 106 | def evaluate_Q(eval_data, eval_model): 107 | #This function is used to evaluate the perofrmance of the system each epoch, without the influence of epsilon and random actions 108 | signal = pd.Series(index=np.arange(len(eval_data))) 109 | state, xdata = init_state(eval_data) 110 | status = 1 111 | terminal_state = 0 112 | time_step = 1 113 | while(status == 1): 114 | #We start in state S 115 | #Run the Q function on S to get predicted reward values on all the possible actions 116 | qval = eval_model.predict(state.reshape(1,2), batch_size=1) 117 | action = (np.argmax(qval)) 118 | #Take action, observe new state S' 119 | new_state, time_step, signal, terminal_state = take_action(state, xdata, action, signal, time_step) 120 | #Observe reward 121 | eval_reward = get_reward(new_state, time_step, action, xdata, signal, terminal_state, i) 122 | state = new_state 123 | if terminal_state == 1: #terminal state 124 | status = 0 125 | return eval_reward 126 | 127 | #This neural network is the the Q-function, run it like this: 128 | #model.predict(state.reshape(1,64), batch_size=1) 129 | 130 | from keras.models import Sequential 131 | from keras.layers.core import Dense, Dropout, Activation 132 | from keras.optimizers import RMSprop 133 | 134 | model = Sequential() 135 | model.add(Dense(4, init='lecun_uniform', input_shape=(2,))) 136 | model.add(Activation('relu')) 137 | #model.add(Dropout(0.2)) I'm not using dropout in this example 138 | 139 | model.add(Dense(4, init='lecun_uniform')) 140 | model.add(Activation('relu')) 141 | #model.add(Dropout(0.2)) 142 | 143 | model.add(Dense(4, init='lecun_uniform')) 144 | model.add(Activation('linear')) #linear output so we can have range of real-valued outputs 145 | 146 | rms = RMSprop() 147 | model.compile(loss='mse', optimizer=rms) 148 | 149 | 150 | import random, timeit 151 | 152 | start_time = timeit.default_timer() 153 | 154 | indata = load_data() 155 | epochs = 100 156 | gamma = 0.9 #a high gamma makes a long term reward more valuable 157 | epsilon = 1 158 | learning_progress = [] 159 | #stores tuples of (S, A, R, S') 160 | h = 0 161 | signal = pd.Series(index=np.arange(len(indata))) 162 | for i in range(epochs): 163 | 164 | state, xdata = init_state(indata) 165 | status = 1 166 | terminal_state = 0 167 | time_step = 1 168 | #while learning is still in progress 169 | while(status == 1): 170 | #We start in state S 171 | #Run the Q function on S to get predicted reward values on all the possible actions 172 | qval = model.predict(state.reshape(1,2), batch_size=1) 173 | if (random.random() < epsilon) and i != epochs - 1: #maybe choose random action if not the last epoch 174 | action = np.random.randint(0,4) #assumes 4 different actions 175 | else: #choose best action from Q(s,a) values 176 | action = (np.argmax(qval)) 177 | #Take action, observe new state S' 178 | new_state, time_step, signal, terminal_state = take_action(state, xdata, action, signal, time_step) 179 | #Observe reward 180 | reward = get_reward(new_state, time_step, action, xdata, signal, terminal_state, i) 181 | #Get max_Q(S',a) 182 | newQ = model.predict(new_state.reshape(1,2), batch_size=1) 183 | maxQ = 
np.max(newQ) 184 | y = np.zeros((1,4)) 185 | y[:] = qval[:] 186 | if terminal_state == 0: #non-terminal state 187 | update = (reward + (gamma * maxQ)) 188 | else: #terminal state (means that it is the last state) 189 | update = reward 190 | y[0][action] = update #target output 191 | model.fit(state.reshape(1,2), y, batch_size=1, nb_epoch=1, verbose=0) 192 | state = new_state 193 | if terminal_state == 1: #terminal state 194 | status = 0 195 | eval_reward = evaluate_Q(indata, model) 196 | print("Epoch #: %s Reward: %f Epsilon: %f" % (i,eval_reward, epsilon)) 197 | learning_progress.append((eval_reward)) 198 | if epsilon > 0.1: 199 | epsilon -= (1.0/epochs) 200 | 201 | elapsed = np.round(timeit.default_timer() - start_time, decimals=2) 202 | print("Completed in %f" % (elapsed,)) 203 | 204 | #plot results 205 | bt = twp.Backtest(pd.Series(data=[x[0] for x in xdata]), signal, signalType='shares') 206 | bt.data['delta'] = bt.data['shares'].diff().fillna(0) 207 | 208 | print(bt.data) 209 | 210 | plt.figure() 211 | bt.plotTrades() 212 | plt.suptitle('epoch' + str(i)) 213 | plt.savefig('plt/final_trades'+'.png', bbox_inches='tight', pad_inches=1, dpi=72) #assumes there is a ./plt dir 214 | plt.close('all') 215 | 216 | plt.figure() 217 | plt.subplot(3,1,1) 218 | bt.plotTrades() 219 | plt.subplot(3,1,2) 220 | bt.pnl.plot(style='x-') 221 | plt.subplot(3,1,3) 222 | plt.plot(learning_progress) 223 | 224 | plt.show() -------------------------------------------------------------------------------- /ex3-self_learning_quant.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | np.random.seed(1335) # for reproducibility 6 | np.set_printoptions(precision=5, suppress=True, linewidth=150) 7 | 8 | import pandas as pd 9 | import backtest as twp 10 | from matplotlib import pyplot as plt 11 | from sklearn import metrics, preprocessing 12 | from talib.abstract import * 13 | from sklearn.externals import joblib 14 | 15 | import Quandl 16 | 17 | ''' 18 | Name: The Self Learning Quant, Example 3 19 | 20 | Author: Daniel Zakrisson 21 | 22 | Created: 30/03/2016 23 | Copyright: (c) Daniel Zakrisson 2016 24 | Licence: BSD 25 | 26 | Requirements: 27 | Numpy 28 | Pandas 29 | MatplotLib 30 | scikit-learn 31 | TA-Lib, instructions at https://mrjbq7.github.io/ta-lib/install.html 32 | Keras, https://keras.io/ 33 | Quandl, https://www.quandl.com/tools/python 34 | backtest.py from the TWP library. 
Download backtest.py and put in the same folder 35 | 36 | /plt create a subfolder in the same directory where plot files will be saved 37 | 38 | ''' 39 | 40 | #Load data 41 | def read_convert_data(symbol='XBTEUR'): 42 | if symbol == 'XBTEUR': 43 | prices = Quandl.get("BCHARTS/KRAKENEUR") 44 | prices.to_pickle('data/XBTEUR_1day.pkl') # a /data folder must exist 45 | if symbol == 'EURUSD_1day': 46 | #prices = Quandl.get("ECB/EURUSD") 47 | prices = pd.read_csv('data/EURUSD_1day.csv',sep=",", skiprows=0, header=0, index_col=0, parse_dates=True, names=['ticker', 'date', 'time', 'open', 'low', 'high', 'close']) 48 | prices.to_pickle('data/EURUSD_1day.pkl') 49 | print(prices) 50 | return 51 | 52 | def load_data(test=False): 53 | #prices = pd.read_pickle('data/OILWTI_1day.pkl') 54 | #prices = pd.read_pickle('data/EURUSD_1day.pkl') 55 | #prices.rename(columns={'Value': 'close'}, inplace=True) 56 | prices = pd.read_pickle('data/XBTEUR_1day.pkl') 57 | prices.rename(columns={'Open': 'open', 'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume (BTC)': 'volume'}, inplace=True) 58 | print(prices) 59 | x_train = prices.iloc[-2000:-300,] 60 | x_test= prices.iloc[-2000:,] 61 | if test: 62 | return x_test 63 | else: 64 | return x_train 65 | 66 | #Initialize first state, all items are placed deterministically 67 | def init_state(indata, test=False): 68 | close = indata['close'].values 69 | diff = np.diff(close) 70 | diff = np.insert(diff, 0, 0) 71 | sma15 = SMA(indata, timeperiod=15) 72 | sma60 = SMA(indata, timeperiod=60) 73 | rsi = RSI(indata, timeperiod=14) 74 | atr = ATR(indata, timeperiod=14) 75 | 76 | #--- Preprocess data 77 | xdata = np.column_stack((close, diff, sma15, close-sma15, sma15-sma60, rsi, atr)) 78 | 79 | xdata = np.nan_to_num(xdata) 80 | if test == False: 81 | scaler = preprocessing.StandardScaler() 82 | xdata = np.expand_dims(scaler.fit_transform(xdata), axis=1) 83 | joblib.dump(scaler, 'data/scaler.pkl') 84 | elif test == True: 85 | scaler = joblib.load('data/scaler.pkl') 86 | xdata = np.expand_dims(scaler.fit_transform(xdata), axis=1) 87 | state = xdata[0:1, 0:1, :] 88 | 89 | return state, xdata, close 90 | 91 | #Take Action 92 | def take_action(state, xdata, action, signal, time_step): 93 | #this should generate a list of trade signals that at evaluation time are fed to the backtester 94 | #the backtester should get a list of trade signals and a list of price data for the assett 95 | 96 | #make necessary adjustments to state and then return it 97 | time_step += 1 98 | 99 | #if the current iteration is the last state ("terminal state") then set terminal_state to 1 100 | if time_step + 1 == xdata.shape[0]: 101 | state = xdata[time_step-1:time_step, 0:1, :] 102 | terminal_state = 1 103 | signal.loc[time_step] = 0 104 | 105 | return state, time_step, signal, terminal_state 106 | 107 | #move the market data window one step forward 108 | state = xdata[time_step-1:time_step, 0:1, :] 109 | #take action 110 | if action == 1: 111 | signal.loc[time_step] = 100 112 | elif action == 2: 113 | signal.loc[time_step] = -100 114 | else: 115 | signal.loc[time_step] = 0 116 | #print(state) 117 | terminal_state = 0 118 | #print(signal) 119 | 120 | return state, time_step, signal, terminal_state 121 | 122 | #Get Reward, the reward is returned at the end of an episode 123 | def get_reward(new_state, time_step, action, xdata, signal, terminal_state, eval=False, epoch=0): 124 | reward = 0 125 | signal.fillna(value=0, inplace=True) 126 | 127 | if eval == False: 128 | bt = twp.Backtest(pd.Series(data=[x for x 
in xdata[time_step-2:time_step]], index=signal[time_step-2:time_step].index.values), signal[time_step-2:time_step], signalType='shares') 129 | reward = ((bt.data['price'].iloc[-1] - bt.data['price'].iloc[-2])*bt.data['shares'].iloc[-1]) 130 | 131 | if terminal_state == 1 and eval == True: 132 | #save a figure of the test set 133 | bt = twp.Backtest(pd.Series(data=[x for x in xdata], index=signal.index.values), signal, signalType='shares') 134 | reward = bt.pnl.iloc[-1] 135 | plt.figure(figsize=(3,4)) 136 | bt.plotTrades() 137 | plt.axvline(x=400, color='black', linestyle='--') 138 | plt.text(250, 400, 'training data') 139 | plt.text(450, 400, 'test data') 140 | plt.suptitle(str(epoch)) 141 | plt.savefig('plt/'+str(epoch)+'.png', bbox_inches='tight', pad_inches=1, dpi=72) 142 | plt.close('all') 143 | #print(time_step, terminal_state, eval, reward) 144 | 145 | return reward 146 | 147 | def evaluate_Q(eval_data, eval_model, price_data, epoch=0): 148 | #This function is used to evaluate the performance of the system each epoch, without the influence of epsilon and random actions 149 | signal = pd.Series(index=np.arange(len(eval_data))) 150 | state, xdata, price_data = init_state(eval_data) 151 | status = 1 152 | terminal_state = 0 153 | time_step = 1 154 | while(status == 1): 155 | #We start in state S 156 | #Run the Q function on S to get predicted reward values on all the possible actions 157 | qval = eval_model.predict(state, batch_size=1) 158 | action = (np.argmax(qval)) 159 | #Take action, observe new state S' 160 | new_state, time_step, signal, terminal_state = take_action(state, xdata, action, signal, time_step) 161 | #Observe reward 162 | eval_reward = get_reward(new_state, time_step, action, price_data, signal, terminal_state, eval=True, epoch=epoch) 163 | state = new_state 164 | if terminal_state == 1: #terminal state 165 | status = 0 166 | 167 | return eval_reward 168 | 169 | #This neural network is the the Q-function, run it like this: 170 | #model.predict(state.reshape(1,64), batch_size=1) 171 | 172 | from keras.models import Sequential 173 | from keras.layers.core import Dense, Dropout, Activation 174 | from keras.layers.recurrent import LSTM 175 | from keras.optimizers import RMSprop, Adam 176 | 177 | tsteps = 1 178 | batch_size = 1 179 | num_features = 7 180 | 181 | model = Sequential() 182 | model.add(LSTM(64, 183 | input_shape=(1, num_features), 184 | return_sequences=True, 185 | stateful=False)) 186 | model.add(Dropout(0.5)) 187 | 188 | model.add(LSTM(64, 189 | input_shape=(1, num_features), 190 | return_sequences=False, 191 | stateful=False)) 192 | model.add(Dropout(0.5)) 193 | 194 | model.add(Dense(4, init='lecun_uniform')) 195 | model.add(Activation('linear')) #linear output so we can have range of real-valued outputs 196 | 197 | rms = RMSprop() 198 | adam = Adam() 199 | model.compile(loss='mse', optimizer=adam) 200 | 201 | 202 | import random, timeit 203 | 204 | start_time = timeit.default_timer() 205 | 206 | read_convert_data(symbol='XBTEUR') #run once to read indata, resample and convert to pickle 207 | indata = load_data() 208 | test_data = load_data(test=True) 209 | epochs = 100 210 | gamma = 0.95 #since the reward can be several time steps away, make gamma high 211 | epsilon = 1 212 | batchSize = 100 213 | buffer = 200 214 | replay = [] 215 | learning_progress = [] 216 | #stores tuples of (S, A, R, S') 217 | h = 0 218 | #signal = pd.Series(index=market_data.index) 219 | signal = pd.Series(index=np.arange(len(indata))) 220 | for i in range(epochs): 221 | if i == 
epochs-1: #the last epoch, use test data set 222 | indata = load_data(test=True) 223 | state, xdata, price_data = init_state(indata, test=True) 224 | else: 225 | state, xdata, price_data = init_state(indata) 226 | status = 1 227 | terminal_state = 0 228 | #time_step = market_data.index[0] + 64 #when using market_data 229 | time_step = 14 230 | #while game still in progress 231 | while(status == 1): 232 | #We are in state S 233 | #Let's run our Q function on S to get Q values for all possible actions 234 | qval = model.predict(state, batch_size=1) 235 | if (random.random() < epsilon): #choose random action 236 | action = np.random.randint(0,4) #assumes 4 different actions 237 | else: #choose best action from Q(s,a) values 238 | action = (np.argmax(qval)) 239 | #Take action, observe new state S' 240 | new_state, time_step, signal, terminal_state = take_action(state, xdata, action, signal, time_step) 241 | #Observe reward 242 | reward = get_reward(new_state, time_step, action, price_data, signal, terminal_state) 243 | 244 | #Experience replay storage 245 | if (len(replay) < buffer): #if buffer not filled, add to it 246 | replay.append((state, action, reward, new_state)) 247 | #print(time_step, reward, terminal_state) 248 | else: #if buffer full, overwrite old values 249 | if (h < (buffer-1)): 250 | h += 1 251 | else: 252 | h = 0 253 | replay[h] = (state, action, reward, new_state) 254 | #randomly sample our experience replay memory 255 | minibatch = random.sample(replay, batchSize) 256 | X_train = [] 257 | y_train = [] 258 | for memory in minibatch: 259 | #Get max_Q(S',a) 260 | old_state, action, reward, new_state = memory 261 | old_qval = model.predict(old_state, batch_size=1) 262 | newQ = model.predict(new_state, batch_size=1) 263 | maxQ = np.max(newQ) 264 | y = np.zeros((1,4)) 265 | y[:] = old_qval[:] 266 | if terminal_state == 0: #non-terminal state 267 | update = (reward + (gamma * maxQ)) 268 | else: #terminal state 269 | update = reward 270 | y[0][action] = update 271 | #print(time_step, reward, terminal_state) 272 | X_train.append(old_state) 273 | y_train.append(y.reshape(4,)) 274 | 275 | X_train = np.squeeze(np.array(X_train), axis=(1)) 276 | y_train = np.array(y_train) 277 | model.fit(X_train, y_train, batch_size=batchSize, nb_epoch=1, verbose=0) 278 | 279 | state = new_state 280 | if terminal_state == 1: #if reached terminal state, update epoch status 281 | status = 0 282 | eval_reward = evaluate_Q(test_data, model, price_data, i) 283 | learning_progress.append((eval_reward)) 284 | print("Epoch #: %s Reward: %f Epsilon: %f" % (i,eval_reward, epsilon)) 285 | #learning_progress.append((reward)) 286 | if epsilon > 0.1: #decrement epsilon over time 287 | epsilon -= (1.0/epochs) 288 | 289 | 290 | elapsed = np.round(timeit.default_timer() - start_time, decimals=2) 291 | print("Completed in %f" % (elapsed,)) 292 | 293 | bt = twp.Backtest(pd.Series(data=[x[0,0] for x in xdata]), signal, signalType='shares') 294 | bt.data['delta'] = bt.data['shares'].diff().fillna(0) 295 | 296 | print(bt.data) 297 | unique, counts = np.unique(filter(lambda v: v==v, signal.values), return_counts=True) 298 | print(np.asarray((unique, counts)).T) 299 | 300 | plt.figure() 301 | plt.subplot(3,1,1) 302 | bt.plotTrades() 303 | plt.subplot(3,1,2) 304 | bt.pnl.plot(style='x-') 305 | plt.subplot(3,1,3) 306 | plt.plot(learning_progress) 307 | 308 | plt.savefig('plt/summary'+'.png', bbox_inches='tight', pad_inches=1, dpi=72) 309 | #plt.show() 310 | 311 | 312 | 
--------------------------------------------------------------------------------
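The three training scripts above share the same core Q-learning update: at each step the network is fitted toward a target vector y equal to its current prediction, except that y[action] = reward + gamma * max_a Q(s', a) for non-terminal steps and y[action] = reward at the terminal step, with an epsilon-greedy policy whose epsilon decays by 1/epochs per epoch (ex3 additionally draws minibatches from an experience-replay buffer). The sketch below is a minimal, self-contained illustration of that target construction using only numpy; the lookup table standing in for the Keras model, the three-valued state discretisation (state_of), the action-to-position mapping (position_of), the 0.1 learning rate, and the one-bar P&L reward are simplifications introduced here for clarity and are not part of the repository's code.

# Minimal numpy-only sketch of the Q-learning target update used in ex1-ex3.
# Action codes mirror the examples: 0 = hold, 1 = go long 100, 2 = go short 100, 3 = go flat.
import numpy as np

np.random.seed(1335)

prices = np.sin(np.arange(200) / 30.0)   # sine price series, as in ex2
n_actions = 4
q_table = np.zeros((3, n_actions))       # 3 discrete states x 4 actions (stand-in for the network)

def state_of(t):
    # Discretise the market into falling / flat / rising (0 / 1 / 2).
    d = prices[t] - prices[t - 1]
    return 0 if d < 0 else (2 if d > 0 else 1)

def position_of(action, prev_position):
    # Map an action code to a target position in shares; 0 keeps the current position.
    if action == 1:
        return 100
    if action == 2:
        return -100
    if action == 3:
        return 0
    return prev_position

gamma, epsilon, epochs, alpha = 0.9, 1.0, 100, 0.1
for epoch in range(epochs):
    position = 0
    for t in range(1, len(prices) - 1):
        s = state_of(t)
        # epsilon-greedy action selection, as in the training loops above
        if np.random.rand() < epsilon:
            action = np.random.randint(n_actions)
        else:
            action = int(np.argmax(q_table[s]))
        position = position_of(action, position)
        # toy reward: P&L of holding the chosen position for one bar
        reward = position * (prices[t + 1] - prices[t])
        s_next = state_of(t + 1)
        terminal = (t == len(prices) - 2)
        # table analogue of model.fit(state, y) with y[action] = reward + gamma * maxQ
        target = reward if terminal else reward + gamma * np.max(q_table[s_next])
        q_table[s, action] += alpha * (target - q_table[s, action])
    if epsilon > 0.1:
        epsilon -= 1.0 / epochs

print(q_table)

In the repository itself the table is replaced by a small dense Keras network (ex1 and ex2) or an LSTM trained from an experience-replay buffer (ex3), the state is built from price, diff, and (in ex3) TA-Lib indicators, and the reward is taken from backtest.py's P&L rather than the one-bar toy reward used here.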