├── README.md
├── bi_lstm_forex.py
├── deep_lstm_forex.py
├── forex.py
├── lstm_forex.py
├── lstm_ts_2.py
├── quant.py
└── svr_ts.py
/README.md:
--------------------------------------------------------------------------------
 1 | # LSTM Forex prediction
 2 | A long short-term memory (LSTM) recurrent neural network for predicting forex time series.
 3 | 
 4 | The model can be trained on daily or minute data of any forex pair. The data can be downloaded
 5 | from [here](http://www.histdata.com/download-free-forex-data/).
 6 | 
 7 | The LSTM-RNN should learn to predict the next day's or minute's value from the preceding window of data.
 8 | 
 9 | The neural network is implemented in Theano.
10 | 
11 | This code is not maintained anymore.
12 | 
13 | 
14 | 
15 | 
16 | 
--------------------------------------------------------------------------------
/bi_lstm_forex.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Bidirectional LSTM RNN for forex predictions
 3 | Based on the sentiment analysis LSTM found in the deeplearning tutorials
 4 | '''
 5 | from collections import OrderedDict
 6 | import copy
 7 | import cPickle as pkl
 8 | import random
 9 | import sys
10 | import time
11 | import pdb
12 | import os
13 | 
14 | import numpy
15 | import theano
16 | import theano.tensor as tensor
17 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
18 | from theano.ifelse import ifelse
19 | 
20 | 
21 | from forex import read_data, prepare_data
22 | 
23 | #### rectified linear unit
24 | def ReLU(x):
25 |     y = tensor.maximum(0.0, x)
26 |     return(y)
27 | 
28 | def get_minibatches_idx(n, minibatch_size, shuffle=False):
29 |     """
30 |     Used to shuffle the dataset at each iteration.
31 |     """
32 | 
33 |     idx_list = numpy.arange(n, dtype="int32")
34 | 
35 |     if shuffle:
36 |         random.shuffle(idx_list)
37 | 
38 |     minibatches = []
39 |     minibatch_start = 0
40 |     for i in range(n // minibatch_size):
41 |         minibatches.append(idx_list[minibatch_start:
42 |                                     minibatch_start + minibatch_size])
43 |         minibatch_start += minibatch_size
44 | 
45 |     if (minibatch_start != n):
46 |         # Make a minibatch out of what is left
47 |         minibatches.append(idx_list[minibatch_start:])
48 | 
49 |     return zip(range(len(minibatches)), minibatches)
50 | 
51 | def zipp(params, tparams):
52 |     """
53 |     When we reload the model. Needed for the GPU stuff.
54 |     """
55 |     for kk, vv in params.iteritems():
56 |         tparams[kk].set_value(vv)
57 | 
58 | 
59 | def unzip(zipped):
60 |     """
61 |     When we pickle the model. Needed for the GPU stuff.
62 |     """
63 |     new_params = OrderedDict()
64 |     for kk, vv in zipped.iteritems():
65 |         new_params[kk] = vv.get_value()
66 |     return new_params
67 | 
68 | 
69 | def dropout_layer(state_before, use_noise, trng):
70 |     proj = tensor.switch(use_noise,
71 |                          (state_before *
72 |                           trng.binomial(state_before.shape,
73 |                                         p=0.5, n=1,
74 |                                         dtype=state_before.dtype)),
75 |                          state_before * 0.5)
76 |     return proj
77 | 
78 | 
79 | def _p(pp, name):
80 |     return '%s_%s' % (pp, name)
81 | 
82 | 
83 | def init_params(options):
84 |     """
85 |     Global (not LSTM) parameters, for the embedding and the classifier.
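    As a rough sketch of the shapes this produces with the defaults used in
    train_lstm below (n_input=4, dim_proj=32, ydim=1):

        params['Wemb']    # (4, 32)   projection of the input features
        params['lstm_W']  # (32, 128) input-to-gate weights (4 gates stacked)
        params['lstm_U']  # (32, 128) recurrent gate weights
        params['lstm_b']  # (128,)    gate biases
        params['U']       # (32, 1)   regression output weights
        params['b']       # (1,)      regression output bias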
86 | """ 87 | params = OrderedDict() 88 | # embedding 89 | randn = numpy.random.rand(options['n_input'], 90 | options['dim_proj']) 91 | params['Wemb'] = (0.01 * randn).astype('float32') 92 | params = get_layer(options['encoder'])[0](options, 93 | params, 94 | prefix=options['encoder']) 95 | # classifier 96 | params['U'] = 0.01 * numpy.random.randn(options['dim_proj'], 97 | options['ydim']).astype('float32') 98 | params['b'] = numpy.zeros((options['ydim'],)).astype('float32') 99 | 100 | return params 101 | 102 | 103 | def load_params(path, params): 104 | pp = numpy.load(path) 105 | for kk, vv in params.iteritems(): 106 | if kk not in pp: 107 | raise Warning('%s is not in the archive' % kk) 108 | params[kk] = pp[kk] 109 | 110 | return params 111 | 112 | 113 | def init_tparams(params): 114 | tparams = OrderedDict() 115 | for kk, pp in params.iteritems(): 116 | tparams[kk] = theano.shared(params[kk], name=kk) 117 | return tparams 118 | 119 | 120 | def get_layer(name): 121 | fns = layers[name] 122 | return fns 123 | 124 | 125 | def ortho_weight(ndim): 126 | W = numpy.random.randn(ndim, ndim) 127 | u, s, v = numpy.linalg.svd(W) 128 | return u.astype('float32') 129 | 130 | 131 | def param_init_lstm(options, params, prefix='lstm'): 132 | """ 133 | Init the LSTM parameter: 134 | 135 | :see: init_params 136 | """ 137 | W = numpy.concatenate([ortho_weight(options['dim_proj']), 138 | ortho_weight(options['dim_proj']), 139 | ortho_weight(options['dim_proj']), 140 | ortho_weight(options['dim_proj'])], axis=1) 141 | params[_p(prefix, 'W')] = W.astype('float32') 142 | U = numpy.concatenate([ortho_weight(options['dim_proj']), 143 | ortho_weight(options['dim_proj']), 144 | ortho_weight(options['dim_proj']), 145 | ortho_weight(options['dim_proj'])], axis=1) 146 | params[_p(prefix, 'U')] = U.astype('float32') 147 | b = numpy.zeros((4 * options['dim_proj'],)) 148 | params[_p(prefix, 'b')] = b.astype('float32') 149 | 150 | return params 151 | 152 | 153 | def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None): 154 | nsteps = state_below.shape[0] 155 | if state_below.ndim == 3: 156 | n_samples = state_below.shape[1] 157 | else: 158 | n_samples = 1 159 | 160 | #assert mask is not None 161 | 162 | def _slice(_x, n, dim): 163 | if _x.ndim == 3: 164 | return _x[:, :, n*dim:(n+1)*dim] 165 | return _x[:, n*dim:(n+1)*dim] 166 | 167 | def _step(x_, h_, c_): 168 | preact = tensor.dot(h_, tparams[_p(prefix, 'U')]) 169 | preact += x_ 170 | preact += tparams[_p(prefix, 'b')] 171 | 172 | i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj'])) 173 | f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj'])) 174 | o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj'])) 175 | c = tensor.tanh(_slice(preact, 3, options['dim_proj'])) 176 | 177 | c = f * c_ + i * c 178 | #c = m_[:, None] * c + (1. - m_)[:, None] * c_ 179 | 180 | h = o * tensor.tanh(c) 181 | #TODO: I think this don't apply since is made to avoid sequences smaller tan max_len 182 | #h = m_[:, None] * h + (1. 
- m_)[:, None] * h_ 183 | 184 | return h, c 185 | 186 | state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) + 187 | tparams[_p(prefix, 'b')]) 188 | 189 | dim_proj = options['dim_proj'] 190 | rval, updates = theano.scan(_step, 191 | sequences=[state_below], 192 | outputs_info=[tensor.alloc(0., n_samples, 193 | dim_proj), 194 | tensor.alloc(0., n_samples, 195 | dim_proj)], 196 | name=_p(prefix, '_layers'), 197 | n_steps=nsteps) 198 | return rval[0] 199 | 200 | 201 | # ff: Feed Forward (normal neural net), only useful to put after lstm 202 | # before the classifier. 203 | layers = {'lstm': (param_init_lstm, lstm_layer)} 204 | 205 | def mom_sgd(lr, tparams, grads, x, rx, y, cost): 206 | """ Stochastic Gradient Descent 207 | 208 | :note: A more complicated version of sgd then needed. This is 209 | done like that for adadelta and rmsprop. 210 | 211 | """ 212 | 213 | updates = OrderedDict() 214 | 215 | mom = tensor.scalar(name='mom') 216 | gmomshared = [theano.shared(p.get_value(), name='%s_mom_grad' %k) 217 | for k,p in tparams.iteritems()] 218 | 219 | # New set of shared variable that will contain the gradient 220 | # for a mini-batch. 221 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 222 | for k, p in tparams.iteritems()] 223 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 224 | 225 | # Function that computes gradients for a mini-batch, but do not 226 | # updates the weights. 227 | f_grad_shared = theano.function([x,rx, y], cost, updates=gsup, 228 | name='sgd_f_grad_shared') 229 | 230 | for gm,gp in zip(gmomshared,gshared): 231 | updates[gm] = mom*gm - (1.0 - mom) * lr * gp 232 | #gmomup = [(gm, mom*gm - (1.0 - mom) * lr * gp) for gm,gp in 233 | # zip(gmomshared, gshared)] 234 | 235 | #pup = [(p, p + gm) for p, gm in zip(tparams.values(), gmomup)] 236 | for p,gm in zip(tparams.values(), gmomshared): 237 | updates[p] = p + updates[gm] 238 | 239 | # Function that updates the weights from the previously computed 240 | # gradient. 241 | f_update = theano.function([lr,mom], [], updates=updates, 242 | name='sgd_f_update') 243 | 244 | return f_grad_shared, f_update 245 | 246 | 247 | def sgd(lr, tparams, grads, x, rx, y, cost): 248 | """ Stochastic Gradient Descent 249 | 250 | :note: A more complicated version of sgd then needed. This is 251 | done like that for adadelta and rmsprop. 252 | 253 | """ 254 | # New set of shared variable that will contain the gradient 255 | # for a mini-batch. 256 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 257 | for k, p in tparams.iteritems()] 258 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 259 | 260 | # Function that computes gradients for a mini-batch, but do not 261 | # updates the weights. 262 | f_grad_shared = theano.function([x,rx, y], cost, updates=gsup, 263 | name='sgd_f_grad_shared') 264 | 265 | pup = [(p, p - lr * g) for p, g in zip(tparams.values(), gshared)] 266 | 267 | # Function that updates the weights from the previously computed 268 | # gradient. 
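    # A minimal sketch of the update rule mom_sgd above builds out of Theano
    # shared variables, written for a single parameter p with gradient g and
    # velocity v (hypothetical plain-numpy names):
    #
    #     v = mom * v - (1.0 - mom) * lr * g   # momentum-smoothed step
    #     p = p + v                            # apply the step
    #
    # The pup list just above implements the plain SGD special case,
    # p = p - lr * g, once f_grad_shared has stored the minibatch gradients
    # in gshared.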
269 | f_update = theano.function([lr], [], updates=pup, 270 | name='sgd_f_update') 271 | 272 | return f_grad_shared, f_update 273 | 274 | 275 | def adadelta(lr, tparams, grads, x,rx,y, cost): 276 | zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), 277 | name='%s_grad' % k) 278 | for k, p in tparams.iteritems()] 279 | running_up2 = [theano.shared(p.get_value() * numpy.float32(0.), 280 | name='%s_rup2' % k) 281 | for k, p in tparams.iteritems()] 282 | running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), 283 | name='%s_rgrad2' % k) 284 | for k, p in tparams.iteritems()] 285 | 286 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 287 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 288 | for rg2, g in zip(running_grads2, grads)] 289 | 290 | f_grad_shared = theano.function([x,rx, y], cost, updates=zgup+rg2up, 291 | name='adadelta_f_grad_shared') 292 | 293 | updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg 294 | for zg, ru2, rg2 in zip(zipped_grads, 295 | running_up2, 296 | running_grads2)] 297 | ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) 298 | for ru2, ud in zip(running_up2, updir)] 299 | param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)] 300 | 301 | f_update = theano.function([lr], [], updates=ru2up+param_up, 302 | on_unused_input='ignore', 303 | name='adadelta_f_update', 304 | mode='DebugMode') 305 | 306 | return f_grad_shared, f_update 307 | 308 | 309 | def rmsprop(lr, tparams, grads, x, rx, y, cost): 310 | zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), 311 | name='%s_grad' % k) 312 | for k, p in tparams.iteritems()] 313 | running_grads = [theano.shared(p.get_value() * numpy.float32(0.), 314 | name='%s_rgrad' % k) 315 | for k, p in tparams.iteritems()] 316 | running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), 317 | name='%s_rgrad2' % k) 318 | for k, p in tparams.iteritems()] 319 | 320 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 321 | rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)] 322 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 323 | for rg2, g in zip(running_grads2, grads)] 324 | 325 | f_grad_shared = theano.function([x,rx, y], cost, 326 | updates=zgup + rgup + rg2up, 327 | name='rmsprop_f_grad_shared') 328 | 329 | updir = [theano.shared(p.get_value() * numpy.float32(0.), 330 | name='%s_updir' % k) 331 | for k, p in tparams.iteritems()] 332 | updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4)) 333 | for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, 334 | running_grads2)] 335 | param_up = [(p, p + udn[1]) 336 | for p, udn in zip(tparams.values(), updir_new)] 337 | f_update = theano.function([lr], [], updates=updir_new+param_up, 338 | on_unused_input='ignore', 339 | name='rmsprop_f_update') 340 | 341 | return f_grad_shared, f_update 342 | 343 | 344 | def build_model(tparams, options): 345 | trng = RandomStreams(1234) 346 | 347 | # Used for dropout. 
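    # A small sketch of what dropout_layer above computes, using a plain numpy
    # stand-in for the Theano graph (rng and emb are hypothetical names):
    #
    #     mask = rng.binomial(n=1, p=0.5, size=emb.shape)  # training branch
    #     train_out = emb * mask                           # half the units zeroed at random
    #     test_out  = emb * 0.5                            # evaluation branch: rescale instead
    #
    # use_noise is the shared switch created just below; the training loop sets
    # it to 1. for each minibatch and back to 0. before validation and testing.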
348 | use_noise = theano.shared(numpy.float32(0.)) 349 | 350 | x = tensor.tensor3('x', dtype='float32') 351 | rx = tensor.tensor3('rx', dtype='float32') 352 | #mask = tensor.matrix('mask', dtype='float32') 353 | y = tensor.vector('y', dtype='float32') 354 | 355 | n_timesteps = x.shape[0] 356 | n_samples = x.shape[1] 357 | n_dim = x.shape[2] 358 | 359 | lstm_outs = [] 360 | for inp in [x, rx]: 361 | emb = tensor.dot(inp,tparams['Wemb']) 362 | #emb = tensor.nnet.sigmoid(emb) 363 | #emb = ReLU(emb) 364 | 365 | if options['use_dropout']: 366 | emb = dropout_layer(emb, use_noise, trng) 367 | 368 | proj = get_layer(options['encoder'])[1](tparams, emb, options, 369 | prefix=options['encoder'] 370 | ) 371 | 372 | 373 | if options['encoder'] == 'lstm' and options['sum_pool'] == True: 374 | proj = proj.sum(axis=0) 375 | proj = proj / options['n_iter'] 376 | else: 377 | proj = proj[-1] 378 | lstm_outs.append(proj) 379 | 380 | del proj 381 | #if options['use_dropout']: 382 | # proj = dropout_layer(proj, use_noise, trng) 383 | 384 | #pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U'])+tparams['b']) 385 | #pred = tensor.nnet.sigmoid(tensor.dot(proj, tparams['U'])\ 386 | # + tparams['b']) 387 | pred = tensor.dot(tensor.concatenate(lstm_outs), tparams['U']) + tparams['b'] 388 | 389 | pred = pred.reshape((2,pred.shape[0]/2, pred.shape[1])).mean(axis=0) 390 | f_pred_prob = theano.function([x,rx], pred, name='f_pred_prob') 391 | #f_pred = theano.function(x, pred.argmax(axis=1), name='f_pred') 392 | 393 | cost = tensor.mean((y-pred.T)**2) 394 | 395 | #cost = -tensor.log(pred[tensor.arange(n_samples), y] + 1e-8).mean() 396 | 397 | return use_noise, x, rx, y, f_pred_prob, cost 398 | 399 | 400 | def pred_probs(f_pred, prepare_data, data, model_options, verbose=False): 401 | """ If you want to use a trained model, this is useful to compute 402 | the probabilities of new examples. 403 | """ 404 | n_samples = len(data) 405 | 406 | x,y = prepare_data(data, numpy.array([]), 407 | model_options['n_iter'], model_options['n_input']) 408 | rx,_ = prepare_data(data[:][::-1], numpy.array([]), 409 | model_options['n_iter'], model_options['n_input']) 410 | 411 | pred = f_pred(x,rx) 412 | 413 | return pred 414 | 415 | 416 | def pred_error(f_pred, prepare_data, data, iterator, model_options, verbose=False): 417 | """ 418 | Just compute the error 419 | f_pred: Theano fct computing the prediction 420 | prepare_data: usual prepare_data for that dataset. 421 | """ 422 | valid_err = 0 423 | for _, valid_index in iterator: 424 | # TODO: This is not very efficient I should check 425 | x, y = prepare_data([data[0][t] for t in valid_index], 426 | numpy.array(data[1])[valid_index], 427 | model_options['n_iter'],model_options['n_input']) 428 | rx, _ = prepare_data([data[0][t][::-1] for t in valid_index], 429 | numpy.array(data[1])[valid_index], 430 | model_options['n_iter'],model_options['n_input']) 431 | 432 | 433 | 434 | preds = f_pred(x,rx) 435 | targets = numpy.array(data[1])[valid_index] 436 | valid_err += tensor.sum((targets-preds.T)**2) 437 | #valid_err = 1. - numpy.float32(valid_err) / len(data[0]) 438 | valid_err = valid_err / len(data[0]) 439 | 440 | return valid_err.eval() 441 | 442 | 443 | def R_score(f_pred, prepare_data, data, iterator, model_options, verbose=False): 444 | """ 445 | Just compute the error 446 | f_pred: Theano fct computing the prediction 447 | prepare_data: usual prepare_data for that dataset. 
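    The returned quantity is meant to be the coefficient of determination
    (R^2). A rough numpy equivalent for targets y and predictions p
    (hypothetical arrays) is:

        r2 = 1. - ((y - p) ** 2).sum() / ((y - y.mean()) ** 2).sum()

    Values close to 1 mean the predictions explain most of the variance;
    0 corresponds to always predicting the mean.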
448 | """ 449 | valid_err = 0 450 | denom = 0 451 | data_mean = numpy.array(data[1]).mean() 452 | for _, valid_index in iterator: 453 | # TODO: This is not very efficient I should check 454 | x, y = prepare_data([data[0][t] for t in valid_index], 455 | numpy.array(data[1])[valid_index], 456 | model_options['n_iter'],model_options['n_input']) 457 | rx, _ = prepare_data([data[0][t][::-1] for t in valid_index], 458 | numpy.array(data[1])[valid_index], 459 | model_options['n_iter'],model_options['n_input']) 460 | 461 | 462 | preds = f_pred(x,rx) 463 | targets = numpy.array(data[1])[valid_index] 464 | valid_err += tensor.sum((targets-preds.T)**2) 465 | denom += ((numpy.array(data[1]) - data_mean)**2).sum() 466 | #valid_err = 1. - numpy.float32(valid_err) / len(data[0]) 467 | valid_err = 1. - (valid_err / denom) 468 | 469 | return valid_err.eval() 470 | 471 | 472 | def backforecast(f_pred, data, model_options): 473 | """ 474 | Compute the amount of times in which 475 | the RNN correctly predict a up or 476 | down trend 477 | """ 478 | # TODO: Use the prepare data 479 | x, y = prepare_data(data[0], data[1], model_options['n_iter'], 480 | model_options['n_input']) 481 | rx, _ = prepare_data(data[0][:][::-1], data[1], model_options['n_iter'], 482 | model_options['n_input']) 483 | 484 | 485 | 486 | targets = (y > x[-1,:,0]) 487 | #TODO: not need for this asarray 488 | preds = f_pred(numpy.asarray(x,dtype='float32'),rx) 489 | preds_up = (preds[:,0] > x[-1,:,0]) 490 | err = (targets <> preds_up).sum() 491 | 492 | ret = float(err) / float(len(data[0])) 493 | return ret 494 | 495 | def predict_lstm(input, model_options): 496 | 497 | params = init_params(model_options) 498 | 499 | load_params(model_options['saveto'], params) 500 | 501 | # This create Theano Shared Variable from the parameters. 502 | # Dict name (string) -> Theano Tensor Shared Variable 503 | # params and tparams have different copy of the weights. 504 | tparams = init_tparams(params) 505 | 506 | # use_noise is for dropout 507 | (use_noise, x, rx, 508 | y, f_pred_prob, cost) = build_model(tparams, model_options) 509 | 510 | preds = pred_probs(f_pred_prob, prepare_data, input, model_options) 511 | 512 | return preds 513 | 514 | 515 | def train_lstm( 516 | dim_proj=32, # word embeding dimension and LSTM number of hidden units. 517 | patience=10, # Number of epoch to wait before early stop if no progress 518 | max_epochs=150, # The maximum number of epoch to run 519 | dispFreq=40, # Display to stdout the training progress every N updates 520 | decay_c=0., # Weight decay for the classifier applied to the U weights. 521 | lrate=0.1, # Learning rate for sgd (not used for adadelta and rmsprop) 522 | n_input = 4, # Vocabulary size 523 | optimizer=mom_sgd, # sgd,mom_sgs, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate). 524 | encoder='lstm', # TODO: can be removed must be lstm. 525 | validFreq=20, # Compute the validation error after this number of update. 526 | saveFreq=20, # Save the parameters after every saveFreq updates 527 | maxlen=100, # Sequence longer then this get ignored 528 | batch_size=50, # The batch size during training. 529 | valid_batch_size=64, # The batch size used for validation/test set. 530 | exchange='AUDJPY', 531 | 532 | # Parameter for extra option 533 | noise_std=0., 534 | use_dropout=False, # if False slightly faster, but worst test error 535 | # This frequently need a bigger model. 
536 | reload_model="", # Path to a saved model we want to start from. 537 | sum_pool = False, 538 | mom_start = 0.5, 539 | mom_end = 0.99, 540 | mom_epoch_interval = 300, 541 | learning_rate_decay=0.99995, 542 | #learning_rate_decay=0.98, 543 | predict=False, 544 | input_pred=None 545 | ): 546 | 547 | model_path = "/user/j/jgpavez/rnn_trading/models/" 548 | data_path = "/user/j/jgpavez/rnn_trading/data/" 549 | 550 | saveto = exchange + '_model.npz' 551 | params_file = exchange + '_params.npz' 552 | dataset = exchange + '_hour.csv' 553 | 554 | saveto = os.path.join(model_path, saveto) 555 | params_file = os.path.join(data_path, params_file) 556 | 557 | ydim = 1 558 | n_iter = 10 559 | 560 | # Model options 561 | model_options = locals().copy() 562 | 563 | if predict == True: 564 | return predict_lstm(input_pred, model_options) 565 | 566 | 567 | print "model options", model_options 568 | 569 | print 'Loading data' 570 | train, valid, test, mean, std = read_data(max_len=n_iter, path=dataset, params_file=params_file) 571 | 572 | #YDIM?? 573 | #number of labels (output) 574 | 575 | theano.config.optimizer = 'None' 576 | 577 | print 'Building model' 578 | # This create the initial parameters as numpy ndarrays. 579 | # Dict name (string) -> numpy ndarray 580 | params = init_params(model_options) 581 | 582 | if reload_model: 583 | load_params(saveto, params) 584 | 585 | # This create Theano Shared Variable from the parameters. 586 | # Dict name (string) -> Theano Tensor Shared Variable 587 | # params and tparams have different copy of the weights. 588 | tparams = init_tparams(params) 589 | 590 | # use_noise is for dropout 591 | (use_noise, x, rx, 592 | y, f_pred_prob, cost) = build_model(tparams, model_options) 593 | 594 | if decay_c > 0.: 595 | decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') 596 | weight_decay = 0. 597 | weight_decay += (tparams['U']**2).sum() 598 | weight_decay *= decay_c 599 | cost += weight_decay 600 | 601 | f_cost = theano.function([x,rx,y], cost, name='f_cost') 602 | 603 | grads = tensor.grad(cost, wrt=tparams.values()) 604 | f_grad = theano.function([x,rx,y], grads, name='f_grad') 605 | 606 | lr = tensor.scalar(name='lr') 607 | f_grad_shared, f_update = optimizer(lr, tparams, grads, 608 | x,rx, y, cost) 609 | 610 | print 'Optimization' 611 | 612 | 613 | kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size, 614 | shuffle=True) 615 | kf_test = get_minibatches_idx(len(test[0]), valid_batch_size, 616 | shuffle=True) 617 | 618 | print "%d train examples" % len(train[0]) 619 | print "%d valid examples" % len(valid[0]) 620 | print "%d test examples" % len(test[0]) 621 | history_errs = [] 622 | best_p = None 623 | bad_count = 0 624 | 625 | if validFreq == -1: 626 | validFreq = len(train[0])/batch_size 627 | if saveFreq == -1: 628 | saveFreq = len(train[0])/batch_size 629 | 630 | uidx = 0 # the number of update done 631 | estop = False # early stop 632 | start_time = time.clock() 633 | mom = 0 634 | 635 | try: 636 | for eidx in xrange(max_epochs): 637 | n_samples = 0 638 | 639 | # Get new shuffled index for the training set. 640 | kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True) 641 | 642 | if eidx < model_options['mom_epoch_interval']: 643 | mom = model_options['mom_start']*\ 644 | (1.0 - eidx/model_options['mom_epoch_interval'])\ 645 | + mom_end*(eidx/model_options['mom_epoch_interval']) 646 | else: 647 | mom = mom_end 648 | 649 | for _, train_index in kf: 650 | uidx += 1 651 | use_noise.set_value(1.) 
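                # The momentum schedule a few lines above is meant to ramp mom
                # linearly from mom_start to mom_end over mom_epoch_interval
                # epochs, i.e. roughly:
                #
                #     t = float(eidx) / mom_epoch_interval        # fraction of the ramp
                #     mom = mom_start * (1.0 - t) + mom_end * t   # 0.5 -> 0.99 with the defaults
                #
                # Note that eidx / model_options['mom_epoch_interval'] is integer
                # division under Python 2, so as written t stays 0 and mom stays at
                # mom_start; the float cast above is probably what was intended.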
652 | 653 | # Select the random examples for this minibatch 654 | y = [train[1][t] for t in train_index] 655 | x = [train[0][t]for t in train_index] 656 | 657 | # Get the data in numpy.ndarray formet. 658 | # It return something of the shape (minibatch maxlen, n samples) 659 | rx,_ = prepare_data(x[:][::-1],y,model_options['n_iter'],model_options['n_input']) 660 | x, y = prepare_data(x, y, model_options['n_iter'],model_options['n_input']) 661 | 662 | 663 | if x is None: 664 | print 'Minibatch with zero sample under length ', maxlen 665 | continue 666 | n_samples += x.shape[1] 667 | cost = f_grad_shared(x,rx, y) 668 | f_update(lrate,mom) 669 | 670 | #decay 671 | #TODO: CHECK THIS LEARNING RATE 672 | #lrate = learning_rate_decay*lrate 673 | 674 | if numpy.isnan(cost) or numpy.isinf(cost): 675 | print 'NaN detected' 676 | return 1., 1., 1. 677 | 678 | if numpy.mod(uidx, dispFreq) == 0: 679 | print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost 680 | 681 | #decay 682 | #TODO: CHECK THIS LEARNING RATE 683 | lrate = learning_rate_decay*lrate 684 | if numpy.mod(eidx, validFreq) == 0: 685 | use_noise.set_value(0.) 686 | #train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options) 687 | valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options) 688 | test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options) 689 | bckfr_err = backforecast(f_pred_prob, test, model_options) 690 | 691 | history_errs.append([valid_err, test_err]) 692 | 693 | if (eidx == 0 or 694 | test_err <= numpy.array(history_errs)[:, 695 | 1].min()): 696 | 697 | best_p = unzip(tparams) 698 | bad_counter = 0 699 | 700 | print ('Valid ', valid_err, 701 | 'Test ', test_err, 702 | 'Backfore ', bckfr_err) 703 | 704 | if (len(history_errs) > patience and 705 | valid_err >= numpy.array(history_errs)[:-patience, 706 | 0].min()): 707 | bad_counter += 1 708 | if bad_counter > patience: 709 | print 'Early Stop!' 710 | estop = True 711 | break 712 | 713 | 714 | if numpy.mod(eidx, saveFreq) == 0: 715 | print 'Saving...', 716 | 717 | if best_p is not None: 718 | params = best_p 719 | else: 720 | params = unzip(tparams) 721 | numpy.savez(saveto, history_errs=history_errs, **params) 722 | pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1) 723 | print 'Done' 724 | 725 | 726 | print 'Seen %d samples' % n_samples 727 | 728 | if estop: 729 | break 730 | 731 | except KeyboardInterrupt: 732 | print "Training interupted" 733 | 734 | end_time = time.clock() 735 | if best_p is not None: 736 | zipp(best_p, tparams) 737 | else: 738 | best_p = unzip(tparams) 739 | 740 | use_noise.set_value(0.) 741 | #train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options) 742 | valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options) 743 | test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options) 744 | bckfr_err = backforecast(f_pred_prob, test, model_options) 745 | 746 | print 'Valid ', valid_err, 'Test ', test_err, 'Backforecasting ', bckfr_err 747 | 748 | numpy.savez(saveto, train_err=train_err, 749 | valid_err=valid_err, test_err=test_err, 750 | history_errs=history_errs, **best_p) 751 | print 'The code run for %d epochs, with %f sec/epochs' % ( 752 | (eidx + 1), (end_time - start_time) / (1. 
* (eidx + 1))) 753 | print >> sys.stderr, ('Training took %.1fs' % 754 | (end_time - start_time)) 755 | return train_err, valid_err, test_err 756 | 757 | 758 | if __name__ == '__main__': 759 | 760 | # We must have floatX=float32 for this tutorial to work correctly. 761 | theano.config.floatX = "float32" 762 | # The next line is the new Theano default. This is a speed up. 763 | #theano.config.scan.allow_gc = False 764 | exchange = 'AUDJPY' 765 | if len(sys.argv) == 2: 766 | exchange = sys.argv[1] 767 | 768 | # See function train for all possible parameter and there definition. 769 | train_lstm( 770 | #reload_model="lstm_model.npz", 771 | exchange=exchange, 772 | max_epochs=162, 773 | ) 774 | 775 | -------------------------------------------------------------------------------- /deep_lstm_forex.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Deep LSTM RNN for forex predictions 3 | Based on sentiment analysis lstm found in deeplearning tutorials 4 | ''' 5 | from collections import OrderedDict 6 | import copy 7 | import cPickle as pkl 8 | import random 9 | import sys 10 | import time 11 | import pdb 12 | import os 13 | 14 | import numpy 15 | import theano 16 | import theano.tensor as tensor 17 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams 18 | from theano.ifelse import ifelse 19 | 20 | 21 | from forex import read_data, prepare_data 22 | 23 | #### rectified linear unit 24 | def ReLU(x): 25 | y = tensor.maximum(0.0, x) 26 | return(y) 27 | 28 | def get_minibatches_idx(n, minibatch_size, shuffle=False): 29 | """ 30 | Used to shuffle the dataset at each iteration. 31 | """ 32 | 33 | idx_list = numpy.arange(n, dtype="int32") 34 | 35 | if shuffle: 36 | random.shuffle(idx_list) 37 | 38 | minibatches = [] 39 | minibatch_start = 0 40 | for i in range(n // minibatch_size): 41 | minibatches.append(idx_list[minibatch_start: 42 | minibatch_start + minibatch_size]) 43 | minibatch_start += minibatch_size 44 | 45 | if (minibatch_start != n): 46 | # Make a minibatch out of what is left 47 | minibatches.append(idx_list[minibatch_start:]) 48 | 49 | return zip(range(len(minibatches)), minibatches) 50 | 51 | def zipp(params, tparams): 52 | """ 53 | When we reload the model. Needed for the GPU stuff. 54 | """ 55 | for kk, vv in params.iteritems(): 56 | tparams[kk].set_value(vv) 57 | 58 | 59 | def unzip(zipped): 60 | """ 61 | When we pickle the model. Needed for the GPU stuff. 62 | """ 63 | new_params = OrderedDict() 64 | for kk, vv in zipped.iteritems(): 65 | new_params[kk] = vv.get_value() 66 | return new_params 67 | 68 | 69 | def dropout_layer(state_before, use_noise, trng): 70 | proj = tensor.switch(use_noise, 71 | (state_before * 72 | trng.binomial(state_before.shape, 73 | p=0.5, n=1, 74 | dtype=state_before.dtype)), 75 | state_before * 0.5) 76 | return proj 77 | 78 | 79 | def _p(pp, name): 80 | return '%s_%s' % (pp, name) 81 | 82 | 83 | def init_params(options): 84 | """ 85 | Global (not LSTM) parameter. For the embeding and the classifier. 
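    In this deep variant the encoder initializer below creates one set of LSTM
    weights per stacked layer, so with the defaults used in train_lstm
    (nlayers=3, dim_proj=32, n_input=4, ydim=1) the dictionary roughly holds:

        params['Wemb']                        # (4, 32) input projection
        params['lstm_W0'], params['lstm_U0']  # (32, 128) each, first layer gates
        params['lstm_b0']                     # (128,)
        ...                                   # likewise lstm_W1/U1/b1 and lstm_W2/U2/b2
        params['U'], params['b']              # (32, 1) and (1,) regression head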
86 | """ 87 | params = OrderedDict() 88 | # embedding 89 | randn = numpy.random.rand(options['n_input'], 90 | options['dim_proj']) 91 | params['Wemb'] = (0.01 * randn).astype('float32') 92 | params = get_layer(options['encoder'])[0](options, 93 | params, 94 | prefix=options['encoder']) 95 | # classifier 96 | params['U'] = 0.01 * numpy.random.randn(options['dim_proj'], 97 | options['ydim']).astype('float32') 98 | params['b'] = numpy.zeros((options['ydim'],)).astype('float32') 99 | 100 | return params 101 | 102 | 103 | def load_params(path, params): 104 | pp = numpy.load(path) 105 | for kk, vv in params.iteritems(): 106 | if kk not in pp: 107 | raise Warning('%s is not in the archive' % kk) 108 | params[kk] = pp[kk] 109 | 110 | return params 111 | 112 | 113 | def init_tparams(params): 114 | tparams = OrderedDict() 115 | for kk, pp in params.iteritems(): 116 | tparams[kk] = theano.shared(params[kk], name=kk) 117 | return tparams 118 | 119 | 120 | def get_layer(name): 121 | fns = layers[name] 122 | return fns 123 | 124 | 125 | def ortho_weight(ndim): 126 | W = numpy.random.randn(ndim, ndim) 127 | u, s, v = numpy.linalg.svd(W) 128 | return u.astype('float32') 129 | 130 | 131 | def param_init_lstm(options, params, prefix='lstm'): 132 | """ 133 | Init the LSTM parameter: 134 | 135 | :see: init_params 136 | """ 137 | 138 | for layer in xrange(options['nlayers']): 139 | # Asuming all layers of same size 140 | W = numpy.concatenate([ortho_weight(options['dim_proj']), 141 | ortho_weight(options['dim_proj']), 142 | ortho_weight(options['dim_proj']), 143 | ortho_weight(options['dim_proj'])], axis=1) 144 | params[_p(prefix, 'W%d'%layer)] = W.astype('float32') 145 | U = numpy.concatenate([ortho_weight(options['dim_proj']), 146 | ortho_weight(options['dim_proj']), 147 | ortho_weight(options['dim_proj']), 148 | ortho_weight(options['dim_proj'])], axis=1) 149 | params[_p(prefix, 'U%d'%layer)] = U.astype('float32') 150 | b = numpy.zeros((4 * options['dim_proj'],)) 151 | params[_p(prefix, 'b%d'%layer)] = b.astype('float32') 152 | 153 | return params 154 | 155 | 156 | def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None, nlayers=1): 157 | nsteps = state_below.shape[0] 158 | if state_below.ndim == 3: 159 | n_samples = state_below.shape[1] 160 | else: 161 | n_samples = 1 162 | 163 | #assert mask is not None 164 | 165 | def _slice(_x, n, dim): 166 | if _x.ndim == 3: 167 | return _x[:, :, n*dim:(n+1)*dim] 168 | return _x[:, n*dim:(n+1)*dim] 169 | 170 | def _step(x_, h_, c_, param_U, param_b): 171 | 172 | preact = tensor.dot(h_, param_U) 173 | preact += x_ 174 | preact += param_b 175 | 176 | i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj'])) 177 | f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj'])) 178 | o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj'])) 179 | c = tensor.tanh(_slice(preact, 3, options['dim_proj'])) 180 | 181 | c = f * c_ + i * c 182 | #c = m_[:, None] * c + (1. - m_)[:, None] * c_ 183 | 184 | h = o * tensor.tanh(c) 185 | #TODO: I think this don't apply since is made to avoid sequences smaller tan max_len 186 | #h = m_[:, None] * h + (1. 
- m_)[:, None] * h_ 187 | 188 | return h, c 189 | 190 | for layer in xrange(nlayers): 191 | state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W%d'%layer)]) + 192 | tparams[_p(prefix, 'b%d'%layer)]) 193 | 194 | dim_proj = options['dim_proj'] 195 | #TODO: Scan over two sequences one for step and other for number of layer 196 | rval, updates = theano.scan(_step, 197 | sequences=[state_below], 198 | outputs_info=[tensor.alloc(0.,n_samples, 199 | dim_proj), 200 | tensor.alloc(0.,n_samples, 201 | dim_proj)], 202 | non_sequences=[tparams[_p(prefix,'U%d'%layer)],tparams[_p(prefix,'b%d'%layer)]], 203 | name=_p(prefix, '_layer%d'%layer), 204 | n_steps=nsteps) 205 | state_below = rval[0] 206 | 207 | return rval[0] 208 | 209 | 210 | # ff: Feed Forward (normal neural net), only useful to put after lstm 211 | # before the classifier. 212 | layers = {'lstm': (param_init_lstm, lstm_layer)} 213 | 214 | def mom_sgd(lr, tparams, grads, x, y, cost): 215 | """ Stochastic Gradient Descent 216 | 217 | :note: A more complicated version of sgd then needed. This is 218 | done like that for adadelta and rmsprop. 219 | 220 | """ 221 | 222 | updates = OrderedDict() 223 | 224 | mom = tensor.scalar(name='mom') 225 | gmomshared = [theano.shared(p.get_value(), name='%s_mom_grad' %k) 226 | for k,p in tparams.iteritems()] 227 | 228 | # New set of shared variable that will contain the gradient 229 | # for a mini-batch. 230 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 231 | for k, p in tparams.iteritems()] 232 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 233 | 234 | # Function that computes gradients for a mini-batch, but do not 235 | # updates the weights. 236 | f_grad_shared = theano.function([x, y], cost, updates=gsup, 237 | name='sgd_f_grad_shared') 238 | 239 | for gm,gp in zip(gmomshared,gshared): 240 | updates[gm] = mom*gm - (1.0 - mom) * lr * gp 241 | #gmomup = [(gm, mom*gm - (1.0 - mom) * lr * gp) for gm,gp in 242 | # zip(gmomshared, gshared)] 243 | 244 | #pup = [(p, p + gm) for p, gm in zip(tparams.values(), gmomup)] 245 | for p,gm in zip(tparams.values(), gmomshared): 246 | updates[p] = p + updates[gm] 247 | 248 | # Function that updates the weights from the previously computed 249 | # gradient. 250 | f_update = theano.function([lr,mom], [], updates=updates, 251 | name='sgd_f_update') 252 | 253 | return f_grad_shared, f_update 254 | 255 | 256 | def sgd(lr, tparams, grads, x, y, cost): 257 | """ Stochastic Gradient Descent 258 | 259 | :note: A more complicated version of sgd then needed. This is 260 | done like that for adadelta and rmsprop. 261 | 262 | """ 263 | # New set of shared variable that will contain the gradient 264 | # for a mini-batch. 265 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 266 | for k, p in tparams.iteritems()] 267 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 268 | 269 | # Function that computes gradients for a mini-batch, but do not 270 | # updates the weights. 271 | f_grad_shared = theano.function([x, y], cost, updates=gsup, 272 | name='sgd_f_grad_shared') 273 | 274 | pup = [(p, p - lr * g) for p, g in zip(tparams.values(), gshared)] 275 | 276 | # Function that updates the weights from the previously computed 277 | # gradient. 
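    # For reference, a compact numpy sketch of what one step of a stacked LSTM
    # layer in lstm_layer above computes, with hypothetical arrays x, h, c of
    # shape (n_samples, dim) and the concatenated gate matrices W, U of shape
    # (dim, 4*dim) and bias b of shape (4*dim,):
    #
    #     def sigmoid(z): return 1.0 / (1.0 + numpy.exp(-z))
    #     preact = x.dot(W) + h.dot(U) + b
    #     i = sigmoid(preact[:, 0*dim:1*dim])     # input gate
    #     f = sigmoid(preact[:, 1*dim:2*dim])     # forget gate
    #     o = sigmoid(preact[:, 2*dim:3*dim])     # output gate
    #     g = numpy.tanh(preact[:, 3*dim:4*dim])  # candidate cell state
    #     c = f * c + i * g                       # new cell state
    #     h = o * numpy.tanh(c)                   # new hidden state
    #
    # Stacking: the hidden sequence of layer l becomes the input of layer l+1,
    # which is what the `state_below = rval[0]` assignment in lstm_layer does
    # on each of the nlayers passes.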
278 | f_update = theano.function([lr], [], updates=pup, 279 | name='sgd_f_update') 280 | 281 | return f_grad_shared, f_update 282 | 283 | 284 | def adadelta(lr, tparams, grads, x, y, cost): 285 | zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), 286 | name='%s_grad' % k) 287 | for k, p in tparams.iteritems()] 288 | running_up2 = [theano.shared(p.get_value() * numpy.float32(0.), 289 | name='%s_rup2' % k) 290 | for k, p in tparams.iteritems()] 291 | running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), 292 | name='%s_rgrad2' % k) 293 | for k, p in tparams.iteritems()] 294 | 295 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 296 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 297 | for rg2, g in zip(running_grads2, grads)] 298 | 299 | f_grad_shared = theano.function([x, y], cost, updates=zgup+rg2up, 300 | name='adadelta_f_grad_shared') 301 | 302 | updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg 303 | for zg, ru2, rg2 in zip(zipped_grads, 304 | running_up2, 305 | running_grads2)] 306 | ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) 307 | for ru2, ud in zip(running_up2, updir)] 308 | param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)] 309 | 310 | f_update = theano.function([lr], [], updates=ru2up+param_up, 311 | on_unused_input='ignore', 312 | name='adadelta_f_update', 313 | mode='DebugMode') 314 | 315 | return f_grad_shared, f_update 316 | 317 | 318 | def rmsprop(lr, tparams, grads, x, y, cost): 319 | zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), 320 | name='%s_grad' % k) 321 | for k, p in tparams.iteritems()] 322 | running_grads = [theano.shared(p.get_value() * numpy.float32(0.), 323 | name='%s_rgrad' % k) 324 | for k, p in tparams.iteritems()] 325 | running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), 326 | name='%s_rgrad2' % k) 327 | for k, p in tparams.iteritems()] 328 | 329 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 330 | rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)] 331 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 332 | for rg2, g in zip(running_grads2, grads)] 333 | 334 | f_grad_shared = theano.function([x, y], cost, 335 | updates=zgup + rgup + rg2up, 336 | name='rmsprop_f_grad_shared') 337 | 338 | updir = [theano.shared(p.get_value() * numpy.float32(0.), 339 | name='%s_updir' % k) 340 | for k, p in tparams.iteritems()] 341 | updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4)) 342 | for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, 343 | running_grads2)] 344 | param_up = [(p, p + udn[1]) 345 | for p, udn in zip(tparams.values(), updir_new)] 346 | f_update = theano.function([lr], [], updates=updir_new+param_up, 347 | on_unused_input='ignore', 348 | name='rmsprop_f_update') 349 | 350 | return f_grad_shared, f_update 351 | 352 | 353 | def build_model(tparams, options): 354 | trng = RandomStreams(1234) 355 | 356 | # Used for dropout. 
357 | use_noise = theano.shared(numpy.float32(0.)) 358 | 359 | x = tensor.tensor3('x', dtype='float32') 360 | #mask = tensor.matrix('mask', dtype='float32') 361 | y = tensor.vector('y', dtype='float32') 362 | 363 | n_timesteps = x.shape[0] 364 | n_samples = x.shape[1] 365 | n_dim = x.shape[2] 366 | 367 | emb = tensor.dot(x,tparams['Wemb']) 368 | #emb = tensor.nnet.sigmoid(emb) 369 | #emb = ReLU(emb) 370 | 371 | if options['use_dropout']: 372 | emb = dropout_layer(emb, use_noise, trng) 373 | 374 | proj = get_layer(options['encoder'])[1](tparams, emb, options, 375 | prefix=options['encoder'], 376 | nlayers=options['nlayers'] 377 | ) 378 | 379 | 380 | if options['encoder'] == 'lstm' and options['sum_pool'] == True: 381 | proj = proj.sum(axis=0) 382 | proj = proj / options['n_iter'] 383 | else: 384 | proj = proj[-1] 385 | #if options['use_dropout']: 386 | # proj = dropout_layer(proj, use_noise, trng) 387 | 388 | #pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U'])+tparams['b']) 389 | #pred = tensor.nnet.sigmoid(tensor.dot(proj, tparams['U'])\ 390 | # + tparams['b']) 391 | pred = tensor.dot(proj, tparams['U']) + tparams['b'] 392 | 393 | f_pred_prob = theano.function([x], pred, name='f_pred_prob') 394 | #f_pred = theano.function(x, pred.argmax(axis=1), name='f_pred') 395 | 396 | cost = tensor.mean((y-pred.T)**2) 397 | 398 | #cost = -tensor.log(pred[tensor.arange(n_samples), y] + 1e-8).mean() 399 | 400 | return use_noise, x, y, f_pred_prob, cost 401 | 402 | 403 | def pred_probs(f_pred, prepare_data, data, model_options, verbose=False): 404 | """ If you want to use a trained model, this is useful to compute 405 | the probabilities of new examples. 406 | """ 407 | n_samples = len(data) 408 | 409 | x,y = prepare_data(data, numpy.array([]), 410 | model_options['n_iter'], model_options['n_input']) 411 | pred = f_pred(x) 412 | 413 | return pred 414 | 415 | 416 | def pred_error(f_pred, prepare_data, data, iterator, model_options, verbose=False): 417 | """ 418 | Just compute the error 419 | f_pred: Theano fct computing the prediction 420 | prepare_data: usual prepare_data for that dataset. 421 | """ 422 | valid_err = 0 423 | for _, valid_index in iterator: 424 | # TODO: This is not very efficient I should check 425 | x, y = prepare_data([data[0][t] for t in valid_index], 426 | numpy.array(data[1])[valid_index], 427 | model_options['n_iter'],model_options['n_input']) 428 | 429 | 430 | preds = f_pred(x) 431 | targets = numpy.array(data[1])[valid_index] 432 | valid_err += tensor.sum((targets-preds.T)**2) 433 | #valid_err = 1. - numpy.float32(valid_err) / len(data[0]) 434 | valid_err = valid_err / len(data[0]) 435 | 436 | return valid_err.eval() 437 | 438 | 439 | def R_score(f_pred, prepare_data, data, iterator, model_options, verbose=False): 440 | """ 441 | Just compute the error 442 | f_pred: Theano fct computing the prediction 443 | prepare_data: usual prepare_data for that dataset. 444 | """ 445 | valid_err = 0 446 | denom = 0 447 | data_mean = numpy.array(data[1]).mean() 448 | for _, valid_index in iterator: 449 | # TODO: This is not very efficient I should check 450 | x, y = prepare_data([data[0][t] for t in valid_index], 451 | numpy.array(data[1])[valid_index], 452 | model_options['n_iter'],model_options['n_input']) 453 | 454 | 455 | preds = f_pred(x) 456 | targets = numpy.array(data[1])[valid_index] 457 | valid_err += tensor.sum((targets-preds.T)**2) 458 | denom += ((numpy.array(data[1]) - data_mean)**2).sum() 459 | #valid_err = 1. - numpy.float32(valid_err) / len(data[0]) 460 | valid_err = 1. 
- (valid_err / denom) 461 | 462 | return valid_err.eval() 463 | 464 | 465 | def backforecast(f_pred, data, model_options): 466 | """ 467 | Compute the amount of times in which 468 | the RNN correctly predict a up or 469 | down trend 470 | """ 471 | # TODO: Use the prepare data 472 | x, y = prepare_data(data[0], data[1], model_options['n_iter'], 473 | model_options['n_input']) 474 | 475 | targets = (y > x[-1,:,0]) 476 | preds = f_pred(numpy.asarray(x,dtype='float32')) 477 | preds_up = (preds[:,0] > x[-1,:,0]) 478 | err = (targets <> preds_up).sum() 479 | 480 | ret = float(err) / float(len(data[0])) 481 | return ret 482 | 483 | def predict_lstm(input, model_options): 484 | 485 | params = init_params(model_options) 486 | 487 | load_params(model_options['saveto'], params) 488 | 489 | # This create Theano Shared Variable from the parameters. 490 | # Dict name (string) -> Theano Tensor Shared Variable 491 | # params and tparams have different copy of the weights. 492 | tparams = init_tparams(params) 493 | 494 | # use_noise is for dropout 495 | (use_noise, x, 496 | y, f_pred_prob, cost) = build_model(tparams, model_options) 497 | 498 | preds = pred_probs(f_pred_prob, prepare_data, input, model_options) 499 | 500 | return preds 501 | 502 | 503 | def train_lstm( 504 | dim_proj=32, # word embeding dimension and LSTM number of hidden units. 505 | patience=10, # Number of epoch to wait before early stop if no progress 506 | max_epochs=150, # The maximum number of epoch to run 507 | dispFreq=40, # Display to stdout the training progress every N updates 508 | decay_c=0., # Weight decay for the classifier applied to the U weights. 509 | lrate=0.1, # Learning rate for sgd (not used for adadelta and rmsprop) 510 | n_input = 4, # Vocabulary size 511 | optimizer=mom_sgd, # sgd,mom_sgs, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate). 512 | encoder='lstm', # TODO: can be removed must be lstm. 513 | validFreq=20, # Compute the validation error after this number of update. 514 | saveFreq=20, # Save the parameters after every saveFreq updates 515 | maxlen=100, # Sequence longer then this get ignored 516 | batch_size=50, # The batch size during training. 517 | valid_batch_size=64, # The batch size used for validation/test set. 518 | exchange='AUDJPY', 519 | 520 | # Parameter for extra option 521 | noise_std=0., 522 | use_dropout=False, # if False slightly faster, but worst test error 523 | # This frequently need a bigger model. 524 | reload_model="", # Path to a saved model we want to start from. 
525 | sum_pool = False, 526 | mom_start = 0.5, 527 | mom_end = 0.99, 528 | mom_epoch_interval = 300, 529 | learning_rate_decay=0.99995, 530 | nlayers = 3, 531 | #learning_rate_decay=0.98, 532 | predict=False, 533 | input_pred=None 534 | ): 535 | 536 | model_path = "/user/j/jgpavez/rnn_trading/models/" 537 | data_path = "/user/j/jgpavez/rnn_trading/data/" 538 | 539 | saveto = exchange + '_model_deep.npz' 540 | params_file = exchange + '_params_deep.npz' 541 | dataset = exchange + '_hour.csv' 542 | 543 | saveto = os.path.join(model_path, saveto) 544 | params_file = os.path.join(data_path, params_file) 545 | 546 | ydim = 1 547 | n_iter = 50 548 | 549 | # Model options 550 | model_options = locals().copy() 551 | 552 | if predict == True: 553 | return predict_lstm(input_pred, model_options) 554 | 555 | 556 | print "model options", model_options 557 | 558 | print 'Loading data' 559 | train, valid, test, mean, std = read_data(max_len=n_iter, path=dataset, params_file=params_file) 560 | 561 | #YDIM?? 562 | #number of labels (output) 563 | 564 | theano.config.optimizer = 'None' 565 | 566 | print 'Building model' 567 | # This create the initial parameters as numpy ndarrays. 568 | # Dict name (string) -> numpy ndarray 569 | params = init_params(model_options) 570 | 571 | if reload_model: 572 | load_params(saveto, params) 573 | 574 | # This create Theano Shared Variable from the parameters. 575 | # Dict name (string) -> Theano Tensor Shared Variable 576 | # params and tparams have different copy of the weights. 577 | tparams = init_tparams(params) 578 | 579 | # use_noise is for dropout 580 | (use_noise, x, 581 | y, f_pred_prob, cost) = build_model(tparams, model_options) 582 | 583 | if decay_c > 0.: 584 | decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') 585 | weight_decay = 0. 586 | weight_decay += (tparams['U']**2).sum() 587 | weight_decay *= decay_c 588 | cost += weight_decay 589 | 590 | f_cost = theano.function([x, y], cost, name='f_cost') 591 | 592 | grads = tensor.grad(cost, wrt=tparams.values()) 593 | f_grad = theano.function([x, y], grads, name='f_grad') 594 | 595 | lr = tensor.scalar(name='lr') 596 | f_grad_shared, f_update = optimizer(lr, tparams, grads, 597 | x, y, cost) 598 | 599 | print 'Optimization' 600 | 601 | 602 | kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size, 603 | shuffle=True) 604 | kf_test = get_minibatches_idx(len(test[0]), valid_batch_size, 605 | shuffle=True) 606 | 607 | print "%d train examples" % len(train[0]) 608 | print "%d valid examples" % len(valid[0]) 609 | print "%d test examples" % len(test[0]) 610 | history_errs = [] 611 | best_p = None 612 | bad_count = 0 613 | 614 | if validFreq == -1: 615 | validFreq = len(train[0])/batch_size 616 | if saveFreq == -1: 617 | saveFreq = len(train[0])/batch_size 618 | 619 | uidx = 0 # the number of update done 620 | estop = False # early stop 621 | start_time = time.clock() 622 | mom = 0 623 | 624 | try: 625 | for eidx in xrange(max_epochs): 626 | n_samples = 0 627 | 628 | # Get new shuffled index for the training set. 629 | kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True) 630 | 631 | if eidx < model_options['mom_epoch_interval']: 632 | mom = model_options['mom_start']*\ 633 | (1.0 - eidx/model_options['mom_epoch_interval'])\ 634 | + mom_end*(eidx/model_options['mom_epoch_interval']) 635 | else: 636 | mom = mom_end 637 | 638 | for _, train_index in kf: 639 | uidx += 1 640 | use_noise.set_value(1.) 
641 | 642 | # Select the random examples for this minibatch 643 | y = [train[1][t] for t in train_index] 644 | x = [train[0][t]for t in train_index] 645 | 646 | # Get the data in numpy.ndarray formet. 647 | # It return something of the shape (minibatch maxlen, n samples) 648 | x, y = prepare_data(x, y, model_options['n_iter'],model_options['n_input']) 649 | 650 | if x is None: 651 | print 'Minibatch with zero sample under length ', maxlen 652 | continue 653 | n_samples += x.shape[1] 654 | cost = f_grad_shared(x, y) 655 | f_update(lrate,mom) 656 | 657 | #decay 658 | #TODO: CHECK THIS LEARNING RATE 659 | #lrate = learning_rate_decay*lrate 660 | 661 | if numpy.isnan(cost) or numpy.isinf(cost): 662 | print 'NaN detected' 663 | return 1., 1., 1. 664 | 665 | if numpy.mod(uidx, dispFreq) == 0: 666 | print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost 667 | 668 | #decay 669 | #TODO: CHECK THIS LEARNING RATE 670 | lrate = learning_rate_decay*lrate 671 | if numpy.mod(eidx, validFreq) == 0: 672 | use_noise.set_value(0.) 673 | #train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options) 674 | valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options) 675 | test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options) 676 | bckfr_err = backforecast(f_pred_prob, test, model_options) 677 | 678 | history_errs.append([valid_err, test_err]) 679 | 680 | if (eidx == 0 or 681 | test_err <= numpy.array(history_errs)[:, 682 | 1].min()): 683 | 684 | best_p = unzip(tparams) 685 | bad_counter = 0 686 | 687 | print ('Valid ', valid_err, 688 | 'Test ', test_err, 689 | 'Backfore ', bckfr_err) 690 | 691 | if (len(history_errs) > patience and 692 | valid_err >= numpy.array(history_errs)[:-patience, 693 | 0].min()): 694 | bad_counter += 1 695 | if bad_counter > patience: 696 | print 'Early Stop!' 697 | estop = True 698 | break 699 | 700 | 701 | if numpy.mod(eidx, saveFreq) == 0: 702 | print 'Saving...', 703 | 704 | if best_p is not None: 705 | params = best_p 706 | else: 707 | params = unzip(tparams) 708 | numpy.savez(saveto, history_errs=history_errs, **params) 709 | pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1) 710 | print 'Done' 711 | 712 | 713 | print 'Seen %d samples' % n_samples 714 | 715 | if estop: 716 | break 717 | 718 | except KeyboardInterrupt: 719 | print "Training interupted" 720 | 721 | end_time = time.clock() 722 | if best_p is not None: 723 | zipp(best_p, tparams) 724 | else: 725 | best_p = unzip(tparams) 726 | 727 | use_noise.set_value(0.) 728 | #train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options) 729 | valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options) 730 | test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options) 731 | bckfr_err = backforecast(f_pred_prob, test, model_options) 732 | 733 | print 'Valid ', valid_err, 'Test ', test_err, 'Backforecasting ', bckfr_err 734 | 735 | numpy.savez(saveto, train_err=train_err, 736 | valid_err=valid_err, test_err=test_err, 737 | history_errs=history_errs, **best_p) 738 | print 'The code run for %d epochs, with %f sec/epochs' % ( 739 | (eidx + 1), (end_time - start_time) / (1. * (eidx + 1))) 740 | print >> sys.stderr, ('Training took %.1fs' % 741 | (end_time - start_time)) 742 | return train_err, valid_err, test_err 743 | 744 | 745 | if __name__ == '__main__': 746 | 747 | # We must have floatX=float32 for this tutorial to work correctly. 
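    # The training run saves the best parameters together with the validation
    # and test error history into saveto (a .npz archive) and the model options
    # into saveto + '.pkl'. A finished run can be reloaded by passing a
    # non-empty reload_model (see the commented-out argument in the call
    # below), e.g.:
    #
    #     train_lstm(exchange='AUDJPY', reload_model="lstm_model.npz")
    #
    # Note that the final numpy.savez call in train_lstm references train_err
    # while its computation is commented out a few lines earlier, so the very
    # last save would raise a NameError unless that line is re-enabled.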
748 | theano.config.floatX = "float32" 749 | # The next line is the new Theano default. This is a speed up. 750 | #theano.config.scan.allow_gc = False 751 | exchange = 'AUDJPY' 752 | if len(sys.argv) == 2: 753 | exchange = sys.argv[1] 754 | 755 | # See function train for all possible parameter and there definition. 756 | train_lstm( 757 | #reload_model="lstm_model.npz", 758 | exchange=exchange, 759 | max_epochs=162, 760 | ) 761 | 762 | -------------------------------------------------------------------------------- /forex.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Code to read from forex data 3 | data can be obtained from http://www.histdata.com/download-free-forex-data/ 4 | both daily data and minute data can be used 5 | ''' 6 | 7 | # Minute data is under implementation 8 | 9 | 10 | import os 11 | import pdb 12 | import sys 13 | 14 | import numpy 15 | 16 | import theano 17 | from numpy import genfromtxt 18 | from pandas import Series 19 | import datetime 20 | import csv 21 | 22 | #import matplotlib.pyplot as plt 23 | import sklearn.cross_validation as cv 24 | from sklearn import preprocessing 25 | 26 | 27 | def data_preprocessing(data): 28 | ''' 29 | Simple preprocessing of data 30 | ''' 31 | #data = data[51000:] 32 | #data = data[350:,:] 33 | # Standarization 34 | 35 | # Compute compound return serie 36 | #data = numpy.log(data / numpy.roll(data, 1, axis = 0)) 37 | #data = data[1:,:] 38 | #data = data / numpy.roll(data,1,axis=0) 39 | #data = data[1:,:] - 1. 40 | 41 | print data.shape 42 | 43 | #print 44 | #ts = Series(numpy.ravel(data[:,0])) 45 | #ts.plot() 46 | #plt.show() 47 | 48 | 49 | mean = data.mean(axis=0) 50 | std = data.std(axis=0) 51 | 52 | data = data - mean 53 | data = data/std 54 | 55 | #Some kind of smoothing?? 
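    # The block above standardizes each column to zero mean and unit variance,
    # and read_data below saves mean and std into params_file so predictions
    # can be mapped back to price units. A small sketch (pred is a hypothetical
    # normalized prediction of the target, which is the last selected column):
    #
    #     z = (data - mean) / std             # what this function returns
    #     price = pred * std[-1] + mean[-1]   # undo the scaling for the target column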
56 | 57 | #min_max = preprocessing.MinMaxScaler() 58 | #data = min_max.fit_transform(data) 59 | 60 | #Put between 1 and 0 61 | return data,mean,std 62 | 63 | def read_data(path="AUDJPY_hour.csv", dir="/user/j/jgpavez/rnn_trading/data/", 64 | max_len=30, valid_portion=0.1, columns=4, up=False, params_file='params.npz',min=False): 65 | 66 | ''' 67 | Reading forex data, daily or minute 68 | ''' 69 | path = os.path.join(dir, path) 70 | 71 | #data = read_csv(path,delimiter=delimiter) 72 | data = genfromtxt(path, delimiter=',',skip_header=1) 73 | # Adding data bu minute 74 | if min == False: 75 | date_index = 1 76 | values_index = 3 77 | hours = data[:,2] 78 | else: 79 | date_index = 0 80 | values_index = 1 81 | 82 | dates = data[:,date_index] 83 | days = numpy.array([datetime.datetime(int(str(date)[0:-2][0:4]),int(str(date)[0:-2][4:6]), 84 | int(str(date)[0:-2][6:8])).weekday() for date in dates]) 85 | months = numpy.array([datetime.datetime(int(str(date)[0:-2][0:4]),int(str(date)[0:-2][4:6]), 86 | int(str(date)[0:-2][6:8])).month for date in dates]) 87 | 88 | #dates[:,date_index] = days 89 | 90 | data = data[:,values_index:(values_index+columns)] 91 | 92 | data,mean,std = data_preprocessing(data) 93 | 94 | # Save data parameters 95 | numpy.savez(params_file, mean=mean, std=std) 96 | 97 | #x_data = numpy.array([data[i:i+max_len,:] for i in xrange(len(data)-max_len)]) 98 | #y_data = numpy.array([data[i][-1] for i in xrange(max_len , len(data))]) 99 | 100 | # Not consider jumps between days of market closing 101 | #TODO: Here I'm just considering weekends, have to think about holydays 102 | x_data = [] 103 | y_data = [] 104 | for i in xrange(len(data)-max_len): 105 | #TODO: just working for max_len < 24 106 | if (dates[i+max_len-1] == 4 and dates[i+max_len] <> 4): 107 | continue 108 | x_data.append(data[i:i+max_len,:]) 109 | y_data.append(data[i+max_len][-1]) 110 | x_data = numpy.array(x_data) 111 | y_data = numpy.array(y_data) 112 | 113 | if up is True: 114 | y_data = y_data > x_data[:,-1,0] 115 | y_data = numpy.asarray(y_data, dtype='int64') 116 | 117 | # split data into training and test 118 | train_set_x, test_set_x, train_set_y, test_set_y = cv.train_test_split(x_data, 119 | y_data, test_size=0.2, random_state=0) 120 | 121 | # split training set into validation set 122 | n_samples = len(train_set_x) 123 | sidx = numpy.random.permutation(n_samples) 124 | n_train = int(numpy.round(n_samples * (1. 
- valid_portion))) 125 | valid_set_x = [train_set_x[s] for s in sidx[n_train:]] 126 | valid_set_y = [train_set_y[s] for s in sidx[n_train:]] 127 | train_set_x = [train_set_x[s] for s in sidx[:n_train]] 128 | train_set_y = [train_set_y[s] for s in sidx[:n_train]] 129 | 130 | train = (train_set_x, train_set_y) 131 | valid = (valid_set_x, valid_set_y) 132 | test = (test_set_x, test_set_y) 133 | 134 | return train, valid, test, mean, std 135 | 136 | def prepare_data(seqs, labels, steps, x_dim, up=False): 137 | 138 | n_samples = len(seqs) 139 | max_len = steps 140 | x = numpy.zeros((max_len, n_samples, x_dim)).astype('float32') 141 | if up is True: 142 | y = numpy.asarray(labels, dtype='int64') 143 | else: 144 | y = numpy.asarray(labels, dtype='float32') 145 | 146 | for idx, s in enumerate(seqs): 147 | x[:,idx,:] = s 148 | 149 | return x, y 150 | 151 | -------------------------------------------------------------------------------- /lstm_forex.py: -------------------------------------------------------------------------------- 1 | ''' 2 | LSTM RNN for forex predictions 3 | Based on sentiment analysis lstm found in deeplearning tutorials 4 | ''' 5 | from collections import OrderedDict 6 | import copy 7 | import cPickle as pkl 8 | import random 9 | import sys 10 | import time 11 | import pdb 12 | import os 13 | import logging 14 | 15 | import numpy 16 | import theano 17 | import theano.tensor as tensor 18 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams 19 | from theano.ifelse import ifelse 20 | from theano import config 21 | 22 | 23 | from forex import read_data, prepare_data 24 | 25 | def numpy_floatX(data): 26 | return numpy.asarray(data, dtype=config.floatX) 27 | 28 | #### rectified linear unit 29 | def ReLU(x): 30 | y = tensor.maximum(0.0, x) 31 | return(y) 32 | 33 | def get_minibatches_idx(n, minibatch_size, shuffle=False): 34 | """ 35 | Used to shuffle the dataset at each iteration. 36 | """ 37 | 38 | idx_list = numpy.arange(n, dtype="int32") 39 | 40 | if shuffle: 41 | random.shuffle(idx_list) 42 | 43 | minibatches = [] 44 | minibatch_start = 0 45 | for i in range(n // minibatch_size): 46 | minibatches.append(idx_list[minibatch_start: 47 | minibatch_start + minibatch_size]) 48 | minibatch_start += minibatch_size 49 | 50 | if (minibatch_start != n): 51 | # Make a minibatch out of what is left 52 | minibatches.append(idx_list[minibatch_start:]) 53 | 54 | return zip(range(len(minibatches)), minibatches) 55 | 56 | def zipp(params, tparams): 57 | """ 58 | When we reload the model. Needed for the GPU stuff. 59 | """ 60 | for kk, vv in params.iteritems(): 61 | tparams[kk].set_value(vv) 62 | 63 | 64 | def unzip(zipped): 65 | """ 66 | When we pickle the model. Needed for the GPU stuff. 67 | """ 68 | new_params = OrderedDict() 69 | for kk, vv in zipped.iteritems(): 70 | new_params[kk] = vv.get_value() 71 | return new_params 72 | 73 | 74 | def dropout_layer(state_before, use_noise, trng): 75 | proj = tensor.switch(use_noise, 76 | (state_before * 77 | trng.binomial(state_before.shape, 78 | p=0.5, n=1, 79 | dtype=state_before.dtype)), 80 | state_before * 0.5) 81 | return proj 82 | 83 | 84 | def _p(pp, name): 85 | return '%s_%s' % (pp, name) 86 | 87 | 88 | def init_params(options): 89 | """ 90 | Global (not LSTM) parameter. For the embeding and the classifier. 
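    The windows this model consumes come from forex.prepare_data (imported
    above), which stacks a list of (steps, n_input) windows into one array.
    Roughly, with n_input=4 feature columns:

        x, y = prepare_data(seqs, labels, steps=n_iter, x_dim=n_input)
        # x.shape == (n_iter, n_samples, n_input), float32
        # y.shape == (n_samples,), the next (normalized) value of the target column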
91 | """ 92 | params = OrderedDict() 93 | # embedding 94 | randn = numpy.random.rand(options['n_input'], 95 | options['dim_proj']) 96 | params['Wemb'] = (0.01 * randn).astype('float32') 97 | params = get_layer(options['encoder'])[0](options, 98 | params, 99 | prefix=options['encoder']) 100 | # classifier 101 | params['U'] = 0.01 * numpy.random.randn(options['dim_proj'], 102 | options['ydim']).astype('float32') 103 | params['b'] = numpy.zeros((options['ydim'],)).astype('float32') 104 | 105 | return params 106 | 107 | 108 | def load_params(path, params): 109 | pp = numpy.load(path) 110 | for kk, vv in params.iteritems(): 111 | if kk not in pp: 112 | raise Warning('%s is not in the archive' % kk) 113 | params[kk] = pp[kk] 114 | 115 | return params 116 | 117 | 118 | def init_tparams(params): 119 | tparams = OrderedDict() 120 | for kk, pp in params.iteritems(): 121 | tparams[kk] = theano.shared(params[kk], name=kk) 122 | return tparams 123 | 124 | 125 | def get_layer(name): 126 | fns = layers[name] 127 | return fns 128 | 129 | 130 | def ortho_weight(ndim): 131 | W = numpy.random.randn(ndim, ndim) 132 | u, s, v = numpy.linalg.svd(W) 133 | return u.astype('float32') 134 | 135 | 136 | def param_init_lstm(options, params, prefix='lstm'): 137 | """ 138 | Init the LSTM parameter: 139 | 140 | :see: init_params 141 | """ 142 | W = numpy.concatenate([ortho_weight(options['dim_proj']), 143 | ortho_weight(options['dim_proj']), 144 | ortho_weight(options['dim_proj']), 145 | ortho_weight(options['dim_proj'])], axis=1) 146 | params[_p(prefix, 'W')] = W.astype('float32') 147 | U = numpy.concatenate([ortho_weight(options['dim_proj']), 148 | ortho_weight(options['dim_proj']), 149 | ortho_weight(options['dim_proj']), 150 | ortho_weight(options['dim_proj'])], axis=1) 151 | params[_p(prefix, 'U')] = U.astype('float32') 152 | b = numpy.zeros((4 * options['dim_proj'],)) 153 | params[_p(prefix, 'b')] = b.astype('float32') 154 | 155 | return params 156 | 157 | 158 | def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None): 159 | nsteps = state_below.shape[0] 160 | if state_below.ndim == 3: 161 | n_samples = state_below.shape[1] 162 | else: 163 | n_samples = 1 164 | 165 | #assert mask is not None 166 | 167 | def _slice(_x, n, dim): 168 | if _x.ndim == 3: 169 | return _x[:, :, n*dim:(n+1)*dim] 170 | return _x[:, n*dim:(n+1)*dim] 171 | 172 | def _step(x_, h_, c_): 173 | preact = tensor.dot(h_, tparams[_p(prefix, 'U')]) 174 | preact += x_ 175 | preact += tparams[_p(prefix, 'b')] 176 | 177 | i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj'])) 178 | f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj'])) 179 | o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj'])) 180 | c = tensor.tanh(_slice(preact, 3, options['dim_proj'])) 181 | 182 | c = f * c_ + i * c 183 | #c = m_[:, None] * c + (1. - m_)[:, None] * c_ 184 | 185 | h = o * tensor.tanh(c) 186 | #TODO: I think this don't apply since is made to avoid sequences smaller tan max_len 187 | #h = m_[:, None] * h + (1. 
- m_)[:, None] * h_ 188 | 189 | return h, c 190 | 191 | state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) + 192 | tparams[_p(prefix, 'b')]) 193 | 194 | dim_proj = options['dim_proj'] 195 | rval, updates = theano.scan(_step, 196 | sequences=[state_below], 197 | outputs_info=[tensor.alloc(0., n_samples, 198 | dim_proj), 199 | tensor.alloc(0., n_samples, 200 | dim_proj)], 201 | name=_p(prefix, '_layers'), 202 | n_steps=nsteps) 203 | return rval[0] 204 | 205 | 206 | # ff: Feed Forward (normal neural net), only useful to put after lstm 207 | # before the classifier. 208 | layers = {'lstm': (param_init_lstm, lstm_layer)} 209 | 210 | def mom_sgd(lr, tparams, grads, x, y, cost): 211 | """ Momentum Stochastic Gradient Descent 212 | 213 | :note: A more complicated version of sgd then needed. This is 214 | done like that for adadelta and rmsprop. 215 | 216 | """ 217 | 218 | updates = OrderedDict() 219 | 220 | mom = tensor.scalar(name='mom') 221 | gmomshared = [theano.shared(p.get_value(), name='%s_mom_grad' %k) 222 | for k,p in tparams.iteritems()] 223 | 224 | # New set of shared variable that will contain the gradient 225 | # for a mini-batch. 226 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 227 | for k, p in tparams.iteritems()] 228 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 229 | 230 | # Function that computes gradients for a mini-batch, but do not 231 | # updates the weights. 232 | f_grad_shared = theano.function([x, y], cost, updates=gsup, 233 | name='sgd_f_grad_shared') 234 | 235 | for gm,gp in zip(gmomshared,gshared): 236 | updates[gm] = mom*gm - (1.0 - mom) * lr * gp 237 | #gmomup = [(gm, mom*gm - (1.0 - mom) * lr * gp) for gm,gp in 238 | # zip(gmomshared, gshared)] 239 | 240 | #pup = [(p, p + gm) for p, gm in zip(tparams.values(), gmomup)] 241 | for p,gm in zip(tparams.values(), gmomshared): 242 | updates[p] = p + updates[gm] 243 | 244 | # Function that updates the weights from the previously computed 245 | # gradient. 246 | f_update = theano.function([lr,mom], [], updates=updates, 247 | name='sgd_f_update') 248 | 249 | return f_grad_shared, f_update 250 | 251 | 252 | def sgd(lr, tparams, grads, x, y, cost): 253 | """ Stochastic Gradient Descent 254 | 255 | :note: A more complicated version of sgd then needed. This is 256 | done like that for adadelta and rmsprop. 257 | 258 | """ 259 | # New set of shared variable that will contain the gradient 260 | # for a mini-batch. 261 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 262 | for k, p in tparams.iteritems()] 263 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 264 | 265 | # Function that computes gradients for a mini-batch, but do not 266 | # updates the weights. 267 | f_grad_shared = theano.function([x, y], cost, updates=gsup, 268 | name='sgd_f_grad_shared') 269 | 270 | pup = [(p, p - lr * g) for p, g in zip(tparams.values(), gshared)] 271 | 272 | # Function that updates the weights from the previously computed 273 | # gradient. 
274 | f_update = theano.function([lr], [], updates=pup, 275 | name='sgd_f_update') 276 | 277 | return f_grad_shared, f_update 278 | 279 | 280 | def adadelta(lr, tparams, grads, x, y, cost): 281 | zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), 282 | name='%s_grad' % k) 283 | for k, p in tparams.iteritems()] 284 | running_up2 = [theano.shared(p.get_value() * numpy.float32(0.), 285 | name='%s_rup2' % k) 286 | for k, p in tparams.iteritems()] 287 | running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), 288 | name='%s_rgrad2' % k) 289 | for k, p in tparams.iteritems()] 290 | 291 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 292 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 293 | for rg2, g in zip(running_grads2, grads)] 294 | 295 | f_grad_shared = theano.function([x, y], cost, updates=zgup+rg2up, 296 | name='adadelta_f_grad_shared') 297 | 298 | updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg 299 | for zg, ru2, rg2 in zip(zipped_grads, 300 | running_up2, 301 | running_grads2)] 302 | ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) 303 | for ru2, ud in zip(running_up2, updir)] 304 | param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)] 305 | 306 | f_update = theano.function([lr], [], updates=ru2up+param_up, 307 | on_unused_input='ignore', 308 | name='adadelta_f_update', 309 | mode='DebugMode') 310 | 311 | return f_grad_shared, f_update 312 | 313 | 314 | def rmsprop(lr, tparams, grads, x, y, cost): 315 | zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), 316 | name='%s_grad' % k) 317 | for k, p in tparams.iteritems()] 318 | running_grads = [theano.shared(p.get_value() * numpy.float32(0.), 319 | name='%s_rgrad' % k) 320 | for k, p in tparams.iteritems()] 321 | running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), 322 | name='%s_rgrad2' % k) 323 | for k, p in tparams.iteritems()] 324 | 325 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 326 | rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)] 327 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 328 | for rg2, g in zip(running_grads2, grads)] 329 | 330 | f_grad_shared = theano.function([x, y], cost, 331 | updates=zgup + rgup + rg2up, 332 | name='rmsprop_f_grad_shared') 333 | 334 | updir = [theano.shared(p.get_value() * numpy.float32(0.), 335 | name='%s_updir' % k) 336 | for k, p in tparams.iteritems()] 337 | updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4)) 338 | for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, 339 | running_grads2)] 340 | param_up = [(p, p + udn[1]) 341 | for p, udn in zip(tparams.values(), updir_new)] 342 | f_update = theano.function([lr], [], updates=updir_new+param_up, 343 | on_unused_input='ignore', 344 | name='rmsprop_f_update') 345 | 346 | return f_grad_shared, f_update 347 | 348 | 349 | def build_model(tparams, options): 350 | trng = RandomStreams(1234) 351 | 352 | # Used for dropout. 
353 | use_noise = theano.shared(numpy.float32(0.)) 354 | 355 | x = tensor.tensor3('x', dtype='float32') 356 | #mask = tensor.matrix('mask', dtype='float32') 357 | y = tensor.vector('y', dtype='float32') 358 | 359 | n_timesteps = x.shape[0] 360 | n_samples = x.shape[1] 361 | n_dim = x.shape[2] 362 | 363 | emb = tensor.dot(x,tparams['Wemb']) 364 | #emb = tensor.nnet.sigmoid(emb) 365 | #emb = ReLU(emb) 366 | 367 | if options['use_dropout']: 368 | emb = dropout_layer(emb, use_noise, trng) 369 | 370 | proj = get_layer(options['encoder'])[1](tparams, emb, options, 371 | prefix=options['encoder'] 372 | ) 373 | 374 | 375 | if options['encoder'] == 'lstm' and options['sum_pool'] == True: 376 | proj = proj.sum(axis=0) 377 | proj = proj / options['n_iter'] 378 | else: 379 | proj = proj[-1] 380 | #if options['use_dropout']: 381 | # proj = dropout_layer(proj, use_noise, trng) 382 | 383 | #pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U'])+tparams['b']) 384 | #pred = tensor.nnet.sigmoid(tensor.dot(proj, tparams['U'])\ 385 | # + tparams['b']) 386 | pred = tensor.dot(proj, tparams['U']) + tparams['b'] 387 | 388 | f_pred_prob = theano.function([x], pred, name='f_pred_prob') 389 | #f_pred = theano.function(x, pred.argmax(axis=1), name='f_pred') 390 | 391 | cost = tensor.mean((y-pred.T)**2) 392 | 393 | #cost = -tensor.log(pred[tensor.arange(n_samples), y] + 1e-8).mean() 394 | 395 | return use_noise, x, y, f_pred_prob, cost 396 | 397 | 398 | def pred_probs(f_pred, prepare_data, data, model_options, verbose=False): 399 | """ If you want to use a trained model, this is useful to compute 400 | the probabilities of new examples. 401 | """ 402 | n_samples = len(data) 403 | 404 | x,y = prepare_data(data, numpy.array([]), 405 | model_options['n_iter'], model_options['n_input']) 406 | pred = f_pred(x) 407 | 408 | return pred 409 | 410 | def pred_error(f_pred, prepare_data, data, iterator, model_options, verbose=False): 411 | """ 412 | Just compute the error 413 | f_pred: Theano fct computing the prediction 414 | prepare_data: usual prepare_data for that dataset. 415 | """ 416 | valid_err = 0 417 | for _, valid_index in iterator: 418 | # TODO: This is not very efficient I should check 419 | x, y = prepare_data([data[0][t] for t in valid_index], 420 | numpy.array(data[1])[valid_index], 421 | model_options['n_iter'],model_options['n_input']) 422 | 423 | 424 | preds = f_pred(x) 425 | targets = numpy.array(data[1])[valid_index] 426 | # or tensor.sum 427 | valid_err += ((targets-preds.T)**2).sum() 428 | #valid_err = 1. - numpy.float32(valid_err) / len(data[0]) 429 | valid_err = numpy_floatX(valid_err) / len(data[0]) 430 | 431 | return valid_err 432 | 433 | 434 | def R_score(f_pred, prepare_data, data, iterator, model_options, verbose=False): 435 | """ 436 | Compute R score 437 | f_pred: Theano fct computing the prediction 438 | prepare_data: usual prepare_data for that dataset. 439 | """ 440 | valid_err = 0 441 | denom = 0 442 | data_mean = numpy.array(data[1]).mean() 443 | for _, valid_index in iterator: 444 | # TODO: This is not very efficient I should check 445 | x, y = prepare_data([data[0][t] for t in valid_index], 446 | numpy.array(data[1])[valid_index], 447 | model_options['n_iter'],model_options['n_input']) 448 | 449 | 450 | preds = f_pred(x) 451 | targets = numpy.array(data[1])[valid_index] 452 | valid_err += tensor.sum((targets-preds.T)**2) 453 | denom += ((numpy.array(data[1]) - data_mean)**2).sum() 454 | #valid_err = 1. - numpy.float32(valid_err) / len(data[0]) 455 | valid_err = 1. 
- (valid_err / denom)
456 | 
457 |     return valid_err.eval()
458 | 
459 | 
460 | def backforecast(f_pred, data, model_options):
461 |     """
462 |     Compute the fraction of examples for which
463 |     the RNN fails to predict the direction
464 |     (up or down) of the next move
465 |     """
466 |     # TODO: Use the prepare data
467 |     x, y = prepare_data(data[0], data[1], model_options['n_iter'],
468 |                         model_options['n_input'])
469 | 
470 |     targets = (y > x[-1,:,0])
471 |     preds = f_pred(numpy.asarray(x,dtype='float32'))
472 |     preds_up = (preds[:,0] > x[-1,:,0])
473 |     err = (targets <> preds_up).sum()
474 | 
475 |     ret = float(err) / float(len(data[0]))
476 |     return ret
477 | 
478 | def predict_lstm(input, model_options):
479 | 
480 |     params = init_params(model_options)
481 | 
482 |     load_params(model_options['saveto'], params)
483 | 
484 |     # This creates Theano Shared Variables from the parameters.
485 |     # Dict name (string) -> Theano Tensor Shared Variable
486 |     # params and tparams have different copies of the weights.
487 |     tparams = init_tparams(params)
488 | 
489 |     # use_noise is for dropout
490 |     (use_noise, x,
491 |      y, f_pred_prob, cost) = build_model(tparams, model_options)
492 | 
493 |     preds = pred_probs(f_pred_prob, prepare_data, input, model_options)
494 | 
495 |     return preds
496 | 
497 | 
498 | def train_lstm(
499 |     #dim_proj=32, # word embedding dimension and LSTM number of hidden units.
500 |     dim_proj=124, # word embedding dimension and LSTM number of hidden units.
501 |     patience=10, # Number of epochs to wait before early stop if no progress
502 |     max_epochs=150, # The maximum number of epochs to run
503 |     dispFreq=40, # Display to stdout the training progress every N updates
504 |     decay_c=0., # Weight decay for the classifier applied to the U weights.
505 |     lrate=0.1, # Learning rate for sgd (not used for adadelta and rmsprop)
506 |     n_input = 4, # Number of input features per time step
507 |     optimizer=mom_sgd, # sgd, mom_sgd, adadelta and rmsprop available; plain sgd is very hard to use and not recommended (it probably needs momentum and a decaying learning rate).
508 |     encoder='lstm', # TODO: can be removed, must be lstm.
509 |     tick='hour',
510 |     validFreq=5, # Compute the validation error every this many epochs.
511 |     saveFreq=5, # Save the parameters every saveFreq epochs
512 |     maxlen=100, # Sequences longer than this get ignored
513 |     batch_size=50, # The batch size during training.
514 |     valid_batch_size=50, # The batch size used for validation/test set.
515 |     exchange='AUDJPY',
516 | 
517 |     # Parameters for extra options
518 |     noise_std=0.,
519 |     use_dropout=False, # if False slightly faster, but worse test error
520 |     # This frequently needs a bigger model.
521 |     reload_model="", # Path to a saved model we want to start from.
522 | sum_pool = False, 523 | mom_start = 0.5, 524 | mom_end = 0.99, 525 | mom_epoch_interval = 60, 526 | learning_rate_decay=0.99995, 527 | #learning_rate_decay=0.98, 528 | predict=False, 529 | input_pred=None 530 | ): 531 | 532 | ''' 533 | Main function for LSTM training 534 | ''' 535 | model_path = "/user/j/jgpavez/rnn_trading/models/" 536 | data_path = "/user/j/jgpavez/rnn_trading/data/" 537 | log_path = "/user/j/jgpavez/rnn_trading/logs/" 538 | 539 | 540 | saveto = exchange + '_model.npz' 541 | params_file = exchange + '_params.npz' 542 | dataset = exchange + '_{0}.csv'.format(tick) 543 | 544 | saveto = os.path.join(model_path, saveto) 545 | params_file = os.path.join(data_path, params_file) 546 | 547 | ydim = 1 548 | #n_iter = 10 549 | n_iter = 24 550 | # Model options 551 | model_options = locals().copy() 552 | 553 | if predict == True: 554 | return predict_lstm(input_pred, model_options) 555 | 556 | 557 | print "model options", model_options 558 | 559 | print 'Loading data' 560 | train, valid, test, mean, std = read_data(max_len=n_iter, path=dataset, params_file=params_file,min=(tick=='minute')) 561 | 562 | #YDIM?? 563 | #number of labels (output) 564 | 565 | theano.config.optimizer = 'None' 566 | 567 | print 'Building model' 568 | # This create the initial parameters as numpy ndarrays. 569 | # Dict name (string) -> numpy ndarray 570 | params = init_params(model_options) 571 | 572 | if reload_model: 573 | load_params(saveto, params) 574 | 575 | # This create Theano Shared Variable from the parameters. 576 | # Dict name (string) -> Theano Tensor Shared Variable 577 | # params and tparams have different copy of the weights. 578 | tparams = init_tparams(params) 579 | 580 | # use_noise is for dropout 581 | (use_noise, x, 582 | y, f_pred_prob, cost) = build_model(tparams, model_options) 583 | 584 | if decay_c > 0.: 585 | decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') 586 | weight_decay = 0. 587 | weight_decay += (tparams['U']**2).sum() 588 | weight_decay *= decay_c 589 | cost += weight_decay 590 | 591 | f_cost = theano.function([x, y], cost, name='f_cost') 592 | 593 | grads = tensor.grad(cost, wrt=tparams.values()) 594 | f_grad = theano.function([x, y], grads, name='f_grad') 595 | 596 | lr = tensor.scalar(name='lr') 597 | f_grad_shared, f_update = optimizer(lr, tparams, grads, 598 | x, y, cost) 599 | 600 | print 'Optimization' 601 | 602 | 603 | kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size, 604 | shuffle=True) 605 | kf_test = get_minibatches_idx(len(test[0]), valid_batch_size, 606 | shuffle=True) 607 | 608 | print "%d train examples" % len(train[0]) 609 | print "%d valid examples" % len(valid[0]) 610 | print "%d test examples" % len(test[0]) 611 | history_errs = [] 612 | best_p = None 613 | bad_count = 0 614 | 615 | if validFreq == -1: 616 | validFreq = len(train[0])/batch_size 617 | if saveFreq == -1: 618 | saveFreq = len(train[0])/batch_size 619 | 620 | uidx = 0 # the number of update done 621 | estop = False # early stop 622 | start_time = time.clock() 623 | mom = 0 624 | 625 | try: 626 | for eidx in xrange(max_epochs): 627 | n_samples = 0 628 | 629 | # Get new shuffled index for the training set. 
630 | kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True) 631 | 632 | if eidx < model_options['mom_epoch_interval']: 633 | mom = model_options['mom_start']*\ 634 | (1.0 - eidx/model_options['mom_epoch_interval'])\ 635 | + mom_end*(eidx/model_options['mom_epoch_interval']) 636 | else: 637 | mom = mom_end 638 | 639 | for _, train_index in kf: 640 | uidx += 1 641 | use_noise.set_value(1.) 642 | 643 | # Select the random examples for this minibatch 644 | y = [train[1][t] for t in train_index] 645 | x = [train[0][t]for t in train_index] 646 | 647 | # Get the data in numpy.ndarray formet. 648 | # It return something of the shape (minibatch maxlen, n samples) 649 | x, y = prepare_data(x, y, model_options['n_iter'],model_options['n_input']) 650 | 651 | if x is None: 652 | print 'Minibatch with zero sample under length ', maxlen 653 | continue 654 | n_samples += x.shape[1] 655 | cost = f_grad_shared(x, y) 656 | f_update(lrate,mom) 657 | 658 | #decay 659 | #TODO: CHECK THIS LEARNING RATE 660 | #lrate = learning_rate_decay*lrate 661 | 662 | if numpy.isnan(cost) or numpy.isinf(cost): 663 | print 'NaN detected' 664 | return 1., 1., 1. 665 | 666 | if numpy.mod(uidx, dispFreq) == 0: 667 | with open(log_path + 'log_{0}_{0}.log'.format(dim_proj, n_iter), 'a') as log_file: 668 | log_file.write('Epoch {0} Update {1} Cost {2}\n'.format(eidx, uidx, cost)) 669 | 670 | #decay 671 | #TODO: CHECK THIS LEARNING RATE 672 | lrate = learning_rate_decay*lrate 673 | if numpy.mod(eidx, validFreq) == 0: 674 | use_noise.set_value(0.) 675 | #train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options) 676 | valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options) 677 | test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options) 678 | #bckfr_err = backforecast(f_pred_prob, test, model_options) 679 | #r2_score = R_score(f_pred_prob, prepare_data, test, kf_test, model_options) 680 | bckfr_err = 0. 681 | r2_score = 0. 682 | 683 | #history_errs.append([valid_err, test_err]) 684 | history_errs.append([valid_err, bckfr_err]) 685 | 686 | if (eidx == 0 or 687 | test_err <= numpy.array(history_errs)[:, 688 | 1].min()): 689 | #bckfr_err <= numpy.array(history_errs)[:, 690 | # 1].min()): 691 | 692 | best_p = unzip(tparams) 693 | bad_counter = 0 694 | 695 | with open(log_path + 'log_{0}_{0}.log'.format(dim_proj, n_iter), 'a') as log_file: 696 | log_file.write('Valid {0} Test {1}\n'.format(valid_err,test_err)) 697 | print('Valid',valid_err, 698 | 'Test ', test_err, 699 | 'Backfore ', bckfr_err, 700 | 'R2 score ', r2_score) 701 | 702 | if (len(history_errs) > patience and 703 | valid_err >= numpy.array(history_errs)[:-patience, 704 | 0].min()): 705 | bad_counter += 1 706 | if bad_counter > patience: 707 | print 'Early Stop!' 708 | estop = True 709 | break 710 | 711 | 712 | if numpy.mod(eidx, saveFreq) == 0: 713 | print 'Saving...', 714 | 715 | if best_p is not None: 716 | params = best_p 717 | else: 718 | params = unzip(tparams) 719 | numpy.savez(saveto, history_errs=history_errs, **params) 720 | pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1) 721 | print 'Done' 722 | 723 | 724 | print 'Seen %d samples' % n_samples 725 | 726 | if estop: 727 | break 728 | 729 | except KeyboardInterrupt: 730 | print "Training interupted" 731 | 732 | end_time = time.clock() 733 | if best_p is not None: 734 | zipp(best_p, tparams) 735 | else: 736 | best_p = unzip(tparams) 737 | 738 | use_noise.set_value(0.) 
739 |     #train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options)
740 |     valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options)
741 |     test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options)
742 |     #bckfr_err = backforecast(f_pred_prob, test, model_options)
743 |     #r2_score = R_score(f_pred_prob, prepare_data, test, kf_test, model_options)
744 |     r2_score = 0.
745 |     bckfr_err = 0.
746 |     train_err = 0.  # final train error is not recomputed; keep savez/return below from failing
747 |     print 'Valid ', valid_err, 'Test ', test_err, 'Backforecasting ', bckfr_err, ' R2 score: ', r2_score
748 | 
749 |     numpy.savez(saveto, train_err=train_err,
750 |                 valid_err=valid_err, test_err=test_err,
751 |                 history_errs=history_errs, **best_p)
752 |     print 'The code ran for %d epochs, with %f sec/epochs' % (
753 |         (eidx + 1), (end_time - start_time) / (1. * (eidx + 1)))
754 |     print >> sys.stderr, ('Training took %.1fs' %
755 |                           (end_time - start_time))
756 |     return train_err, valid_err, test_err
757 | 
758 | 
759 | if __name__ == '__main__':
760 | 
761 |     # We must have floatX=float32 for this tutorial to work correctly.
762 |     theano.config.floatX = "float32"
763 |     # The next line is the new Theano default. This is a speed up.
764 |     #theano.config.scan.allow_gc = False
765 |     exchange = 'AUDJPY'
766 |     tick = 'hour'
767 |     if len(sys.argv) >= 2:
768 |         exchange = sys.argv[1]
769 |     if len(sys.argv) >= 3:
770 |         tick = sys.argv[2]
771 | 
772 |     # See train_lstm for all possible parameters and their definitions.
773 |     train_lstm(
774 |         #reload_model="lstm_model.npz",
775 |         exchange=exchange,
776 |         max_epochs=20,
777 |         #max_epochs=162,
778 |         tick=tick
779 |     )
780 | 
781 | 
--------------------------------------------------------------------------------
/lstm_ts_2.py:
--------------------------------------------------------------------------------
1 | '''
2 | LSTM RNN for stock predictions
3 | Based on sentiment analysis lstm found in deeplearning tutorials
4 | '''
5 | from collections import OrderedDict
6 | import copy
7 | import cPickle as pkl
8 | import random
9 | import sys
10 | import time
11 | import pdb
12 | 
13 | import numpy
14 | import theano
15 | import theano.tensor as tensor
16 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
17 | from theano.ifelse import ifelse
18 | 
19 | 
20 | from quant import read_data, prepare_data
21 | 
22 | #### rectified linear unit
23 | def ReLU(x):
24 |     y = tensor.maximum(0.0, x)
25 |     return(y)
26 | 
27 | def get_minibatches_idx(n, minibatch_size, shuffle=False):
28 |     """
29 |     Used to shuffle the dataset at each iteration.
30 |     """
31 | 
32 |     idx_list = numpy.arange(n, dtype="int32")
33 | 
34 |     if shuffle:
35 |         random.shuffle(idx_list)
36 | 
37 |     minibatches = []
38 |     minibatch_start = 0
39 |     for i in range(n // minibatch_size):
40 |         minibatches.append(idx_list[minibatch_start:
41 |                                     minibatch_start + minibatch_size])
42 |         minibatch_start += minibatch_size
43 | 
44 |     if (minibatch_start != n):
45 |         # Make a minibatch out of what is left
46 |         minibatches.append(idx_list[minibatch_start:])
47 | 
48 |     return zip(range(len(minibatches)), minibatches)
49 | 
50 | def zipp(params, tparams):
51 |     """
52 |     When we reload the model. Needed for the GPU stuff.
53 |     """
54 |     for kk, vv in params.iteritems():
55 |         tparams[kk].set_value(vv)
56 | 
57 | 
58 | def unzip(zipped):
59 |     """
60 |     When we pickle the model. Needed for the GPU stuff.
61 | """ 62 | new_params = OrderedDict() 63 | for kk, vv in zipped.iteritems(): 64 | new_params[kk] = vv.get_value() 65 | return new_params 66 | 67 | 68 | def dropout_layer(state_before, use_noise, trng): 69 | proj = tensor.switch(use_noise, 70 | (state_before * 71 | trng.binomial(state_before.shape, 72 | p=0.5, n=1, 73 | dtype=state_before.dtype)), 74 | state_before * 0.5) 75 | return proj 76 | 77 | 78 | def _p(pp, name): 79 | return '%s_%s' % (pp, name) 80 | 81 | 82 | def init_params(options): 83 | """ 84 | Global (not LSTM) parameter. For the embeding and the classifier. 85 | """ 86 | params = OrderedDict() 87 | # embedding 88 | randn = numpy.random.rand(options['n_input'], 89 | options['dim_proj']) 90 | params['Wemb'] = (0.01 * randn).astype('float32') 91 | params = get_layer(options['encoder'])[0](options, 92 | params, 93 | prefix=options['encoder']) 94 | # classifier 95 | params['U'] = 0.01 * numpy.random.randn(options['dim_proj'], 96 | options['ydim']).astype('float32') 97 | params['b'] = numpy.zeros((options['ydim'],)).astype('float32') 98 | 99 | return params 100 | 101 | 102 | def load_params(path, params): 103 | pp = numpy.load(path) 104 | for kk, vv in params.iteritems(): 105 | if kk not in pp: 106 | raise Warning('%s is not in the archive' % kk) 107 | params[kk] = pp[kk] 108 | 109 | return params 110 | 111 | 112 | def init_tparams(params): 113 | tparams = OrderedDict() 114 | for kk, pp in params.iteritems(): 115 | tparams[kk] = theano.shared(params[kk], name=kk) 116 | return tparams 117 | 118 | 119 | def get_layer(name): 120 | fns = layers[name] 121 | return fns 122 | 123 | 124 | def ortho_weight(ndim): 125 | W = numpy.random.randn(ndim, ndim) 126 | u, s, v = numpy.linalg.svd(W) 127 | return u.astype('float32') 128 | 129 | 130 | def param_init_lstm(options, params, prefix='lstm'): 131 | """ 132 | Init the LSTM parameter: 133 | 134 | :see: init_params 135 | """ 136 | W = numpy.concatenate([ortho_weight(options['dim_proj']), 137 | ortho_weight(options['dim_proj']), 138 | ortho_weight(options['dim_proj']), 139 | ortho_weight(options['dim_proj'])], axis=1) 140 | params[_p(prefix, 'W')] = W.astype('float32') 141 | U = numpy.concatenate([ortho_weight(options['dim_proj']), 142 | ortho_weight(options['dim_proj']), 143 | ortho_weight(options['dim_proj']), 144 | ortho_weight(options['dim_proj'])], axis=1) 145 | params[_p(prefix, 'U')] = U.astype('float32') 146 | b = numpy.zeros((4 * options['dim_proj'],)) 147 | params[_p(prefix, 'b')] = b.astype('float32') 148 | 149 | return params 150 | 151 | 152 | def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None): 153 | nsteps = state_below.shape[0] 154 | if state_below.ndim == 3: 155 | n_samples = state_below.shape[1] 156 | else: 157 | n_samples = 1 158 | 159 | #assert mask is not None 160 | 161 | def _slice(_x, n, dim): 162 | if _x.ndim == 3: 163 | return _x[:, :, n*dim:(n+1)*dim] 164 | return _x[:, n*dim:(n+1)*dim] 165 | 166 | def _step(x_, h_, c_): 167 | preact = tensor.dot(h_, tparams[_p(prefix, 'U')]) 168 | preact += x_ 169 | preact += tparams[_p(prefix, 'b')] 170 | 171 | i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj'])) 172 | f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj'])) 173 | o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj'])) 174 | c = tensor.tanh(_slice(preact, 3, options['dim_proj'])) 175 | 176 | c = f * c_ + i * c 177 | #c = m_[:, None] * c + (1. 
- m_)[:, None] * c_ 178 | 179 | h = o * tensor.tanh(c) 180 | #TODO: I think this don't apply since is made to avoid sequences smaller tan max_len 181 | #h = m_[:, None] * h + (1. - m_)[:, None] * h_ 182 | 183 | return h, c 184 | 185 | state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) + 186 | tparams[_p(prefix, 'b')]) 187 | 188 | dim_proj = options['dim_proj'] 189 | rval, updates = theano.scan(_step, 190 | sequences=[state_below], 191 | outputs_info=[tensor.alloc(0., n_samples, 192 | dim_proj), 193 | tensor.alloc(0., n_samples, 194 | dim_proj)], 195 | name=_p(prefix, '_layers'), 196 | n_steps=nsteps) 197 | return rval[0] 198 | 199 | 200 | # ff: Feed Forward (normal neural net), only useful to put after lstm 201 | # before the classifier. 202 | layers = {'lstm': (param_init_lstm, lstm_layer)} 203 | 204 | def mom_sgd(lr, tparams, grads, x, y, cost): 205 | """ Stochastic Gradient Descent 206 | 207 | :note: A more complicated version of sgd then needed. This is 208 | done like that for adadelta and rmsprop. 209 | 210 | """ 211 | 212 | updates = OrderedDict() 213 | 214 | mom = tensor.scalar(name='mom') 215 | gmomshared = [theano.shared(p.get_value(), name='%s_mom_grad' %k) 216 | for k,p in tparams.iteritems()] 217 | 218 | # New set of shared variable that will contain the gradient 219 | # for a mini-batch. 220 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 221 | for k, p in tparams.iteritems()] 222 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 223 | 224 | # Function that computes gradients for a mini-batch, but do not 225 | # updates the weights. 226 | f_grad_shared = theano.function([x, y], cost, updates=gsup, 227 | name='sgd_f_grad_shared') 228 | 229 | for gm,gp in zip(gmomshared,gshared): 230 | updates[gm] = mom*gm - (1.0 - mom) * lr * gp 231 | #gmomup = [(gm, mom*gm - (1.0 - mom) * lr * gp) for gm,gp in 232 | # zip(gmomshared, gshared)] 233 | 234 | #pup = [(p, p + gm) for p, gm in zip(tparams.values(), gmomup)] 235 | for p,gm in zip(tparams.values(), gmomshared): 236 | updates[p] = p + updates[gm] 237 | 238 | # Function that updates the weights from the previously computed 239 | # gradient. 240 | f_update = theano.function([lr,mom], [], updates=updates, 241 | name='sgd_f_update') 242 | 243 | return f_grad_shared, f_update 244 | 245 | 246 | def sgd(lr, tparams, grads, x, y, cost): 247 | """ Stochastic Gradient Descent 248 | 249 | :note: A more complicated version of sgd then needed. This is 250 | done like that for adadelta and rmsprop. 251 | 252 | """ 253 | # New set of shared variable that will contain the gradient 254 | # for a mini-batch. 255 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 256 | for k, p in tparams.iteritems()] 257 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 258 | 259 | # Function that computes gradients for a mini-batch, but do not 260 | # updates the weights. 261 | f_grad_shared = theano.function([x, y], cost, updates=gsup, 262 | name='sgd_f_grad_shared') 263 | 264 | pup = [(p, p - lr * g) for p, g in zip(tparams.values(), gshared)] 265 | 266 | # Function that updates the weights from the previously computed 267 | # gradient. 
268 | f_update = theano.function([lr], [], updates=pup, 269 | name='sgd_f_update') 270 | 271 | return f_grad_shared, f_update 272 | 273 | 274 | def adadelta(lr, tparams, grads, x, y, cost): 275 | zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), 276 | name='%s_grad' % k) 277 | for k, p in tparams.iteritems()] 278 | running_up2 = [theano.shared(p.get_value() * numpy.float32(0.), 279 | name='%s_rup2' % k) 280 | for k, p in tparams.iteritems()] 281 | running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), 282 | name='%s_rgrad2' % k) 283 | for k, p in tparams.iteritems()] 284 | 285 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 286 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 287 | for rg2, g in zip(running_grads2, grads)] 288 | 289 | f_grad_shared = theano.function([x, y], cost, updates=zgup+rg2up, 290 | name='adadelta_f_grad_shared') 291 | 292 | updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg 293 | for zg, ru2, rg2 in zip(zipped_grads, 294 | running_up2, 295 | running_grads2)] 296 | ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) 297 | for ru2, ud in zip(running_up2, updir)] 298 | param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)] 299 | 300 | f_update = theano.function([lr], [], updates=ru2up+param_up, 301 | on_unused_input='ignore', 302 | name='adadelta_f_update', 303 | mode='DebugMode') 304 | 305 | return f_grad_shared, f_update 306 | 307 | 308 | def rmsprop(lr, tparams, grads, x, y, cost): 309 | zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), 310 | name='%s_grad' % k) 311 | for k, p in tparams.iteritems()] 312 | running_grads = [theano.shared(p.get_value() * numpy.float32(0.), 313 | name='%s_rgrad' % k) 314 | for k, p in tparams.iteritems()] 315 | running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), 316 | name='%s_rgrad2' % k) 317 | for k, p in tparams.iteritems()] 318 | 319 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 320 | rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)] 321 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 322 | for rg2, g in zip(running_grads2, grads)] 323 | 324 | f_grad_shared = theano.function([x, y], cost, 325 | updates=zgup + rgup + rg2up, 326 | name='rmsprop_f_grad_shared') 327 | 328 | updir = [theano.shared(p.get_value() * numpy.float32(0.), 329 | name='%s_updir' % k) 330 | for k, p in tparams.iteritems()] 331 | updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4)) 332 | for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, 333 | running_grads2)] 334 | param_up = [(p, p + udn[1]) 335 | for p, udn in zip(tparams.values(), updir_new)] 336 | f_update = theano.function([lr], [], updates=updir_new+param_up, 337 | on_unused_input='ignore', 338 | name='rmsprop_f_update') 339 | 340 | return f_grad_shared, f_update 341 | 342 | 343 | def build_model(tparams, options): 344 | trng = RandomStreams(1234) 345 | 346 | # Used for dropout. 
347 | use_noise = theano.shared(numpy.float32(0.)) 348 | 349 | x = tensor.tensor3('x', dtype='float32') 350 | #mask = tensor.matrix('mask', dtype='float32') 351 | y = tensor.vector('y', dtype='int64') 352 | 353 | n_timesteps = x.shape[0] 354 | n_samples = x.shape[1] 355 | n_dim = x.shape[2] 356 | 357 | emb = tensor.dot(x,tparams['Wemb']) 358 | #emb = tensor.nnet.sigmoid(emb) 359 | #emb = ReLU(emb) 360 | 361 | if options['use_dropout']: 362 | emb = dropout_layer(emb, use_noise, trng) 363 | 364 | proj = get_layer(options['encoder'])[1](tparams, emb, options, 365 | prefix=options['encoder'] 366 | ) 367 | 368 | 369 | if options['encoder'] == 'lstm' and options['sum_pool'] == True: 370 | proj = proj.sum(axis=0) 371 | proj = proj / options['n_iter'] 372 | else: 373 | proj = proj[-1] 374 | #if options['use_dropout']: 375 | # proj = dropout_layer(proj, use_noise, trng) 376 | 377 | #pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U'])+tparams['b']) 378 | #pred = tensor.nnet.sigmoid(tensor.dot(proj, tparams['U'])\ 379 | # + tparams['b']) 380 | pred = tensor.dot(proj, tparams['U']) + tparams['b'] 381 | pred = tensor.nnet.softmax(pred) 382 | 383 | f_pred_prob = theano.function([x], pred, name='f_pred_prob') 384 | #f_pred = theano.function(x, pred.argmax(axis=1), name='f_pred') 385 | 386 | #cost = tensor.mean((y-pred.T)**2) 387 | 388 | cost = -tensor.log(pred[tensor.arange(n_samples), y] + 1e-8).mean() 389 | #cost = tensor.mean(tensor.nnet.binary_crossentropy(pred.T, y)) 390 | 391 | 392 | return use_noise, x, y, f_pred_prob, cost 393 | 394 | 395 | def pred_probs(f_pred_prob, prepare_data, data, iterator, model_options, verbose=False): 396 | """ If you want to use a trained model, this is useful to compute 397 | the probabilities of new examples. 398 | """ 399 | n_samples = len(data[0]) 400 | probs = numpy.zeros((n_samples, 2)).astype('float32') 401 | 402 | n_done = 0 403 | 404 | for _, valid_index in iterator: 405 | x, y = prepare_data([data[0][t] for t in valid_index], 406 | numpy.array(data[1])[valid_index], 407 | model_options['n_iter'],model_options['n_input'],up=True) 408 | pred_probs = f_pred_prob(x) 409 | probs[valid_index, :] = pred_probs 410 | 411 | n_done += len(valid_index) 412 | if verbose: 413 | print '%d/%d samples classified' % (n_done, n_samples) 414 | 415 | return probs 416 | 417 | 418 | def pred_error(f_pred, prepare_data, data, iterator, model_options, verbose=False): 419 | """ 420 | Just compute the error 421 | f_pred: Theano fct computing the prediction 422 | prepare_data: usual prepare_data for that dataset. 423 | """ 424 | valid_err = 0 425 | for _, valid_index in iterator: 426 | # TODO: This is not very efficient I should check 427 | x, y = prepare_data([data[0][t] for t in valid_index], 428 | numpy.array(data[1])[valid_index], 429 | model_options['n_iter'],model_options['n_input'],up=True) 430 | 431 | 432 | preds_prob = f_pred(x) 433 | preds = preds_prob.argmax(axis=1) 434 | targets = numpy.array(data[1])[valid_index] 435 | valid_err += tensor.sum(tensor.neq(targets,preds)) 436 | #valid_err = 1. - numpy.float32(valid_err) / len(data[0]) 437 | valid_err = float(valid_err.eval()) 438 | return valid_err / float(len(data[0])) 439 | 440 | 441 | 442 | def train_lstm( 443 | dim_proj=32, # word embeding dimension and LSTM number of hidden units. 
444 | patience=10, # Number of epoch to wait before early stop if no progress 445 | max_epochs=150, # The maximum number of epoch to run 446 | dispFreq=10, # Display to stdout the training progress every N updates 447 | decay_c=0., # Weight decay for the classifier applied to the U weights. 448 | lrate=0.1, # Learning rate for sgd (not used for adadelta and rmsprop) 449 | n_input = 4, # Vocabulary size 450 | optimizer=mom_sgd, # sgd,mom_sgs, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate). 451 | encoder='lstm', # TODO: can be removed must be lstm. 452 | saveto='lstm_model.npz', # The best model will be saved there 453 | validFreq=170, # Compute the validation error after this number of update. 454 | saveFreq=1110, # Save the parameters after every saveFreq updates 455 | maxlen=100, # Sequence longer then this get ignored 456 | batch_size=16, # The batch size during training. 457 | valid_batch_size=64, # The batch size used for validation/test set. 458 | dataset='imdb', 459 | 460 | # Parameter for extra option 461 | noise_std=0., 462 | use_dropout=False, # if False slightly faster, but worst test error 463 | # This frequently need a bigger model. 464 | reload_model="", # Path to a saved model we want to start from. 465 | sum_pool = False, 466 | mom_start = 0.5, 467 | mom_end = 0.99, 468 | mom_epoch_interval = 300, 469 | learning_rate_decay=0.99995 470 | 471 | ): 472 | 473 | # Model options 474 | model_options = locals().copy() 475 | print "model options", model_options 476 | 477 | print 'Loading data' 478 | ydim = 2 479 | n_iter = 10 480 | 481 | train, valid, test, mean, std = read_data(max_len=n_iter,up=True) 482 | 483 | #YDIM?? 484 | #number of labels (output) 485 | 486 | model_options['ydim'] = ydim 487 | model_options['n_iter'] = n_iter 488 | 489 | theano.config.optimizer = 'None' 490 | 491 | print 'Building model' 492 | # This create the initial parameters as numpy ndarrays. 493 | # Dict name (string) -> numpy ndarray 494 | params = init_params(model_options) 495 | 496 | if reload_model: 497 | load_params('lstm_model.npz', params) 498 | 499 | # This create Theano Shared Variable from the parameters. 500 | # Dict name (string) -> Theano Tensor Shared Variable 501 | # params and tparams have different copy of the weights. 502 | tparams = init_tparams(params) 503 | 504 | # use_noise is for dropout 505 | (use_noise, x, 506 | y, f_pred_prob, cost) = build_model(tparams, model_options) 507 | 508 | if decay_c > 0.: 509 | decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') 510 | weight_decay = 0. 
511 | weight_decay += (tparams['U']**2).sum() 512 | weight_decay *= decay_c 513 | cost += weight_decay 514 | 515 | f_cost = theano.function([x, y], cost, name='f_cost') 516 | 517 | grads = tensor.grad(cost, wrt=tparams.values()) 518 | f_grad = theano.function([x, y], grads, name='f_grad') 519 | 520 | lr = tensor.scalar(name='lr') 521 | f_grad_shared, f_update = optimizer(lr, tparams, grads, 522 | x, y, cost) 523 | 524 | print 'Optimization' 525 | 526 | 527 | kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size, 528 | shuffle=True) 529 | kf_test = get_minibatches_idx(len(test[0]), valid_batch_size, 530 | shuffle=True) 531 | 532 | print "%d train examples" % len(train[0]) 533 | print "%d valid examples" % len(valid[0]) 534 | print "%d test examples" % len(test[0]) 535 | history_errs = [] 536 | best_p = None 537 | bad_count = 0 538 | 539 | if validFreq == -1: 540 | validFreq = len(train[0])/batch_size 541 | if saveFreq == -1: 542 | saveFreq = len(train[0])/batch_size 543 | 544 | uidx = 0 # the number of update done 545 | estop = False # early stop 546 | start_time = time.clock() 547 | mom = 0 548 | 549 | try: 550 | for eidx in xrange(max_epochs): 551 | n_samples = 0 552 | 553 | # Get new shuffled index for the training set. 554 | kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True) 555 | 556 | for _, train_index in kf: 557 | uidx += 1 558 | use_noise.set_value(1.) 559 | 560 | # Select the random examples for this minibatch 561 | y = [train[1][t] for t in train_index] 562 | x = [train[0][t]for t in train_index] 563 | 564 | # Get the data in numpy.ndarray formet. 565 | # It return something of the shape (minibatch maxlen, n samples) 566 | x, y = prepare_data(x, y, model_options['n_iter'],model_options['n_input'],up=True) 567 | 568 | if x is None: 569 | print 'Minibatch with zero sample under length ', maxlen 570 | continue 571 | n_samples += x.shape[1] 572 | if eidx < model_options['mom_epoch_interval']: 573 | mom = model_options['mom_start']*\ 574 | (1.0 - eidx/model_options['mom_epoch_interval'])\ 575 | + mom_end*(eidx/model_options['mom_epoch_interval']) 576 | else: 577 | mom = mom_end 578 | 579 | cost = f_grad_shared(x, y) 580 | f_update(lrate,mom) 581 | 582 | #decay 583 | lrate = learning_rate_decay*lrate 584 | 585 | if numpy.isnan(cost) or numpy.isinf(cost): 586 | print 'NaN detected' 587 | return 1., 1., 1. 588 | 589 | if numpy.mod(uidx, dispFreq) == 0: 590 | print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost 591 | 592 | if numpy.mod(uidx, saveFreq) == 0: 593 | print 'Saving...', 594 | 595 | if best_p is not None: 596 | params = best_p 597 | else: 598 | params = unzip(tparams) 599 | numpy.savez(saveto, history_errs=history_errs, **params) 600 | pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1) 601 | print 'Done' 602 | 603 | if numpy.mod(uidx, validFreq) == 0: 604 | use_noise.set_value(0.) 
605 | #train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options) 606 | valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options) 607 | test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options) 608 | 609 | 610 | history_errs.append([valid_err, test_err]) 611 | 612 | if (uidx == 0 or 613 | valid_err <= numpy.array(history_errs)[:, 614 | 0].min()): 615 | 616 | best_p = unzip(tparams) 617 | bad_counter = 0 618 | 619 | print ('Valid ', valid_err, 620 | 'Test ', test_err) 621 | 622 | if (len(history_errs) > patience and 623 | valid_err >= numpy.array(history_errs)[:-patience, 624 | 0].min()): 625 | bad_counter += 1 626 | if bad_counter > patience: 627 | print 'Early Stop!' 628 | estop = True 629 | break 630 | 631 | print 'Seen %d samples' % n_samples 632 | 633 | if estop: 634 | break 635 | 636 | except KeyboardInterrupt: 637 | print "Training interupted" 638 | 639 | end_time = time.clock() 640 | if best_p is not None: 641 | zipp(best_p, tparams) 642 | else: 643 | best_p = unzip(tparams) 644 | 645 | use_noise.set_value(0.) 646 | train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options) 647 | valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options) 648 | test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options) 649 | 650 | print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err 651 | 652 | numpy.savez(saveto, train_err=train_err, 653 | valid_err=valid_err, test_err=test_err, 654 | history_errs=history_errs, **best_p) 655 | print 'The code run for %d epochs, with %f sec/epochs' % ( 656 | (eidx + 1), (end_time - start_time) / (1. * (eidx + 1))) 657 | print >> sys.stderr, ('Training took %.1fs' % 658 | (end_time - start_time)) 659 | return train_err, valid_err, test_err 660 | 661 | 662 | if __name__ == '__main__': 663 | 664 | # We must have floatX=float32 for this tutorial to work correctly. 665 | theano.config.floatX = "float32" 666 | # The next line is the new Theano default. This is a speed up. 667 | #theano.config.scan.allow_gc = False 668 | 669 | # See function train for all possible parameter and there definition. 670 | train_lstm( 671 | #reload_model="lstm_model.npz", 672 | max_epochs=150, 673 | ) 674 | 675 | -------------------------------------------------------------------------------- /quant.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pdb 3 | import sys 4 | 5 | import numpy 6 | 7 | import theano 8 | from numpy import genfromtxt 9 | from pandas import Series 10 | 11 | #import matplotlib.pyplot as plt 12 | import sklearn.cross_validation as cv 13 | from sklearn import preprocessing 14 | 15 | def data_preprocessing(data): 16 | data = data[350:,:] 17 | # Standarization 18 | 19 | # Compute compound return serie 20 | #data = numpy.log(data / numpy.roll(data, 1, axis = 0)) 21 | #data = data[1:,:] 22 | #data = data / numpy.roll(data,1,axis=0) 23 | #data = data[1:,:] - 1. 24 | 25 | #print 26 | #ts = Series(numpy.ravel(data)) 27 | #ts.plot() 28 | #plt.show() 29 | 30 | mean = data.mean(axis=0) 31 | std = data.std(axis=0) 32 | 33 | data = data - mean 34 | data = data/std 35 | #Some kind of smoothing?? 
36 | 37 | #min_max = preprocessing.MinMaxScaler() 38 | #data = min_max.fit_transform(data) 39 | 40 | #Put between 1 and 0 41 | return data,mean,std 42 | 43 | def read_data(path="table_a.csv", dir="/user/j/jgpavez/rnn_trading/data/", 44 | max_len=30, valid_portion=0.1, columns=4, up=False ): 45 | path = os.path.join(dir, path) 46 | 47 | data = genfromtxt(path, delimiter=',') 48 | 49 | data = data[:,2:(2+columns)] 50 | 51 | data,mean,std = data_preprocessing(data) 52 | 53 | x_data = numpy.array([data[i:i+max_len,:] for i in xrange(len(data)-max_len)]) 54 | y_data = numpy.array([data[i][0] for i in xrange(max_len , len(data))]) 55 | 56 | if up is True: 57 | y_data = y_data > x_data[:,-1,0] 58 | y_data = numpy.asarray(y_data, dtype='int64') 59 | 60 | # split data into training and test 61 | train_set_x, test_set_x, train_set_y, test_set_y = cv.train_test_split(x_data, 62 | y_data, test_size=0.3, random_state=0) 63 | 64 | # split training set into validation set 65 | n_samples = len(train_set_x) 66 | sidx = numpy.random.permutation(n_samples) 67 | n_train = int(numpy.round(n_samples * (1. - valid_portion))) 68 | valid_set_x = [train_set_x[s] for s in sidx[n_train:]] 69 | valid_set_y = [train_set_y[s] for s in sidx[n_train:]] 70 | train_set_x = [train_set_x[s] for s in sidx[:n_train]] 71 | train_set_y = [train_set_y[s] for s in sidx[:n_train]] 72 | 73 | train = (train_set_x, train_set_y) 74 | valid = (valid_set_x, valid_set_y) 75 | test = (test_set_x, test_set_y) 76 | 77 | return train, valid, test, mean, std 78 | 79 | def prepare_data(seqs, labels, steps, x_dim, up=False): 80 | n_samples = len(seqs) 81 | max_len = steps 82 | x = numpy.zeros((max_len, n_samples, x_dim)).astype('float32') 83 | if up is True: 84 | y = numpy.asarray(labels, dtype='int64') 85 | else: 86 | y = numpy.asarray(labels, dtype='float32') 87 | 88 | for idx, s in enumerate(seqs): 89 | x[:,idx,:] = s 90 | 91 | return x, y 92 | 93 | -------------------------------------------------------------------------------- /svr_ts.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pdb 3 | import sys 4 | 5 | import numpy 6 | from sklearn import svm 7 | from quant import read_data 8 | 9 | def train_svr(dataset=''): 10 | train, valid, test, mean, std = read_data(columns=1, max_len=10) 11 | x_train = [[x[0] for x in row] for row in train[0]] 12 | x_test = [[x[0] for x in row] for row in test[0]] 13 | 14 | 15 | svr = svm.SVR() 16 | svr.fit(x_train,train[1]) 17 | 18 | pred = svr.predict(x_test) 19 | y = numpy.asarray(test[1], dtype='float32') 20 | pred = numpy.asarray(pred, dtype='float32') 21 | 22 | #y = y*std + mean 23 | #pred = pred*std + mean 24 | mean_y = y.mean() 25 | ssr = ((y - pred)**2).sum() 26 | sst = ((y - mean_y)**2).sum() 27 | 28 | r2 = 1. - (ssr/sst) 29 | cost = ((y-pred)**2).mean() 30 | print 'Cost on Test sample, size: %d, cost: %f, R score: %f'%(len(x_test),cost,r2) 31 | 32 | 33 | if __name__ == '__main__': 34 | train_svr(dataset='table_a.csv') 35 | --------------------------------------------------------------------------------
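Usage note (not part of the repository): the following is a minimal, hypothetical driver sketch showing how the entry points in lstm_forex.py could be called, assuming the hard-coded data/model directories under /user/j/jgpavez/rnn_trading/ exist and contain AUDJPY_hour.csv from histdata.com. The script itself, the placeholder window values, and the manual standardization step are illustrative assumptions, not code from the repository.

    # hypothetical driver script -- not part of the repository
    import numpy
    import theano

    from lstm_forex import train_lstm

    theano.config.floatX = "float32"

    # 1) Train on hourly AUDJPY data; this saves AUDJPY_model.npz (weights)
    #    and AUDJPY_params.npz (the mean/std used to standardize the series).
    train_lstm(exchange='AUDJPY', tick='hour', max_epochs=20)

    # 2) Reuse the saved model: predict=True skips training and runs pred_probs
    #    on input_pred, a list of windows of shape (n_iter, n_input) = (24, 4).
    #    Each window has to be standardized with the saved mean/std, because
    #    read_data() applies that preprocessing during training.
    params = numpy.load('/user/j/jgpavez/rnn_trading/data/AUDJPY_params.npz')
    raw_window = numpy.random.rand(24, 4)   # placeholder for the last 24 hourly bars
    window = ((raw_window - params['mean']) / params['std']).astype('float32')
    preds = train_lstm(exchange='AUDJPY', tick='hour', predict=True,
                       input_pred=[window])
    print preds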