├── README.md
├── bi_lstm_forex.py
├── deep_lstm_forex.py
├── forex.py
├── lstm_forex.py
├── lstm_ts_2.py
├── quant.py
└── svr_ts.py
/README.md:
--------------------------------------------------------------------------------
 1 | # LSTM Forex prediction
 2 | A long short-term memory (LSTM) recurrent neural network for predicting forex time series.
 3 | 
 4 | The model can be trained on daily or minute data of any forex pair. The data can be downloaded
 5 | from [here](http://www.histdata.com/download-free-forex-data/).
 6 | 
 7 | The LSTM-RNN should learn to predict the next day's or minute's value from the preceding window of data.
 8 | 
 9 | The neural network is implemented in Theano.
10 | 
11 | This code is not maintained anymore.
12 | 
13 | 
14 | 
15 | 
16 | 
--------------------------------------------------------------------------------
/bi_lstm_forex.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Bidirectional LSTM RNN for forex predictions
 3 | Based on the sentiment analysis LSTM found in the deeplearning tutorials
 4 | '''
 5 | from collections import OrderedDict
 6 | import copy
 7 | import cPickle as pkl
 8 | import random
 9 | import sys
10 | import time
11 | import pdb
12 | import os
13 | 
14 | import numpy
15 | import theano
16 | import theano.tensor as tensor
17 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
18 | from theano.ifelse import ifelse
19 | 
20 | 
21 | from forex import read_data, prepare_data
22 | 
23 | #### rectified linear unit
24 | def ReLU(x):
25 |     y = tensor.maximum(0.0, x)
26 |     return(y)
27 | 
28 | def get_minibatches_idx(n, minibatch_size, shuffle=False):
29 |     """
30 |     Used to shuffle the dataset at each iteration.
31 |     """
32 | 
33 |     idx_list = numpy.arange(n, dtype="int32")
34 | 
35 |     if shuffle:
36 |         random.shuffle(idx_list)
37 | 
38 |     minibatches = []
39 |     minibatch_start = 0
40 |     for i in range(n // minibatch_size):
41 |         minibatches.append(idx_list[minibatch_start:
42 |                                     minibatch_start + minibatch_size])
43 |         minibatch_start += minibatch_size
44 | 
45 |     if (minibatch_start != n):
46 |         # Make a minibatch out of what is left
47 |         minibatches.append(idx_list[minibatch_start:])
48 | 
49 |     return zip(range(len(minibatches)), minibatches)
50 | 
51 | def zipp(params, tparams):
52 |     """
53 |     When we reload the model. Needed for the GPU stuff.
54 |     """
55 |     for kk, vv in params.iteritems():
56 |         tparams[kk].set_value(vv)
57 | 
58 | 
59 | def unzip(zipped):
60 |     """
61 |     When we pickle the model. Needed for the GPU stuff.
62 |     """
63 |     new_params = OrderedDict()
64 |     for kk, vv in zipped.iteritems():
65 |         new_params[kk] = vv.get_value()
66 |     return new_params
67 | 
68 | 
69 | def dropout_layer(state_before, use_noise, trng):
70 |     proj = tensor.switch(use_noise,
71 |                          (state_before *
72 |                           trng.binomial(state_before.shape,
73 |                                         p=0.5, n=1,
74 |                                         dtype=state_before.dtype)),
75 |                          state_before * 0.5)
76 |     return proj
77 | 
78 | 
79 | def _p(pp, name):
80 |     return '%s_%s' % (pp, name)
81 | 
82 | 
83 | def init_params(options):
84 |     """
85 |     Global (not LSTM) parameters, for the embedding and the classifier.
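    As a rough sketch of the shapes this produces with the defaults used in
    train_lstm below (n_input=4, dim_proj=32, ydim=1):

        params['Wemb']    # (4, 32)   projection of the input features
        params['lstm_W']  # (32, 128) input-to-gate weights (4 gates stacked)
        params['lstm_U']  # (32, 128) recurrent gate weights
        params['lstm_b']  # (128,)    gate biases
        params['U']       # (32, 1)   regression output weights
        params['b']       # (1,)      regression output bias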
86 | """ 87 | params = OrderedDict() 88 | # embedding 89 | randn = numpy.random.rand(options['n_input'], 90 | options['dim_proj']) 91 | params['Wemb'] = (0.01 * randn).astype('float32') 92 | params = get_layer(options['encoder'])[0](options, 93 | params, 94 | prefix=options['encoder']) 95 | # classifier 96 | params['U'] = 0.01 * numpy.random.randn(options['dim_proj'], 97 | options['ydim']).astype('float32') 98 | params['b'] = numpy.zeros((options['ydim'],)).astype('float32') 99 | 100 | return params 101 | 102 | 103 | def load_params(path, params): 104 | pp = numpy.load(path) 105 | for kk, vv in params.iteritems(): 106 | if kk not in pp: 107 | raise Warning('%s is not in the archive' % kk) 108 | params[kk] = pp[kk] 109 | 110 | return params 111 | 112 | 113 | def init_tparams(params): 114 | tparams = OrderedDict() 115 | for kk, pp in params.iteritems(): 116 | tparams[kk] = theano.shared(params[kk], name=kk) 117 | return tparams 118 | 119 | 120 | def get_layer(name): 121 | fns = layers[name] 122 | return fns 123 | 124 | 125 | def ortho_weight(ndim): 126 | W = numpy.random.randn(ndim, ndim) 127 | u, s, v = numpy.linalg.svd(W) 128 | return u.astype('float32') 129 | 130 | 131 | def param_init_lstm(options, params, prefix='lstm'): 132 | """ 133 | Init the LSTM parameter: 134 | 135 | :see: init_params 136 | """ 137 | W = numpy.concatenate([ortho_weight(options['dim_proj']), 138 | ortho_weight(options['dim_proj']), 139 | ortho_weight(options['dim_proj']), 140 | ortho_weight(options['dim_proj'])], axis=1) 141 | params[_p(prefix, 'W')] = W.astype('float32') 142 | U = numpy.concatenate([ortho_weight(options['dim_proj']), 143 | ortho_weight(options['dim_proj']), 144 | ortho_weight(options['dim_proj']), 145 | ortho_weight(options['dim_proj'])], axis=1) 146 | params[_p(prefix, 'U')] = U.astype('float32') 147 | b = numpy.zeros((4 * options['dim_proj'],)) 148 | params[_p(prefix, 'b')] = b.astype('float32') 149 | 150 | return params 151 | 152 | 153 | def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None): 154 | nsteps = state_below.shape[0] 155 | if state_below.ndim == 3: 156 | n_samples = state_below.shape[1] 157 | else: 158 | n_samples = 1 159 | 160 | #assert mask is not None 161 | 162 | def _slice(_x, n, dim): 163 | if _x.ndim == 3: 164 | return _x[:, :, n*dim:(n+1)*dim] 165 | return _x[:, n*dim:(n+1)*dim] 166 | 167 | def _step(x_, h_, c_): 168 | preact = tensor.dot(h_, tparams[_p(prefix, 'U')]) 169 | preact += x_ 170 | preact += tparams[_p(prefix, 'b')] 171 | 172 | i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj'])) 173 | f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj'])) 174 | o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj'])) 175 | c = tensor.tanh(_slice(preact, 3, options['dim_proj'])) 176 | 177 | c = f * c_ + i * c 178 | #c = m_[:, None] * c + (1. - m_)[:, None] * c_ 179 | 180 | h = o * tensor.tanh(c) 181 | #TODO: I think this don't apply since is made to avoid sequences smaller tan max_len 182 | #h = m_[:, None] * h + (1. 
- m_)[:, None] * h_ 183 | 184 | return h, c 185 | 186 | state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) + 187 | tparams[_p(prefix, 'b')]) 188 | 189 | dim_proj = options['dim_proj'] 190 | rval, updates = theano.scan(_step, 191 | sequences=[state_below], 192 | outputs_info=[tensor.alloc(0., n_samples, 193 | dim_proj), 194 | tensor.alloc(0., n_samples, 195 | dim_proj)], 196 | name=_p(prefix, '_layers'), 197 | n_steps=nsteps) 198 | return rval[0] 199 | 200 | 201 | # ff: Feed Forward (normal neural net), only useful to put after lstm 202 | # before the classifier. 203 | layers = {'lstm': (param_init_lstm, lstm_layer)} 204 | 205 | def mom_sgd(lr, tparams, grads, x, rx, y, cost): 206 | """ Stochastic Gradient Descent 207 | 208 | :note: A more complicated version of sgd then needed. This is 209 | done like that for adadelta and rmsprop. 210 | 211 | """ 212 | 213 | updates = OrderedDict() 214 | 215 | mom = tensor.scalar(name='mom') 216 | gmomshared = [theano.shared(p.get_value(), name='%s_mom_grad' %k) 217 | for k,p in tparams.iteritems()] 218 | 219 | # New set of shared variable that will contain the gradient 220 | # for a mini-batch. 221 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 222 | for k, p in tparams.iteritems()] 223 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 224 | 225 | # Function that computes gradients for a mini-batch, but do not 226 | # updates the weights. 227 | f_grad_shared = theano.function([x,rx, y], cost, updates=gsup, 228 | name='sgd_f_grad_shared') 229 | 230 | for gm,gp in zip(gmomshared,gshared): 231 | updates[gm] = mom*gm - (1.0 - mom) * lr * gp 232 | #gmomup = [(gm, mom*gm - (1.0 - mom) * lr * gp) for gm,gp in 233 | # zip(gmomshared, gshared)] 234 | 235 | #pup = [(p, p + gm) for p, gm in zip(tparams.values(), gmomup)] 236 | for p,gm in zip(tparams.values(), gmomshared): 237 | updates[p] = p + updates[gm] 238 | 239 | # Function that updates the weights from the previously computed 240 | # gradient. 241 | f_update = theano.function([lr,mom], [], updates=updates, 242 | name='sgd_f_update') 243 | 244 | return f_grad_shared, f_update 245 | 246 | 247 | def sgd(lr, tparams, grads, x, rx, y, cost): 248 | """ Stochastic Gradient Descent 249 | 250 | :note: A more complicated version of sgd then needed. This is 251 | done like that for adadelta and rmsprop. 252 | 253 | """ 254 | # New set of shared variable that will contain the gradient 255 | # for a mini-batch. 256 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 257 | for k, p in tparams.iteritems()] 258 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 259 | 260 | # Function that computes gradients for a mini-batch, but do not 261 | # updates the weights. 262 | f_grad_shared = theano.function([x,rx, y], cost, updates=gsup, 263 | name='sgd_f_grad_shared') 264 | 265 | pup = [(p, p - lr * g) for p, g in zip(tparams.values(), gshared)] 266 | 267 | # Function that updates the weights from the previously computed 268 | # gradient. 
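    # A minimal sketch of the update rule mom_sgd above builds out of Theano
    # shared variables, written for a single parameter p with gradient g and
    # velocity v (hypothetical plain-numpy names):
    #
    #     v = mom * v - (1.0 - mom) * lr * g   # momentum-smoothed step
    #     p = p + v                            # apply the step
    #
    # The pup list just above implements the plain SGD special case,
    # p = p - lr * g, once f_grad_shared has stored the minibatch gradients
    # in gshared.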
269 | f_update = theano.function([lr], [], updates=pup, 270 | name='sgd_f_update') 271 | 272 | return f_grad_shared, f_update 273 | 274 | 275 | def adadelta(lr, tparams, grads, x,rx,y, cost): 276 | zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), 277 | name='%s_grad' % k) 278 | for k, p in tparams.iteritems()] 279 | running_up2 = [theano.shared(p.get_value() * numpy.float32(0.), 280 | name='%s_rup2' % k) 281 | for k, p in tparams.iteritems()] 282 | running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), 283 | name='%s_rgrad2' % k) 284 | for k, p in tparams.iteritems()] 285 | 286 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 287 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 288 | for rg2, g in zip(running_grads2, grads)] 289 | 290 | f_grad_shared = theano.function([x,rx, y], cost, updates=zgup+rg2up, 291 | name='adadelta_f_grad_shared') 292 | 293 | updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg 294 | for zg, ru2, rg2 in zip(zipped_grads, 295 | running_up2, 296 | running_grads2)] 297 | ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) 298 | for ru2, ud in zip(running_up2, updir)] 299 | param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)] 300 | 301 | f_update = theano.function([lr], [], updates=ru2up+param_up, 302 | on_unused_input='ignore', 303 | name='adadelta_f_update', 304 | mode='DebugMode') 305 | 306 | return f_grad_shared, f_update 307 | 308 | 309 | def rmsprop(lr, tparams, grads, x, rx, y, cost): 310 | zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), 311 | name='%s_grad' % k) 312 | for k, p in tparams.iteritems()] 313 | running_grads = [theano.shared(p.get_value() * numpy.float32(0.), 314 | name='%s_rgrad' % k) 315 | for k, p in tparams.iteritems()] 316 | running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), 317 | name='%s_rgrad2' % k) 318 | for k, p in tparams.iteritems()] 319 | 320 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 321 | rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)] 322 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 323 | for rg2, g in zip(running_grads2, grads)] 324 | 325 | f_grad_shared = theano.function([x,rx, y], cost, 326 | updates=zgup + rgup + rg2up, 327 | name='rmsprop_f_grad_shared') 328 | 329 | updir = [theano.shared(p.get_value() * numpy.float32(0.), 330 | name='%s_updir' % k) 331 | for k, p in tparams.iteritems()] 332 | updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4)) 333 | for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, 334 | running_grads2)] 335 | param_up = [(p, p + udn[1]) 336 | for p, udn in zip(tparams.values(), updir_new)] 337 | f_update = theano.function([lr], [], updates=updir_new+param_up, 338 | on_unused_input='ignore', 339 | name='rmsprop_f_update') 340 | 341 | return f_grad_shared, f_update 342 | 343 | 344 | def build_model(tparams, options): 345 | trng = RandomStreams(1234) 346 | 347 | # Used for dropout. 
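    # A small sketch of what dropout_layer above computes, using a plain numpy
    # stand-in for the Theano graph (rng and emb are hypothetical names):
    #
    #     mask = rng.binomial(n=1, p=0.5, size=emb.shape)  # training branch
    #     train_out = emb * mask                           # half the units zeroed at random
    #     test_out  = emb * 0.5                            # evaluation branch: rescale instead
    #
    # use_noise is the shared switch created just below; the training loop sets
    # it to 1. for each minibatch and back to 0. before validation and testing.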
348 | use_noise = theano.shared(numpy.float32(0.)) 349 | 350 | x = tensor.tensor3('x', dtype='float32') 351 | rx = tensor.tensor3('rx', dtype='float32') 352 | #mask = tensor.matrix('mask', dtype='float32') 353 | y = tensor.vector('y', dtype='float32') 354 | 355 | n_timesteps = x.shape[0] 356 | n_samples = x.shape[1] 357 | n_dim = x.shape[2] 358 | 359 | lstm_outs = [] 360 | for inp in [x, rx]: 361 | emb = tensor.dot(inp,tparams['Wemb']) 362 | #emb = tensor.nnet.sigmoid(emb) 363 | #emb = ReLU(emb) 364 | 365 | if options['use_dropout']: 366 | emb = dropout_layer(emb, use_noise, trng) 367 | 368 | proj = get_layer(options['encoder'])[1](tparams, emb, options, 369 | prefix=options['encoder'] 370 | ) 371 | 372 | 373 | if options['encoder'] == 'lstm' and options['sum_pool'] == True: 374 | proj = proj.sum(axis=0) 375 | proj = proj / options['n_iter'] 376 | else: 377 | proj = proj[-1] 378 | lstm_outs.append(proj) 379 | 380 | del proj 381 | #if options['use_dropout']: 382 | # proj = dropout_layer(proj, use_noise, trng) 383 | 384 | #pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U'])+tparams['b']) 385 | #pred = tensor.nnet.sigmoid(tensor.dot(proj, tparams['U'])\ 386 | # + tparams['b']) 387 | pred = tensor.dot(tensor.concatenate(lstm_outs), tparams['U']) + tparams['b'] 388 | 389 | pred = pred.reshape((2,pred.shape[0]/2, pred.shape[1])).mean(axis=0) 390 | f_pred_prob = theano.function([x,rx], pred, name='f_pred_prob') 391 | #f_pred = theano.function(x, pred.argmax(axis=1), name='f_pred') 392 | 393 | cost = tensor.mean((y-pred.T)**2) 394 | 395 | #cost = -tensor.log(pred[tensor.arange(n_samples), y] + 1e-8).mean() 396 | 397 | return use_noise, x, rx, y, f_pred_prob, cost 398 | 399 | 400 | def pred_probs(f_pred, prepare_data, data, model_options, verbose=False): 401 | """ If you want to use a trained model, this is useful to compute 402 | the probabilities of new examples. 403 | """ 404 | n_samples = len(data) 405 | 406 | x,y = prepare_data(data, numpy.array([]), 407 | model_options['n_iter'], model_options['n_input']) 408 | rx,_ = prepare_data(data[:][::-1], numpy.array([]), 409 | model_options['n_iter'], model_options['n_input']) 410 | 411 | pred = f_pred(x,rx) 412 | 413 | return pred 414 | 415 | 416 | def pred_error(f_pred, prepare_data, data, iterator, model_options, verbose=False): 417 | """ 418 | Just compute the error 419 | f_pred: Theano fct computing the prediction 420 | prepare_data: usual prepare_data for that dataset. 421 | """ 422 | valid_err = 0 423 | for _, valid_index in iterator: 424 | # TODO: This is not very efficient I should check 425 | x, y = prepare_data([data[0][t] for t in valid_index], 426 | numpy.array(data[1])[valid_index], 427 | model_options['n_iter'],model_options['n_input']) 428 | rx, _ = prepare_data([data[0][t][::-1] for t in valid_index], 429 | numpy.array(data[1])[valid_index], 430 | model_options['n_iter'],model_options['n_input']) 431 | 432 | 433 | 434 | preds = f_pred(x,rx) 435 | targets = numpy.array(data[1])[valid_index] 436 | valid_err += tensor.sum((targets-preds.T)**2) 437 | #valid_err = 1. - numpy.float32(valid_err) / len(data[0]) 438 | valid_err = valid_err / len(data[0]) 439 | 440 | return valid_err.eval() 441 | 442 | 443 | def R_score(f_pred, prepare_data, data, iterator, model_options, verbose=False): 444 | """ 445 | Just compute the error 446 | f_pred: Theano fct computing the prediction 447 | prepare_data: usual prepare_data for that dataset. 
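    The returned quantity is meant to be the coefficient of determination
    (R^2). A rough numpy equivalent for targets y and predictions p
    (hypothetical arrays) is:

        r2 = 1. - ((y - p) ** 2).sum() / ((y - y.mean()) ** 2).sum()

    Values close to 1 mean the predictions explain most of the variance;
    0 corresponds to always predicting the mean.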
448 | """ 449 | valid_err = 0 450 | denom = 0 451 | data_mean = numpy.array(data[1]).mean() 452 | for _, valid_index in iterator: 453 | # TODO: This is not very efficient I should check 454 | x, y = prepare_data([data[0][t] for t in valid_index], 455 | numpy.array(data[1])[valid_index], 456 | model_options['n_iter'],model_options['n_input']) 457 | rx, _ = prepare_data([data[0][t][::-1] for t in valid_index], 458 | numpy.array(data[1])[valid_index], 459 | model_options['n_iter'],model_options['n_input']) 460 | 461 | 462 | preds = f_pred(x,rx) 463 | targets = numpy.array(data[1])[valid_index] 464 | valid_err += tensor.sum((targets-preds.T)**2) 465 | denom += ((numpy.array(data[1]) - data_mean)**2).sum() 466 | #valid_err = 1. - numpy.float32(valid_err) / len(data[0]) 467 | valid_err = 1. - (valid_err / denom) 468 | 469 | return valid_err.eval() 470 | 471 | 472 | def backforecast(f_pred, data, model_options): 473 | """ 474 | Compute the amount of times in which 475 | the RNN correctly predict a up or 476 | down trend 477 | """ 478 | # TODO: Use the prepare data 479 | x, y = prepare_data(data[0], data[1], model_options['n_iter'], 480 | model_options['n_input']) 481 | rx, _ = prepare_data(data[0][:][::-1], data[1], model_options['n_iter'], 482 | model_options['n_input']) 483 | 484 | 485 | 486 | targets = (y > x[-1,:,0]) 487 | #TODO: not need for this asarray 488 | preds = f_pred(numpy.asarray(x,dtype='float32'),rx) 489 | preds_up = (preds[:,0] > x[-1,:,0]) 490 | err = (targets <> preds_up).sum() 491 | 492 | ret = float(err) / float(len(data[0])) 493 | return ret 494 | 495 | def predict_lstm(input, model_options): 496 | 497 | params = init_params(model_options) 498 | 499 | load_params(model_options['saveto'], params) 500 | 501 | # This create Theano Shared Variable from the parameters. 502 | # Dict name (string) -> Theano Tensor Shared Variable 503 | # params and tparams have different copy of the weights. 504 | tparams = init_tparams(params) 505 | 506 | # use_noise is for dropout 507 | (use_noise, x, rx, 508 | y, f_pred_prob, cost) = build_model(tparams, model_options) 509 | 510 | preds = pred_probs(f_pred_prob, prepare_data, input, model_options) 511 | 512 | return preds 513 | 514 | 515 | def train_lstm( 516 | dim_proj=32, # word embeding dimension and LSTM number of hidden units. 517 | patience=10, # Number of epoch to wait before early stop if no progress 518 | max_epochs=150, # The maximum number of epoch to run 519 | dispFreq=40, # Display to stdout the training progress every N updates 520 | decay_c=0., # Weight decay for the classifier applied to the U weights. 521 | lrate=0.1, # Learning rate for sgd (not used for adadelta and rmsprop) 522 | n_input = 4, # Vocabulary size 523 | optimizer=mom_sgd, # sgd,mom_sgs, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate). 524 | encoder='lstm', # TODO: can be removed must be lstm. 525 | validFreq=20, # Compute the validation error after this number of update. 526 | saveFreq=20, # Save the parameters after every saveFreq updates 527 | maxlen=100, # Sequence longer then this get ignored 528 | batch_size=50, # The batch size during training. 529 | valid_batch_size=64, # The batch size used for validation/test set. 530 | exchange='AUDJPY', 531 | 532 | # Parameter for extra option 533 | noise_std=0., 534 | use_dropout=False, # if False slightly faster, but worst test error 535 | # This frequently need a bigger model. 
536 | reload_model="", # Path to a saved model we want to start from. 537 | sum_pool = False, 538 | mom_start = 0.5, 539 | mom_end = 0.99, 540 | mom_epoch_interval = 300, 541 | learning_rate_decay=0.99995, 542 | #learning_rate_decay=0.98, 543 | predict=False, 544 | input_pred=None 545 | ): 546 | 547 | model_path = "/user/j/jgpavez/rnn_trading/models/" 548 | data_path = "/user/j/jgpavez/rnn_trading/data/" 549 | 550 | saveto = exchange + '_model.npz' 551 | params_file = exchange + '_params.npz' 552 | dataset = exchange + '_hour.csv' 553 | 554 | saveto = os.path.join(model_path, saveto) 555 | params_file = os.path.join(data_path, params_file) 556 | 557 | ydim = 1 558 | n_iter = 10 559 | 560 | # Model options 561 | model_options = locals().copy() 562 | 563 | if predict == True: 564 | return predict_lstm(input_pred, model_options) 565 | 566 | 567 | print "model options", model_options 568 | 569 | print 'Loading data' 570 | train, valid, test, mean, std = read_data(max_len=n_iter, path=dataset, params_file=params_file) 571 | 572 | #YDIM?? 573 | #number of labels (output) 574 | 575 | theano.config.optimizer = 'None' 576 | 577 | print 'Building model' 578 | # This create the initial parameters as numpy ndarrays. 579 | # Dict name (string) -> numpy ndarray 580 | params = init_params(model_options) 581 | 582 | if reload_model: 583 | load_params(saveto, params) 584 | 585 | # This create Theano Shared Variable from the parameters. 586 | # Dict name (string) -> Theano Tensor Shared Variable 587 | # params and tparams have different copy of the weights. 588 | tparams = init_tparams(params) 589 | 590 | # use_noise is for dropout 591 | (use_noise, x, rx, 592 | y, f_pred_prob, cost) = build_model(tparams, model_options) 593 | 594 | if decay_c > 0.: 595 | decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') 596 | weight_decay = 0. 597 | weight_decay += (tparams['U']**2).sum() 598 | weight_decay *= decay_c 599 | cost += weight_decay 600 | 601 | f_cost = theano.function([x,rx,y], cost, name='f_cost') 602 | 603 | grads = tensor.grad(cost, wrt=tparams.values()) 604 | f_grad = theano.function([x,rx,y], grads, name='f_grad') 605 | 606 | lr = tensor.scalar(name='lr') 607 | f_grad_shared, f_update = optimizer(lr, tparams, grads, 608 | x,rx, y, cost) 609 | 610 | print 'Optimization' 611 | 612 | 613 | kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size, 614 | shuffle=True) 615 | kf_test = get_minibatches_idx(len(test[0]), valid_batch_size, 616 | shuffle=True) 617 | 618 | print "%d train examples" % len(train[0]) 619 | print "%d valid examples" % len(valid[0]) 620 | print "%d test examples" % len(test[0]) 621 | history_errs = [] 622 | best_p = None 623 | bad_count = 0 624 | 625 | if validFreq == -1: 626 | validFreq = len(train[0])/batch_size 627 | if saveFreq == -1: 628 | saveFreq = len(train[0])/batch_size 629 | 630 | uidx = 0 # the number of update done 631 | estop = False # early stop 632 | start_time = time.clock() 633 | mom = 0 634 | 635 | try: 636 | for eidx in xrange(max_epochs): 637 | n_samples = 0 638 | 639 | # Get new shuffled index for the training set. 640 | kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True) 641 | 642 | if eidx < model_options['mom_epoch_interval']: 643 | mom = model_options['mom_start']*\ 644 | (1.0 - eidx/model_options['mom_epoch_interval'])\ 645 | + mom_end*(eidx/model_options['mom_epoch_interval']) 646 | else: 647 | mom = mom_end 648 | 649 | for _, train_index in kf: 650 | uidx += 1 651 | use_noise.set_value(1.) 
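                # The momentum schedule a few lines above is meant to ramp mom
                # linearly from mom_start to mom_end over mom_epoch_interval
                # epochs, i.e. roughly:
                #
                #     t = float(eidx) / mom_epoch_interval        # fraction of the ramp
                #     mom = mom_start * (1.0 - t) + mom_end * t   # 0.5 -> 0.99 with the defaults
                #
                # Note that eidx / model_options['mom_epoch_interval'] is integer
                # division under Python 2, so as written t stays 0 and mom stays at
                # mom_start; the float cast above is probably what was intended.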
652 | 653 | # Select the random examples for this minibatch 654 | y = [train[1][t] for t in train_index] 655 | x = [train[0][t]for t in train_index] 656 | 657 | # Get the data in numpy.ndarray formet. 658 | # It return something of the shape (minibatch maxlen, n samples) 659 | rx,_ = prepare_data(x[:][::-1],y,model_options['n_iter'],model_options['n_input']) 660 | x, y = prepare_data(x, y, model_options['n_iter'],model_options['n_input']) 661 | 662 | 663 | if x is None: 664 | print 'Minibatch with zero sample under length ', maxlen 665 | continue 666 | n_samples += x.shape[1] 667 | cost = f_grad_shared(x,rx, y) 668 | f_update(lrate,mom) 669 | 670 | #decay 671 | #TODO: CHECK THIS LEARNING RATE 672 | #lrate = learning_rate_decay*lrate 673 | 674 | if numpy.isnan(cost) or numpy.isinf(cost): 675 | print 'NaN detected' 676 | return 1., 1., 1. 677 | 678 | if numpy.mod(uidx, dispFreq) == 0: 679 | print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost 680 | 681 | #decay 682 | #TODO: CHECK THIS LEARNING RATE 683 | lrate = learning_rate_decay*lrate 684 | if numpy.mod(eidx, validFreq) == 0: 685 | use_noise.set_value(0.) 686 | #train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options) 687 | valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options) 688 | test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options) 689 | bckfr_err = backforecast(f_pred_prob, test, model_options) 690 | 691 | history_errs.append([valid_err, test_err]) 692 | 693 | if (eidx == 0 or 694 | test_err <= numpy.array(history_errs)[:, 695 | 1].min()): 696 | 697 | best_p = unzip(tparams) 698 | bad_counter = 0 699 | 700 | print ('Valid ', valid_err, 701 | 'Test ', test_err, 702 | 'Backfore ', bckfr_err) 703 | 704 | if (len(history_errs) > patience and 705 | valid_err >= numpy.array(history_errs)[:-patience, 706 | 0].min()): 707 | bad_counter += 1 708 | if bad_counter > patience: 709 | print 'Early Stop!' 710 | estop = True 711 | break 712 | 713 | 714 | if numpy.mod(eidx, saveFreq) == 0: 715 | print 'Saving...', 716 | 717 | if best_p is not None: 718 | params = best_p 719 | else: 720 | params = unzip(tparams) 721 | numpy.savez(saveto, history_errs=history_errs, **params) 722 | pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1) 723 | print 'Done' 724 | 725 | 726 | print 'Seen %d samples' % n_samples 727 | 728 | if estop: 729 | break 730 | 731 | except KeyboardInterrupt: 732 | print "Training interupted" 733 | 734 | end_time = time.clock() 735 | if best_p is not None: 736 | zipp(best_p, tparams) 737 | else: 738 | best_p = unzip(tparams) 739 | 740 | use_noise.set_value(0.) 741 | #train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options) 742 | valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options) 743 | test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options) 744 | bckfr_err = backforecast(f_pred_prob, test, model_options) 745 | 746 | print 'Valid ', valid_err, 'Test ', test_err, 'Backforecasting ', bckfr_err 747 | 748 | numpy.savez(saveto, train_err=train_err, 749 | valid_err=valid_err, test_err=test_err, 750 | history_errs=history_errs, **best_p) 751 | print 'The code run for %d epochs, with %f sec/epochs' % ( 752 | (eidx + 1), (end_time - start_time) / (1. 
* (eidx + 1))) 753 | print >> sys.stderr, ('Training took %.1fs' % 754 | (end_time - start_time)) 755 | return train_err, valid_err, test_err 756 | 757 | 758 | if __name__ == '__main__': 759 | 760 | # We must have floatX=float32 for this tutorial to work correctly. 761 | theano.config.floatX = "float32" 762 | # The next line is the new Theano default. This is a speed up. 763 | #theano.config.scan.allow_gc = False 764 | exchange = 'AUDJPY' 765 | if len(sys.argv) == 2: 766 | exchange = sys.argv[1] 767 | 768 | # See function train for all possible parameter and there definition. 769 | train_lstm( 770 | #reload_model="lstm_model.npz", 771 | exchange=exchange, 772 | max_epochs=162, 773 | ) 774 | 775 | -------------------------------------------------------------------------------- /deep_lstm_forex.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Deep LSTM RNN for forex predictions 3 | Based on sentiment analysis lstm found in deeplearning tutorials 4 | ''' 5 | from collections import OrderedDict 6 | import copy 7 | import cPickle as pkl 8 | import random 9 | import sys 10 | import time 11 | import pdb 12 | import os 13 | 14 | import numpy 15 | import theano 16 | import theano.tensor as tensor 17 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams 18 | from theano.ifelse import ifelse 19 | 20 | 21 | from forex import read_data, prepare_data 22 | 23 | #### rectified linear unit 24 | def ReLU(x): 25 | y = tensor.maximum(0.0, x) 26 | return(y) 27 | 28 | def get_minibatches_idx(n, minibatch_size, shuffle=False): 29 | """ 30 | Used to shuffle the dataset at each iteration. 31 | """ 32 | 33 | idx_list = numpy.arange(n, dtype="int32") 34 | 35 | if shuffle: 36 | random.shuffle(idx_list) 37 | 38 | minibatches = [] 39 | minibatch_start = 0 40 | for i in range(n // minibatch_size): 41 | minibatches.append(idx_list[minibatch_start: 42 | minibatch_start + minibatch_size]) 43 | minibatch_start += minibatch_size 44 | 45 | if (minibatch_start != n): 46 | # Make a minibatch out of what is left 47 | minibatches.append(idx_list[minibatch_start:]) 48 | 49 | return zip(range(len(minibatches)), minibatches) 50 | 51 | def zipp(params, tparams): 52 | """ 53 | When we reload the model. Needed for the GPU stuff. 54 | """ 55 | for kk, vv in params.iteritems(): 56 | tparams[kk].set_value(vv) 57 | 58 | 59 | def unzip(zipped): 60 | """ 61 | When we pickle the model. Needed for the GPU stuff. 62 | """ 63 | new_params = OrderedDict() 64 | for kk, vv in zipped.iteritems(): 65 | new_params[kk] = vv.get_value() 66 | return new_params 67 | 68 | 69 | def dropout_layer(state_before, use_noise, trng): 70 | proj = tensor.switch(use_noise, 71 | (state_before * 72 | trng.binomial(state_before.shape, 73 | p=0.5, n=1, 74 | dtype=state_before.dtype)), 75 | state_before * 0.5) 76 | return proj 77 | 78 | 79 | def _p(pp, name): 80 | return '%s_%s' % (pp, name) 81 | 82 | 83 | def init_params(options): 84 | """ 85 | Global (not LSTM) parameter. For the embeding and the classifier. 
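    In this deep variant the encoder initializer below creates one set of LSTM
    weights per stacked layer, so with the defaults used in train_lstm
    (nlayers=3, dim_proj=32, n_input=4, ydim=1) the dictionary roughly holds:

        params['Wemb']                        # (4, 32) input projection
        params['lstm_W0'], params['lstm_U0']  # (32, 128) each, first layer gates
        params['lstm_b0']                     # (128,)
        ...                                   # likewise lstm_W1/U1/b1 and lstm_W2/U2/b2
        params['U'], params['b']              # (32, 1) and (1,) regression head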
86 | """ 87 | params = OrderedDict() 88 | # embedding 89 | randn = numpy.random.rand(options['n_input'], 90 | options['dim_proj']) 91 | params['Wemb'] = (0.01 * randn).astype('float32') 92 | params = get_layer(options['encoder'])[0](options, 93 | params, 94 | prefix=options['encoder']) 95 | # classifier 96 | params['U'] = 0.01 * numpy.random.randn(options['dim_proj'], 97 | options['ydim']).astype('float32') 98 | params['b'] = numpy.zeros((options['ydim'],)).astype('float32') 99 | 100 | return params 101 | 102 | 103 | def load_params(path, params): 104 | pp = numpy.load(path) 105 | for kk, vv in params.iteritems(): 106 | if kk not in pp: 107 | raise Warning('%s is not in the archive' % kk) 108 | params[kk] = pp[kk] 109 | 110 | return params 111 | 112 | 113 | def init_tparams(params): 114 | tparams = OrderedDict() 115 | for kk, pp in params.iteritems(): 116 | tparams[kk] = theano.shared(params[kk], name=kk) 117 | return tparams 118 | 119 | 120 | def get_layer(name): 121 | fns = layers[name] 122 | return fns 123 | 124 | 125 | def ortho_weight(ndim): 126 | W = numpy.random.randn(ndim, ndim) 127 | u, s, v = numpy.linalg.svd(W) 128 | return u.astype('float32') 129 | 130 | 131 | def param_init_lstm(options, params, prefix='lstm'): 132 | """ 133 | Init the LSTM parameter: 134 | 135 | :see: init_params 136 | """ 137 | 138 | for layer in xrange(options['nlayers']): 139 | # Asuming all layers of same size 140 | W = numpy.concatenate([ortho_weight(options['dim_proj']), 141 | ortho_weight(options['dim_proj']), 142 | ortho_weight(options['dim_proj']), 143 | ortho_weight(options['dim_proj'])], axis=1) 144 | params[_p(prefix, 'W%d'%layer)] = W.astype('float32') 145 | U = numpy.concatenate([ortho_weight(options['dim_proj']), 146 | ortho_weight(options['dim_proj']), 147 | ortho_weight(options['dim_proj']), 148 | ortho_weight(options['dim_proj'])], axis=1) 149 | params[_p(prefix, 'U%d'%layer)] = U.astype('float32') 150 | b = numpy.zeros((4 * options['dim_proj'],)) 151 | params[_p(prefix, 'b%d'%layer)] = b.astype('float32') 152 | 153 | return params 154 | 155 | 156 | def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None, nlayers=1): 157 | nsteps = state_below.shape[0] 158 | if state_below.ndim == 3: 159 | n_samples = state_below.shape[1] 160 | else: 161 | n_samples = 1 162 | 163 | #assert mask is not None 164 | 165 | def _slice(_x, n, dim): 166 | if _x.ndim == 3: 167 | return _x[:, :, n*dim:(n+1)*dim] 168 | return _x[:, n*dim:(n+1)*dim] 169 | 170 | def _step(x_, h_, c_, param_U, param_b): 171 | 172 | preact = tensor.dot(h_, param_U) 173 | preact += x_ 174 | preact += param_b 175 | 176 | i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj'])) 177 | f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj'])) 178 | o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj'])) 179 | c = tensor.tanh(_slice(preact, 3, options['dim_proj'])) 180 | 181 | c = f * c_ + i * c 182 | #c = m_[:, None] * c + (1. - m_)[:, None] * c_ 183 | 184 | h = o * tensor.tanh(c) 185 | #TODO: I think this don't apply since is made to avoid sequences smaller tan max_len 186 | #h = m_[:, None] * h + (1. 
- m_)[:, None] * h_ 187 | 188 | return h, c 189 | 190 | for layer in xrange(nlayers): 191 | state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W%d'%layer)]) + 192 | tparams[_p(prefix, 'b%d'%layer)]) 193 | 194 | dim_proj = options['dim_proj'] 195 | #TODO: Scan over two sequences one for step and other for number of layer 196 | rval, updates = theano.scan(_step, 197 | sequences=[state_below], 198 | outputs_info=[tensor.alloc(0.,n_samples, 199 | dim_proj), 200 | tensor.alloc(0.,n_samples, 201 | dim_proj)], 202 | non_sequences=[tparams[_p(prefix,'U%d'%layer)],tparams[_p(prefix,'b%d'%layer)]], 203 | name=_p(prefix, '_layer%d'%layer), 204 | n_steps=nsteps) 205 | state_below = rval[0] 206 | 207 | return rval[0] 208 | 209 | 210 | # ff: Feed Forward (normal neural net), only useful to put after lstm 211 | # before the classifier. 212 | layers = {'lstm': (param_init_lstm, lstm_layer)} 213 | 214 | def mom_sgd(lr, tparams, grads, x, y, cost): 215 | """ Stochastic Gradient Descent 216 | 217 | :note: A more complicated version of sgd then needed. This is 218 | done like that for adadelta and rmsprop. 219 | 220 | """ 221 | 222 | updates = OrderedDict() 223 | 224 | mom = tensor.scalar(name='mom') 225 | gmomshared = [theano.shared(p.get_value(), name='%s_mom_grad' %k) 226 | for k,p in tparams.iteritems()] 227 | 228 | # New set of shared variable that will contain the gradient 229 | # for a mini-batch. 230 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 231 | for k, p in tparams.iteritems()] 232 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 233 | 234 | # Function that computes gradients for a mini-batch, but do not 235 | # updates the weights. 236 | f_grad_shared = theano.function([x, y], cost, updates=gsup, 237 | name='sgd_f_grad_shared') 238 | 239 | for gm,gp in zip(gmomshared,gshared): 240 | updates[gm] = mom*gm - (1.0 - mom) * lr * gp 241 | #gmomup = [(gm, mom*gm - (1.0 - mom) * lr * gp) for gm,gp in 242 | # zip(gmomshared, gshared)] 243 | 244 | #pup = [(p, p + gm) for p, gm in zip(tparams.values(), gmomup)] 245 | for p,gm in zip(tparams.values(), gmomshared): 246 | updates[p] = p + updates[gm] 247 | 248 | # Function that updates the weights from the previously computed 249 | # gradient. 250 | f_update = theano.function([lr,mom], [], updates=updates, 251 | name='sgd_f_update') 252 | 253 | return f_grad_shared, f_update 254 | 255 | 256 | def sgd(lr, tparams, grads, x, y, cost): 257 | """ Stochastic Gradient Descent 258 | 259 | :note: A more complicated version of sgd then needed. This is 260 | done like that for adadelta and rmsprop. 261 | 262 | """ 263 | # New set of shared variable that will contain the gradient 264 | # for a mini-batch. 265 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 266 | for k, p in tparams.iteritems()] 267 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 268 | 269 | # Function that computes gradients for a mini-batch, but do not 270 | # updates the weights. 271 | f_grad_shared = theano.function([x, y], cost, updates=gsup, 272 | name='sgd_f_grad_shared') 273 | 274 | pup = [(p, p - lr * g) for p, g in zip(tparams.values(), gshared)] 275 | 276 | # Function that updates the weights from the previously computed 277 | # gradient. 
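    # For reference, a compact numpy sketch of what one step of a stacked LSTM
    # layer in lstm_layer above computes, with hypothetical arrays x, h, c of
    # shape (n_samples, dim) and the concatenated gate matrices W, U of shape
    # (dim, 4*dim) and bias b of shape (4*dim,):
    #
    #     def sigmoid(z): return 1.0 / (1.0 + numpy.exp(-z))
    #     preact = x.dot(W) + h.dot(U) + b
    #     i = sigmoid(preact[:, 0*dim:1*dim])     # input gate
    #     f = sigmoid(preact[:, 1*dim:2*dim])     # forget gate
    #     o = sigmoid(preact[:, 2*dim:3*dim])     # output gate
    #     g = numpy.tanh(preact[:, 3*dim:4*dim])  # candidate cell state
    #     c = f * c + i * g                       # new cell state
    #     h = o * numpy.tanh(c)                   # new hidden state
    #
    # Stacking: the hidden sequence of layer l becomes the input of layer l+1,
    # which is what the `state_below = rval[0]` assignment in lstm_layer does
    # on each of the nlayers passes.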
278 | f_update = theano.function([lr], [], updates=pup, 279 | name='sgd_f_update') 280 | 281 | return f_grad_shared, f_update 282 | 283 | 284 | def adadelta(lr, tparams, grads, x, y, cost): 285 | zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), 286 | name='%s_grad' % k) 287 | for k, p in tparams.iteritems()] 288 | running_up2 = [theano.shared(p.get_value() * numpy.float32(0.), 289 | name='%s_rup2' % k) 290 | for k, p in tparams.iteritems()] 291 | running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), 292 | name='%s_rgrad2' % k) 293 | for k, p in tparams.iteritems()] 294 | 295 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 296 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 297 | for rg2, g in zip(running_grads2, grads)] 298 | 299 | f_grad_shared = theano.function([x, y], cost, updates=zgup+rg2up, 300 | name='adadelta_f_grad_shared') 301 | 302 | updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg 303 | for zg, ru2, rg2 in zip(zipped_grads, 304 | running_up2, 305 | running_grads2)] 306 | ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) 307 | for ru2, ud in zip(running_up2, updir)] 308 | param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)] 309 | 310 | f_update = theano.function([lr], [], updates=ru2up+param_up, 311 | on_unused_input='ignore', 312 | name='adadelta_f_update', 313 | mode='DebugMode') 314 | 315 | return f_grad_shared, f_update 316 | 317 | 318 | def rmsprop(lr, tparams, grads, x, y, cost): 319 | zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), 320 | name='%s_grad' % k) 321 | for k, p in tparams.iteritems()] 322 | running_grads = [theano.shared(p.get_value() * numpy.float32(0.), 323 | name='%s_rgrad' % k) 324 | for k, p in tparams.iteritems()] 325 | running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), 326 | name='%s_rgrad2' % k) 327 | for k, p in tparams.iteritems()] 328 | 329 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 330 | rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)] 331 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 332 | for rg2, g in zip(running_grads2, grads)] 333 | 334 | f_grad_shared = theano.function([x, y], cost, 335 | updates=zgup + rgup + rg2up, 336 | name='rmsprop_f_grad_shared') 337 | 338 | updir = [theano.shared(p.get_value() * numpy.float32(0.), 339 | name='%s_updir' % k) 340 | for k, p in tparams.iteritems()] 341 | updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4)) 342 | for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, 343 | running_grads2)] 344 | param_up = [(p, p + udn[1]) 345 | for p, udn in zip(tparams.values(), updir_new)] 346 | f_update = theano.function([lr], [], updates=updir_new+param_up, 347 | on_unused_input='ignore', 348 | name='rmsprop_f_update') 349 | 350 | return f_grad_shared, f_update 351 | 352 | 353 | def build_model(tparams, options): 354 | trng = RandomStreams(1234) 355 | 356 | # Used for dropout. 
357 | use_noise = theano.shared(numpy.float32(0.)) 358 | 359 | x = tensor.tensor3('x', dtype='float32') 360 | #mask = tensor.matrix('mask', dtype='float32') 361 | y = tensor.vector('y', dtype='float32') 362 | 363 | n_timesteps = x.shape[0] 364 | n_samples = x.shape[1] 365 | n_dim = x.shape[2] 366 | 367 | emb = tensor.dot(x,tparams['Wemb']) 368 | #emb = tensor.nnet.sigmoid(emb) 369 | #emb = ReLU(emb) 370 | 371 | if options['use_dropout']: 372 | emb = dropout_layer(emb, use_noise, trng) 373 | 374 | proj = get_layer(options['encoder'])[1](tparams, emb, options, 375 | prefix=options['encoder'], 376 | nlayers=options['nlayers'] 377 | ) 378 | 379 | 380 | if options['encoder'] == 'lstm' and options['sum_pool'] == True: 381 | proj = proj.sum(axis=0) 382 | proj = proj / options['n_iter'] 383 | else: 384 | proj = proj[-1] 385 | #if options['use_dropout']: 386 | # proj = dropout_layer(proj, use_noise, trng) 387 | 388 | #pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U'])+tparams['b']) 389 | #pred = tensor.nnet.sigmoid(tensor.dot(proj, tparams['U'])\ 390 | # + tparams['b']) 391 | pred = tensor.dot(proj, tparams['U']) + tparams['b'] 392 | 393 | f_pred_prob = theano.function([x], pred, name='f_pred_prob') 394 | #f_pred = theano.function(x, pred.argmax(axis=1), name='f_pred') 395 | 396 | cost = tensor.mean((y-pred.T)**2) 397 | 398 | #cost = -tensor.log(pred[tensor.arange(n_samples), y] + 1e-8).mean() 399 | 400 | return use_noise, x, y, f_pred_prob, cost 401 | 402 | 403 | def pred_probs(f_pred, prepare_data, data, model_options, verbose=False): 404 | """ If you want to use a trained model, this is useful to compute 405 | the probabilities of new examples. 406 | """ 407 | n_samples = len(data) 408 | 409 | x,y = prepare_data(data, numpy.array([]), 410 | model_options['n_iter'], model_options['n_input']) 411 | pred = f_pred(x) 412 | 413 | return pred 414 | 415 | 416 | def pred_error(f_pred, prepare_data, data, iterator, model_options, verbose=False): 417 | """ 418 | Just compute the error 419 | f_pred: Theano fct computing the prediction 420 | prepare_data: usual prepare_data for that dataset. 421 | """ 422 | valid_err = 0 423 | for _, valid_index in iterator: 424 | # TODO: This is not very efficient I should check 425 | x, y = prepare_data([data[0][t] for t in valid_index], 426 | numpy.array(data[1])[valid_index], 427 | model_options['n_iter'],model_options['n_input']) 428 | 429 | 430 | preds = f_pred(x) 431 | targets = numpy.array(data[1])[valid_index] 432 | valid_err += tensor.sum((targets-preds.T)**2) 433 | #valid_err = 1. - numpy.float32(valid_err) / len(data[0]) 434 | valid_err = valid_err / len(data[0]) 435 | 436 | return valid_err.eval() 437 | 438 | 439 | def R_score(f_pred, prepare_data, data, iterator, model_options, verbose=False): 440 | """ 441 | Just compute the error 442 | f_pred: Theano fct computing the prediction 443 | prepare_data: usual prepare_data for that dataset. 444 | """ 445 | valid_err = 0 446 | denom = 0 447 | data_mean = numpy.array(data[1]).mean() 448 | for _, valid_index in iterator: 449 | # TODO: This is not very efficient I should check 450 | x, y = prepare_data([data[0][t] for t in valid_index], 451 | numpy.array(data[1])[valid_index], 452 | model_options['n_iter'],model_options['n_input']) 453 | 454 | 455 | preds = f_pred(x) 456 | targets = numpy.array(data[1])[valid_index] 457 | valid_err += tensor.sum((targets-preds.T)**2) 458 | denom += ((numpy.array(data[1]) - data_mean)**2).sum() 459 | #valid_err = 1. - numpy.float32(valid_err) / len(data[0]) 460 | valid_err = 1. 
- (valid_err / denom) 461 | 462 | return valid_err.eval() 463 | 464 | 465 | def backforecast(f_pred, data, model_options): 466 | """ 467 | Compute the amount of times in which 468 | the RNN correctly predict a up or 469 | down trend 470 | """ 471 | # TODO: Use the prepare data 472 | x, y = prepare_data(data[0], data[1], model_options['n_iter'], 473 | model_options['n_input']) 474 | 475 | targets = (y > x[-1,:,0]) 476 | preds = f_pred(numpy.asarray(x,dtype='float32')) 477 | preds_up = (preds[:,0] > x[-1,:,0]) 478 | err = (targets <> preds_up).sum() 479 | 480 | ret = float(err) / float(len(data[0])) 481 | return ret 482 | 483 | def predict_lstm(input, model_options): 484 | 485 | params = init_params(model_options) 486 | 487 | load_params(model_options['saveto'], params) 488 | 489 | # This create Theano Shared Variable from the parameters. 490 | # Dict name (string) -> Theano Tensor Shared Variable 491 | # params and tparams have different copy of the weights. 492 | tparams = init_tparams(params) 493 | 494 | # use_noise is for dropout 495 | (use_noise, x, 496 | y, f_pred_prob, cost) = build_model(tparams, model_options) 497 | 498 | preds = pred_probs(f_pred_prob, prepare_data, input, model_options) 499 | 500 | return preds 501 | 502 | 503 | def train_lstm( 504 | dim_proj=32, # word embeding dimension and LSTM number of hidden units. 505 | patience=10, # Number of epoch to wait before early stop if no progress 506 | max_epochs=150, # The maximum number of epoch to run 507 | dispFreq=40, # Display to stdout the training progress every N updates 508 | decay_c=0., # Weight decay for the classifier applied to the U weights. 509 | lrate=0.1, # Learning rate for sgd (not used for adadelta and rmsprop) 510 | n_input = 4, # Vocabulary size 511 | optimizer=mom_sgd, # sgd,mom_sgs, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate). 512 | encoder='lstm', # TODO: can be removed must be lstm. 513 | validFreq=20, # Compute the validation error after this number of update. 514 | saveFreq=20, # Save the parameters after every saveFreq updates 515 | maxlen=100, # Sequence longer then this get ignored 516 | batch_size=50, # The batch size during training. 517 | valid_batch_size=64, # The batch size used for validation/test set. 518 | exchange='AUDJPY', 519 | 520 | # Parameter for extra option 521 | noise_std=0., 522 | use_dropout=False, # if False slightly faster, but worst test error 523 | # This frequently need a bigger model. 524 | reload_model="", # Path to a saved model we want to start from. 
525 | sum_pool = False, 526 | mom_start = 0.5, 527 | mom_end = 0.99, 528 | mom_epoch_interval = 300, 529 | learning_rate_decay=0.99995, 530 | nlayers = 3, 531 | #learning_rate_decay=0.98, 532 | predict=False, 533 | input_pred=None 534 | ): 535 | 536 | model_path = "/user/j/jgpavez/rnn_trading/models/" 537 | data_path = "/user/j/jgpavez/rnn_trading/data/" 538 | 539 | saveto = exchange + '_model_deep.npz' 540 | params_file = exchange + '_params_deep.npz' 541 | dataset = exchange + '_hour.csv' 542 | 543 | saveto = os.path.join(model_path, saveto) 544 | params_file = os.path.join(data_path, params_file) 545 | 546 | ydim = 1 547 | n_iter = 50 548 | 549 | # Model options 550 | model_options = locals().copy() 551 | 552 | if predict == True: 553 | return predict_lstm(input_pred, model_options) 554 | 555 | 556 | print "model options", model_options 557 | 558 | print 'Loading data' 559 | train, valid, test, mean, std = read_data(max_len=n_iter, path=dataset, params_file=params_file) 560 | 561 | #YDIM?? 562 | #number of labels (output) 563 | 564 | theano.config.optimizer = 'None' 565 | 566 | print 'Building model' 567 | # This create the initial parameters as numpy ndarrays. 568 | # Dict name (string) -> numpy ndarray 569 | params = init_params(model_options) 570 | 571 | if reload_model: 572 | load_params(saveto, params) 573 | 574 | # This create Theano Shared Variable from the parameters. 575 | # Dict name (string) -> Theano Tensor Shared Variable 576 | # params and tparams have different copy of the weights. 577 | tparams = init_tparams(params) 578 | 579 | # use_noise is for dropout 580 | (use_noise, x, 581 | y, f_pred_prob, cost) = build_model(tparams, model_options) 582 | 583 | if decay_c > 0.: 584 | decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') 585 | weight_decay = 0. 586 | weight_decay += (tparams['U']**2).sum() 587 | weight_decay *= decay_c 588 | cost += weight_decay 589 | 590 | f_cost = theano.function([x, y], cost, name='f_cost') 591 | 592 | grads = tensor.grad(cost, wrt=tparams.values()) 593 | f_grad = theano.function([x, y], grads, name='f_grad') 594 | 595 | lr = tensor.scalar(name='lr') 596 | f_grad_shared, f_update = optimizer(lr, tparams, grads, 597 | x, y, cost) 598 | 599 | print 'Optimization' 600 | 601 | 602 | kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size, 603 | shuffle=True) 604 | kf_test = get_minibatches_idx(len(test[0]), valid_batch_size, 605 | shuffle=True) 606 | 607 | print "%d train examples" % len(train[0]) 608 | print "%d valid examples" % len(valid[0]) 609 | print "%d test examples" % len(test[0]) 610 | history_errs = [] 611 | best_p = None 612 | bad_count = 0 613 | 614 | if validFreq == -1: 615 | validFreq = len(train[0])/batch_size 616 | if saveFreq == -1: 617 | saveFreq = len(train[0])/batch_size 618 | 619 | uidx = 0 # the number of update done 620 | estop = False # early stop 621 | start_time = time.clock() 622 | mom = 0 623 | 624 | try: 625 | for eidx in xrange(max_epochs): 626 | n_samples = 0 627 | 628 | # Get new shuffled index for the training set. 629 | kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True) 630 | 631 | if eidx < model_options['mom_epoch_interval']: 632 | mom = model_options['mom_start']*\ 633 | (1.0 - eidx/model_options['mom_epoch_interval'])\ 634 | + mom_end*(eidx/model_options['mom_epoch_interval']) 635 | else: 636 | mom = mom_end 637 | 638 | for _, train_index in kf: 639 | uidx += 1 640 | use_noise.set_value(1.) 
641 | 642 | # Select the random examples for this minibatch 643 | y = [train[1][t] for t in train_index] 644 | x = [train[0][t]for t in train_index] 645 | 646 | # Get the data in numpy.ndarray formet. 647 | # It return something of the shape (minibatch maxlen, n samples) 648 | x, y = prepare_data(x, y, model_options['n_iter'],model_options['n_input']) 649 | 650 | if x is None: 651 | print 'Minibatch with zero sample under length ', maxlen 652 | continue 653 | n_samples += x.shape[1] 654 | cost = f_grad_shared(x, y) 655 | f_update(lrate,mom) 656 | 657 | #decay 658 | #TODO: CHECK THIS LEARNING RATE 659 | #lrate = learning_rate_decay*lrate 660 | 661 | if numpy.isnan(cost) or numpy.isinf(cost): 662 | print 'NaN detected' 663 | return 1., 1., 1. 664 | 665 | if numpy.mod(uidx, dispFreq) == 0: 666 | print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost 667 | 668 | #decay 669 | #TODO: CHECK THIS LEARNING RATE 670 | lrate = learning_rate_decay*lrate 671 | if numpy.mod(eidx, validFreq) == 0: 672 | use_noise.set_value(0.) 673 | #train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options) 674 | valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options) 675 | test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options) 676 | bckfr_err = backforecast(f_pred_prob, test, model_options) 677 | 678 | history_errs.append([valid_err, test_err]) 679 | 680 | if (eidx == 0 or 681 | test_err <= numpy.array(history_errs)[:, 682 | 1].min()): 683 | 684 | best_p = unzip(tparams) 685 | bad_counter = 0 686 | 687 | print ('Valid ', valid_err, 688 | 'Test ', test_err, 689 | 'Backfore ', bckfr_err) 690 | 691 | if (len(history_errs) > patience and 692 | valid_err >= numpy.array(history_errs)[:-patience, 693 | 0].min()): 694 | bad_counter += 1 695 | if bad_counter > patience: 696 | print 'Early Stop!' 697 | estop = True 698 | break 699 | 700 | 701 | if numpy.mod(eidx, saveFreq) == 0: 702 | print 'Saving...', 703 | 704 | if best_p is not None: 705 | params = best_p 706 | else: 707 | params = unzip(tparams) 708 | numpy.savez(saveto, history_errs=history_errs, **params) 709 | pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1) 710 | print 'Done' 711 | 712 | 713 | print 'Seen %d samples' % n_samples 714 | 715 | if estop: 716 | break 717 | 718 | except KeyboardInterrupt: 719 | print "Training interupted" 720 | 721 | end_time = time.clock() 722 | if best_p is not None: 723 | zipp(best_p, tparams) 724 | else: 725 | best_p = unzip(tparams) 726 | 727 | use_noise.set_value(0.) 728 | #train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options) 729 | valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options) 730 | test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options) 731 | bckfr_err = backforecast(f_pred_prob, test, model_options) 732 | 733 | print 'Valid ', valid_err, 'Test ', test_err, 'Backforecasting ', bckfr_err 734 | 735 | numpy.savez(saveto, train_err=train_err, 736 | valid_err=valid_err, test_err=test_err, 737 | history_errs=history_errs, **best_p) 738 | print 'The code run for %d epochs, with %f sec/epochs' % ( 739 | (eidx + 1), (end_time - start_time) / (1. * (eidx + 1))) 740 | print >> sys.stderr, ('Training took %.1fs' % 741 | (end_time - start_time)) 742 | return train_err, valid_err, test_err 743 | 744 | 745 | if __name__ == '__main__': 746 | 747 | # We must have floatX=float32 for this tutorial to work correctly. 
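    # The training run saves the best parameters together with the validation
    # and test error history into saveto (a .npz archive) and the model options
    # into saveto + '.pkl'. A finished run can be reloaded by passing a
    # non-empty reload_model (see the commented-out argument in the call
    # below), e.g.:
    #
    #     train_lstm(exchange='AUDJPY', reload_model="lstm_model.npz")
    #
    # Note that the final numpy.savez call in train_lstm references train_err
    # while its computation is commented out a few lines earlier, so the very
    # last save would raise a NameError unless that line is re-enabled.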
748 | theano.config.floatX = "float32" 749 | # The next line is the new Theano default. This is a speed up. 750 | #theano.config.scan.allow_gc = False 751 | exchange = 'AUDJPY' 752 | if len(sys.argv) == 2: 753 | exchange = sys.argv[1] 754 | 755 | # See function train for all possible parameter and there definition. 756 | train_lstm( 757 | #reload_model="lstm_model.npz", 758 | exchange=exchange, 759 | max_epochs=162, 760 | ) 761 | 762 | -------------------------------------------------------------------------------- /forex.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Code to read from forex data 3 | data can be obtained from http://www.histdata.com/download-free-forex-data/ 4 | both daily data and minute data can be used 5 | ''' 6 | 7 | # Minute data is under implementation 8 | 9 | 10 | import os 11 | import pdb 12 | import sys 13 | 14 | import numpy 15 | 16 | import theano 17 | from numpy import genfromtxt 18 | from pandas import Series 19 | import datetime 20 | import csv 21 | 22 | #import matplotlib.pyplot as plt 23 | import sklearn.cross_validation as cv 24 | from sklearn import preprocessing 25 | 26 | 27 | def data_preprocessing(data): 28 | ''' 29 | Simple preprocessing of data 30 | ''' 31 | #data = data[51000:] 32 | #data = data[350:,:] 33 | # Standarization 34 | 35 | # Compute compound return serie 36 | #data = numpy.log(data / numpy.roll(data, 1, axis = 0)) 37 | #data = data[1:,:] 38 | #data = data / numpy.roll(data,1,axis=0) 39 | #data = data[1:,:] - 1. 40 | 41 | print data.shape 42 | 43 | #print 44 | #ts = Series(numpy.ravel(data[:,0])) 45 | #ts.plot() 46 | #plt.show() 47 | 48 | 49 | mean = data.mean(axis=0) 50 | std = data.std(axis=0) 51 | 52 | data = data - mean 53 | data = data/std 54 | 55 | #Some kind of smoothing?? 
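    # The block above standardizes each column to zero mean and unit variance,
    # and read_data below saves mean and std into params_file so predictions
    # can be mapped back to price units. A small sketch (pred is a hypothetical
    # normalized prediction of the target, which is the last selected column):
    #
    #     z = (data - mean) / std             # what this function returns
    #     price = pred * std[-1] + mean[-1]   # undo the scaling for the target column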
56 | 57 | #min_max = preprocessing.MinMaxScaler() 58 | #data = min_max.fit_transform(data) 59 | 60 | #Put between 1 and 0 61 | return data,mean,std 62 | 63 | def read_data(path="AUDJPY_hour.csv", dir="/user/j/jgpavez/rnn_trading/data/", 64 | max_len=30, valid_portion=0.1, columns=4, up=False, params_file='params.npz',min=False): 65 | 66 | ''' 67 | Reading forex data, daily or minute 68 | ''' 69 | path = os.path.join(dir, path) 70 | 71 | #data = read_csv(path,delimiter=delimiter) 72 | data = genfromtxt(path, delimiter=',',skip_header=1) 73 | # Adding data bu minute 74 | if min == False: 75 | date_index = 1 76 | values_index = 3 77 | hours = data[:,2] 78 | else: 79 | date_index = 0 80 | values_index = 1 81 | 82 | dates = data[:,date_index] 83 | days = numpy.array([datetime.datetime(int(str(date)[0:-2][0:4]),int(str(date)[0:-2][4:6]), 84 | int(str(date)[0:-2][6:8])).weekday() for date in dates]) 85 | months = numpy.array([datetime.datetime(int(str(date)[0:-2][0:4]),int(str(date)[0:-2][4:6]), 86 | int(str(date)[0:-2][6:8])).month for date in dates]) 87 | 88 | #dates[:,date_index] = days 89 | 90 | data = data[:,values_index:(values_index+columns)] 91 | 92 | data,mean,std = data_preprocessing(data) 93 | 94 | # Save data parameters 95 | numpy.savez(params_file, mean=mean, std=std) 96 | 97 | #x_data = numpy.array([data[i:i+max_len,:] for i in xrange(len(data)-max_len)]) 98 | #y_data = numpy.array([data[i][-1] for i in xrange(max_len , len(data))]) 99 | 100 | # Not consider jumps between days of market closing 101 | #TODO: Here I'm just considering weekends, have to think about holydays 102 | x_data = [] 103 | y_data = [] 104 | for i in xrange(len(data)-max_len): 105 | #TODO: just working for max_len < 24 106 | if (dates[i+max_len-1] == 4 and dates[i+max_len] <> 4): 107 | continue 108 | x_data.append(data[i:i+max_len,:]) 109 | y_data.append(data[i+max_len][-1]) 110 | x_data = numpy.array(x_data) 111 | y_data = numpy.array(y_data) 112 | 113 | if up is True: 114 | y_data = y_data > x_data[:,-1,0] 115 | y_data = numpy.asarray(y_data, dtype='int64') 116 | 117 | # split data into training and test 118 | train_set_x, test_set_x, train_set_y, test_set_y = cv.train_test_split(x_data, 119 | y_data, test_size=0.2, random_state=0) 120 | 121 | # split training set into validation set 122 | n_samples = len(train_set_x) 123 | sidx = numpy.random.permutation(n_samples) 124 | n_train = int(numpy.round(n_samples * (1. 
- valid_portion))) 125 | valid_set_x = [train_set_x[s] for s in sidx[n_train:]] 126 | valid_set_y = [train_set_y[s] for s in sidx[n_train:]] 127 | train_set_x = [train_set_x[s] for s in sidx[:n_train]] 128 | train_set_y = [train_set_y[s] for s in sidx[:n_train]] 129 | 130 | train = (train_set_x, train_set_y) 131 | valid = (valid_set_x, valid_set_y) 132 | test = (test_set_x, test_set_y) 133 | 134 | return train, valid, test, mean, std 135 | 136 | def prepare_data(seqs, labels, steps, x_dim, up=False): 137 | 138 | n_samples = len(seqs) 139 | max_len = steps 140 | x = numpy.zeros((max_len, n_samples, x_dim)).astype('float32') 141 | if up is True: 142 | y = numpy.asarray(labels, dtype='int64') 143 | else: 144 | y = numpy.asarray(labels, dtype='float32') 145 | 146 | for idx, s in enumerate(seqs): 147 | x[:,idx,:] = s 148 | 149 | return x, y 150 | 151 | -------------------------------------------------------------------------------- /lstm_forex.py: -------------------------------------------------------------------------------- 1 | ''' 2 | LSTM RNN for forex predictions 3 | Based on sentiment analysis lstm found in deeplearning tutorials 4 | ''' 5 | from collections import OrderedDict 6 | import copy 7 | import cPickle as pkl 8 | import random 9 | import sys 10 | import time 11 | import pdb 12 | import os 13 | import logging 14 | 15 | import numpy 16 | import theano 17 | import theano.tensor as tensor 18 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams 19 | from theano.ifelse import ifelse 20 | from theano import config 21 | 22 | 23 | from forex import read_data, prepare_data 24 | 25 | def numpy_floatX(data): 26 | return numpy.asarray(data, dtype=config.floatX) 27 | 28 | #### rectified linear unit 29 | def ReLU(x): 30 | y = tensor.maximum(0.0, x) 31 | return(y) 32 | 33 | def get_minibatches_idx(n, minibatch_size, shuffle=False): 34 | """ 35 | Used to shuffle the dataset at each iteration. 36 | """ 37 | 38 | idx_list = numpy.arange(n, dtype="int32") 39 | 40 | if shuffle: 41 | random.shuffle(idx_list) 42 | 43 | minibatches = [] 44 | minibatch_start = 0 45 | for i in range(n // minibatch_size): 46 | minibatches.append(idx_list[minibatch_start: 47 | minibatch_start + minibatch_size]) 48 | minibatch_start += minibatch_size 49 | 50 | if (minibatch_start != n): 51 | # Make a minibatch out of what is left 52 | minibatches.append(idx_list[minibatch_start:]) 53 | 54 | return zip(range(len(minibatches)), minibatches) 55 | 56 | def zipp(params, tparams): 57 | """ 58 | When we reload the model. Needed for the GPU stuff. 59 | """ 60 | for kk, vv in params.iteritems(): 61 | tparams[kk].set_value(vv) 62 | 63 | 64 | def unzip(zipped): 65 | """ 66 | When we pickle the model. Needed for the GPU stuff. 67 | """ 68 | new_params = OrderedDict() 69 | for kk, vv in zipped.iteritems(): 70 | new_params[kk] = vv.get_value() 71 | return new_params 72 | 73 | 74 | def dropout_layer(state_before, use_noise, trng): 75 | proj = tensor.switch(use_noise, 76 | (state_before * 77 | trng.binomial(state_before.shape, 78 | p=0.5, n=1, 79 | dtype=state_before.dtype)), 80 | state_before * 0.5) 81 | return proj 82 | 83 | 84 | def _p(pp, name): 85 | return '%s_%s' % (pp, name) 86 | 87 | 88 | def init_params(options): 89 | """ 90 | Global (not LSTM) parameter. For the embeding and the classifier. 
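    The windows this model consumes come from forex.prepare_data (imported
    above), which stacks a list of (steps, n_input) windows into one array.
    Roughly, with n_input=4 feature columns:

        x, y = prepare_data(seqs, labels, steps=n_iter, x_dim=n_input)
        # x.shape == (n_iter, n_samples, n_input), float32
        # y.shape == (n_samples,), the next (normalized) value of the target column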
91 | """ 92 | params = OrderedDict() 93 | # embedding 94 | randn = numpy.random.rand(options['n_input'], 95 | options['dim_proj']) 96 | params['Wemb'] = (0.01 * randn).astype('float32') 97 | params = get_layer(options['encoder'])[0](options, 98 | params, 99 | prefix=options['encoder']) 100 | # classifier 101 | params['U'] = 0.01 * numpy.random.randn(options['dim_proj'], 102 | options['ydim']).astype('float32') 103 | params['b'] = numpy.zeros((options['ydim'],)).astype('float32') 104 | 105 | return params 106 | 107 | 108 | def load_params(path, params): 109 | pp = numpy.load(path) 110 | for kk, vv in params.iteritems(): 111 | if kk not in pp: 112 | raise Warning('%s is not in the archive' % kk) 113 | params[kk] = pp[kk] 114 | 115 | return params 116 | 117 | 118 | def init_tparams(params): 119 | tparams = OrderedDict() 120 | for kk, pp in params.iteritems(): 121 | tparams[kk] = theano.shared(params[kk], name=kk) 122 | return tparams 123 | 124 | 125 | def get_layer(name): 126 | fns = layers[name] 127 | return fns 128 | 129 | 130 | def ortho_weight(ndim): 131 | W = numpy.random.randn(ndim, ndim) 132 | u, s, v = numpy.linalg.svd(W) 133 | return u.astype('float32') 134 | 135 | 136 | def param_init_lstm(options, params, prefix='lstm'): 137 | """ 138 | Init the LSTM parameter: 139 | 140 | :see: init_params 141 | """ 142 | W = numpy.concatenate([ortho_weight(options['dim_proj']), 143 | ortho_weight(options['dim_proj']), 144 | ortho_weight(options['dim_proj']), 145 | ortho_weight(options['dim_proj'])], axis=1) 146 | params[_p(prefix, 'W')] = W.astype('float32') 147 | U = numpy.concatenate([ortho_weight(options['dim_proj']), 148 | ortho_weight(options['dim_proj']), 149 | ortho_weight(options['dim_proj']), 150 | ortho_weight(options['dim_proj'])], axis=1) 151 | params[_p(prefix, 'U')] = U.astype('float32') 152 | b = numpy.zeros((4 * options['dim_proj'],)) 153 | params[_p(prefix, 'b')] = b.astype('float32') 154 | 155 | return params 156 | 157 | 158 | def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None): 159 | nsteps = state_below.shape[0] 160 | if state_below.ndim == 3: 161 | n_samples = state_below.shape[1] 162 | else: 163 | n_samples = 1 164 | 165 | #assert mask is not None 166 | 167 | def _slice(_x, n, dim): 168 | if _x.ndim == 3: 169 | return _x[:, :, n*dim:(n+1)*dim] 170 | return _x[:, n*dim:(n+1)*dim] 171 | 172 | def _step(x_, h_, c_): 173 | preact = tensor.dot(h_, tparams[_p(prefix, 'U')]) 174 | preact += x_ 175 | preact += tparams[_p(prefix, 'b')] 176 | 177 | i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj'])) 178 | f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj'])) 179 | o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj'])) 180 | c = tensor.tanh(_slice(preact, 3, options['dim_proj'])) 181 | 182 | c = f * c_ + i * c 183 | #c = m_[:, None] * c + (1. - m_)[:, None] * c_ 184 | 185 | h = o * tensor.tanh(c) 186 | #TODO: I think this don't apply since is made to avoid sequences smaller tan max_len 187 | #h = m_[:, None] * h + (1. 
- m_)[:, None] * h_ 188 | 189 | return h, c 190 | 191 | state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) + 192 | tparams[_p(prefix, 'b')]) 193 | 194 | dim_proj = options['dim_proj'] 195 | rval, updates = theano.scan(_step, 196 | sequences=[state_below], 197 | outputs_info=[tensor.alloc(0., n_samples, 198 | dim_proj), 199 | tensor.alloc(0., n_samples, 200 | dim_proj)], 201 | name=_p(prefix, '_layers'), 202 | n_steps=nsteps) 203 | return rval[0] 204 | 205 | 206 | # ff: Feed Forward (normal neural net), only useful to put after lstm 207 | # before the classifier. 208 | layers = {'lstm': (param_init_lstm, lstm_layer)} 209 | 210 | def mom_sgd(lr, tparams, grads, x, y, cost): 211 | """ Momentum Stochastic Gradient Descent 212 | 213 | :note: A more complicated version of sgd then needed. This is 214 | done like that for adadelta and rmsprop. 215 | 216 | """ 217 | 218 | updates = OrderedDict() 219 | 220 | mom = tensor.scalar(name='mom') 221 | gmomshared = [theano.shared(p.get_value(), name='%s_mom_grad' %k) 222 | for k,p in tparams.iteritems()] 223 | 224 | # New set of shared variable that will contain the gradient 225 | # for a mini-batch. 226 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 227 | for k, p in tparams.iteritems()] 228 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 229 | 230 | # Function that computes gradients for a mini-batch, but do not 231 | # updates the weights. 232 | f_grad_shared = theano.function([x, y], cost, updates=gsup, 233 | name='sgd_f_grad_shared') 234 | 235 | for gm,gp in zip(gmomshared,gshared): 236 | updates[gm] = mom*gm - (1.0 - mom) * lr * gp 237 | #gmomup = [(gm, mom*gm - (1.0 - mom) * lr * gp) for gm,gp in 238 | # zip(gmomshared, gshared)] 239 | 240 | #pup = [(p, p + gm) for p, gm in zip(tparams.values(), gmomup)] 241 | for p,gm in zip(tparams.values(), gmomshared): 242 | updates[p] = p + updates[gm] 243 | 244 | # Function that updates the weights from the previously computed 245 | # gradient. 246 | f_update = theano.function([lr,mom], [], updates=updates, 247 | name='sgd_f_update') 248 | 249 | return f_grad_shared, f_update 250 | 251 | 252 | def sgd(lr, tparams, grads, x, y, cost): 253 | """ Stochastic Gradient Descent 254 | 255 | :note: A more complicated version of sgd then needed. This is 256 | done like that for adadelta and rmsprop. 257 | 258 | """ 259 | # New set of shared variable that will contain the gradient 260 | # for a mini-batch. 261 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 262 | for k, p in tparams.iteritems()] 263 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 264 | 265 | # Function that computes gradients for a mini-batch, but do not 266 | # updates the weights. 267 | f_grad_shared = theano.function([x, y], cost, updates=gsup, 268 | name='sgd_f_grad_shared') 269 | 270 | pup = [(p, p - lr * g) for p, g in zip(tparams.values(), gshared)] 271 | 272 | # Function that updates the weights from the previously computed 273 | # gradient. 
274 | f_update = theano.function([lr], [], updates=pup, 275 | name='sgd_f_update') 276 | 277 | return f_grad_shared, f_update 278 | 279 | 280 | def adadelta(lr, tparams, grads, x, y, cost): 281 | zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), 282 | name='%s_grad' % k) 283 | for k, p in tparams.iteritems()] 284 | running_up2 = [theano.shared(p.get_value() * numpy.float32(0.), 285 | name='%s_rup2' % k) 286 | for k, p in tparams.iteritems()] 287 | running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), 288 | name='%s_rgrad2' % k) 289 | for k, p in tparams.iteritems()] 290 | 291 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 292 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 293 | for rg2, g in zip(running_grads2, grads)] 294 | 295 | f_grad_shared = theano.function([x, y], cost, updates=zgup+rg2up, 296 | name='adadelta_f_grad_shared') 297 | 298 | updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg 299 | for zg, ru2, rg2 in zip(zipped_grads, 300 | running_up2, 301 | running_grads2)] 302 | ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) 303 | for ru2, ud in zip(running_up2, updir)] 304 | param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)] 305 | 306 | f_update = theano.function([lr], [], updates=ru2up+param_up, 307 | on_unused_input='ignore', 308 | name='adadelta_f_update', 309 | mode='DebugMode') 310 | 311 | return f_grad_shared, f_update 312 | 313 | 314 | def rmsprop(lr, tparams, grads, x, y, cost): 315 | zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), 316 | name='%s_grad' % k) 317 | for k, p in tparams.iteritems()] 318 | running_grads = [theano.shared(p.get_value() * numpy.float32(0.), 319 | name='%s_rgrad' % k) 320 | for k, p in tparams.iteritems()] 321 | running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), 322 | name='%s_rgrad2' % k) 323 | for k, p in tparams.iteritems()] 324 | 325 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 326 | rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)] 327 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 328 | for rg2, g in zip(running_grads2, grads)] 329 | 330 | f_grad_shared = theano.function([x, y], cost, 331 | updates=zgup + rgup + rg2up, 332 | name='rmsprop_f_grad_shared') 333 | 334 | updir = [theano.shared(p.get_value() * numpy.float32(0.), 335 | name='%s_updir' % k) 336 | for k, p in tparams.iteritems()] 337 | updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4)) 338 | for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, 339 | running_grads2)] 340 | param_up = [(p, p + udn[1]) 341 | for p, udn in zip(tparams.values(), updir_new)] 342 | f_update = theano.function([lr], [], updates=updir_new+param_up, 343 | on_unused_input='ignore', 344 | name='rmsprop_f_update') 345 | 346 | return f_grad_shared, f_update 347 | 348 | 349 | def build_model(tparams, options): 350 | trng = RandomStreams(1234) 351 | 352 | # Used for dropout. 
353 | use_noise = theano.shared(numpy.float32(0.)) 354 | 355 | x = tensor.tensor3('x', dtype='float32') 356 | #mask = tensor.matrix('mask', dtype='float32') 357 | y = tensor.vector('y', dtype='float32') 358 | 359 | n_timesteps = x.shape[0] 360 | n_samples = x.shape[1] 361 | n_dim = x.shape[2] 362 | 363 | emb = tensor.dot(x,tparams['Wemb']) 364 | #emb = tensor.nnet.sigmoid(emb) 365 | #emb = ReLU(emb) 366 | 367 | if options['use_dropout']: 368 | emb = dropout_layer(emb, use_noise, trng) 369 | 370 | proj = get_layer(options['encoder'])[1](tparams, emb, options, 371 | prefix=options['encoder'] 372 | ) 373 | 374 | 375 | if options['encoder'] == 'lstm' and options['sum_pool'] == True: 376 | proj = proj.sum(axis=0) 377 | proj = proj / options['n_iter'] 378 | else: 379 | proj = proj[-1] 380 | #if options['use_dropout']: 381 | # proj = dropout_layer(proj, use_noise, trng) 382 | 383 | #pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U'])+tparams['b']) 384 | #pred = tensor.nnet.sigmoid(tensor.dot(proj, tparams['U'])\ 385 | # + tparams['b']) 386 | pred = tensor.dot(proj, tparams['U']) + tparams['b'] 387 | 388 | f_pred_prob = theano.function([x], pred, name='f_pred_prob') 389 | #f_pred = theano.function(x, pred.argmax(axis=1), name='f_pred') 390 | 391 | cost = tensor.mean((y-pred.T)**2) 392 | 393 | #cost = -tensor.log(pred[tensor.arange(n_samples), y] + 1e-8).mean() 394 | 395 | return use_noise, x, y, f_pred_prob, cost 396 | 397 | 398 | def pred_probs(f_pred, prepare_data, data, model_options, verbose=False): 399 | """ If you want to use a trained model, this is useful to compute 400 | the probabilities of new examples. 401 | """ 402 | n_samples = len(data) 403 | 404 | x,y = prepare_data(data, numpy.array([]), 405 | model_options['n_iter'], model_options['n_input']) 406 | pred = f_pred(x) 407 | 408 | return pred 409 | 410 | def pred_error(f_pred, prepare_data, data, iterator, model_options, verbose=False): 411 | """ 412 | Just compute the error 413 | f_pred: Theano fct computing the prediction 414 | prepare_data: usual prepare_data for that dataset. 415 | """ 416 | valid_err = 0 417 | for _, valid_index in iterator: 418 | # TODO: This is not very efficient I should check 419 | x, y = prepare_data([data[0][t] for t in valid_index], 420 | numpy.array(data[1])[valid_index], 421 | model_options['n_iter'],model_options['n_input']) 422 | 423 | 424 | preds = f_pred(x) 425 | targets = numpy.array(data[1])[valid_index] 426 | # or tensor.sum 427 | valid_err += ((targets-preds.T)**2).sum() 428 | #valid_err = 1. - numpy.float32(valid_err) / len(data[0]) 429 | valid_err = numpy_floatX(valid_err) / len(data[0]) 430 | 431 | return valid_err 432 | 433 | 434 | def R_score(f_pred, prepare_data, data, iterator, model_options, verbose=False): 435 | """ 436 | Compute R score 437 | f_pred: Theano fct computing the prediction 438 | prepare_data: usual prepare_data for that dataset. 439 | """ 440 | valid_err = 0 441 | denom = 0 442 | data_mean = numpy.array(data[1]).mean() 443 | for _, valid_index in iterator: 444 | # TODO: This is not very efficient I should check 445 | x, y = prepare_data([data[0][t] for t in valid_index], 446 | numpy.array(data[1])[valid_index], 447 | model_options['n_iter'],model_options['n_input']) 448 | 449 | 450 | preds = f_pred(x) 451 | targets = numpy.array(data[1])[valid_index] 452 | valid_err += tensor.sum((targets-preds.T)**2) 453 | denom += ((numpy.array(data[1]) - data_mean)**2).sum() 454 | #valid_err = 1. - numpy.float32(valid_err) / len(data[0]) 455 | valid_err = 1. 
- (valid_err / denom)
456 | 
457 |     return valid_err.eval()
458 | 
459 | 
460 | def backforecast(f_pred, data, model_options):
461 |     """
462 |     Compute the fraction of examples for which
463 |     the RNN fails to predict the direction
464 |     (up or down) of the next move
465 |     """
466 |     # TODO: Use the prepare data
467 |     x, y = prepare_data(data[0], data[1], model_options['n_iter'],
468 |                         model_options['n_input'])
469 | 
470 |     targets = (y > x[-1,:,0])
471 |     preds = f_pred(numpy.asarray(x,dtype='float32'))
472 |     preds_up = (preds[:,0] > x[-1,:,0])
473 |     err = (targets <> preds_up).sum()
474 | 
475 |     ret = float(err) / float(len(data[0]))
476 |     return ret
477 | 
478 | def predict_lstm(input, model_options):
479 | 
480 |     params = init_params(model_options)
481 | 
482 |     load_params(model_options['saveto'], params)
483 | 
484 |     # This creates Theano Shared Variables from the parameters.
485 |     # Dict name (string) -> Theano Tensor Shared Variable
486 |     # params and tparams have different copies of the weights.
487 |     tparams = init_tparams(params)
488 | 
489 |     # use_noise is for dropout
490 |     (use_noise, x,
491 |      y, f_pred_prob, cost) = build_model(tparams, model_options)
492 | 
493 |     preds = pred_probs(f_pred_prob, prepare_data, input, model_options)
494 | 
495 |     return preds
496 | 
497 | 
498 | def train_lstm(
499 |     #dim_proj=32, # word embedding dimension and LSTM number of hidden units.
500 |     dim_proj=124, # word embedding dimension and LSTM number of hidden units.
501 |     patience=10, # Number of epochs to wait before early stop if no progress
502 |     max_epochs=150, # The maximum number of epochs to run
503 |     dispFreq=40, # Display to stdout the training progress every N updates
504 |     decay_c=0., # Weight decay for the classifier applied to the U weights.
505 |     lrate=0.1, # Learning rate for sgd (not used for adadelta and rmsprop)
506 |     n_input = 4, # Number of input features per time step
507 |     optimizer=mom_sgd, # sgd, mom_sgd, adadelta and rmsprop available; plain sgd is very hard to use and not recommended (it probably needs momentum and a decaying learning rate).
508 |     encoder='lstm', # TODO: can be removed, must be lstm.
509 |     tick='hour',
510 |     validFreq=5, # Compute the validation error every this many epochs.
511 |     saveFreq=5, # Save the parameters every saveFreq epochs
512 |     maxlen=100, # Sequences longer than this get ignored
513 |     batch_size=50, # The batch size during training.
514 |     valid_batch_size=50, # The batch size used for validation/test set.
515 |     exchange='AUDJPY',
516 | 
517 |     # Parameters for extra options
518 |     noise_std=0.,
519 |     use_dropout=False, # if False slightly faster, but worse test error
520 |     # This frequently needs a bigger model.
521 |     reload_model="", # Path to a saved model we want to start from.
522 | sum_pool = False, 523 | mom_start = 0.5, 524 | mom_end = 0.99, 525 | mom_epoch_interval = 60, 526 | learning_rate_decay=0.99995, 527 | #learning_rate_decay=0.98, 528 | predict=False, 529 | input_pred=None 530 | ): 531 | 532 | ''' 533 | Main function for LSTM training 534 | ''' 535 | model_path = "/user/j/jgpavez/rnn_trading/models/" 536 | data_path = "/user/j/jgpavez/rnn_trading/data/" 537 | log_path = "/user/j/jgpavez/rnn_trading/logs/" 538 | 539 | 540 | saveto = exchange + '_model.npz' 541 | params_file = exchange + '_params.npz' 542 | dataset = exchange + '_{0}.csv'.format(tick) 543 | 544 | saveto = os.path.join(model_path, saveto) 545 | params_file = os.path.join(data_path, params_file) 546 | 547 | ydim = 1 548 | #n_iter = 10 549 | n_iter = 24 550 | # Model options 551 | model_options = locals().copy() 552 | 553 | if predict == True: 554 | return predict_lstm(input_pred, model_options) 555 | 556 | 557 | print "model options", model_options 558 | 559 | print 'Loading data' 560 | train, valid, test, mean, std = read_data(max_len=n_iter, path=dataset, params_file=params_file,min=(tick=='minute')) 561 | 562 | #YDIM?? 563 | #number of labels (output) 564 | 565 | theano.config.optimizer = 'None' 566 | 567 | print 'Building model' 568 | # This create the initial parameters as numpy ndarrays. 569 | # Dict name (string) -> numpy ndarray 570 | params = init_params(model_options) 571 | 572 | if reload_model: 573 | load_params(saveto, params) 574 | 575 | # This create Theano Shared Variable from the parameters. 576 | # Dict name (string) -> Theano Tensor Shared Variable 577 | # params and tparams have different copy of the weights. 578 | tparams = init_tparams(params) 579 | 580 | # use_noise is for dropout 581 | (use_noise, x, 582 | y, f_pred_prob, cost) = build_model(tparams, model_options) 583 | 584 | if decay_c > 0.: 585 | decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') 586 | weight_decay = 0. 587 | weight_decay += (tparams['U']**2).sum() 588 | weight_decay *= decay_c 589 | cost += weight_decay 590 | 591 | f_cost = theano.function([x, y], cost, name='f_cost') 592 | 593 | grads = tensor.grad(cost, wrt=tparams.values()) 594 | f_grad = theano.function([x, y], grads, name='f_grad') 595 | 596 | lr = tensor.scalar(name='lr') 597 | f_grad_shared, f_update = optimizer(lr, tparams, grads, 598 | x, y, cost) 599 | 600 | print 'Optimization' 601 | 602 | 603 | kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size, 604 | shuffle=True) 605 | kf_test = get_minibatches_idx(len(test[0]), valid_batch_size, 606 | shuffle=True) 607 | 608 | print "%d train examples" % len(train[0]) 609 | print "%d valid examples" % len(valid[0]) 610 | print "%d test examples" % len(test[0]) 611 | history_errs = [] 612 | best_p = None 613 | bad_count = 0 614 | 615 | if validFreq == -1: 616 | validFreq = len(train[0])/batch_size 617 | if saveFreq == -1: 618 | saveFreq = len(train[0])/batch_size 619 | 620 | uidx = 0 # the number of update done 621 | estop = False # early stop 622 | start_time = time.clock() 623 | mom = 0 624 | 625 | try: 626 | for eidx in xrange(max_epochs): 627 | n_samples = 0 628 | 629 | # Get new shuffled index for the training set. 
630 | kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True) 631 | 632 | if eidx < model_options['mom_epoch_interval']: 633 | mom = model_options['mom_start']*\ 634 | (1.0 - eidx/model_options['mom_epoch_interval'])\ 635 | + mom_end*(eidx/model_options['mom_epoch_interval']) 636 | else: 637 | mom = mom_end 638 | 639 | for _, train_index in kf: 640 | uidx += 1 641 | use_noise.set_value(1.) 642 | 643 | # Select the random examples for this minibatch 644 | y = [train[1][t] for t in train_index] 645 | x = [train[0][t]for t in train_index] 646 | 647 | # Get the data in numpy.ndarray formet. 648 | # It return something of the shape (minibatch maxlen, n samples) 649 | x, y = prepare_data(x, y, model_options['n_iter'],model_options['n_input']) 650 | 651 | if x is None: 652 | print 'Minibatch with zero sample under length ', maxlen 653 | continue 654 | n_samples += x.shape[1] 655 | cost = f_grad_shared(x, y) 656 | f_update(lrate,mom) 657 | 658 | #decay 659 | #TODO: CHECK THIS LEARNING RATE 660 | #lrate = learning_rate_decay*lrate 661 | 662 | if numpy.isnan(cost) or numpy.isinf(cost): 663 | print 'NaN detected' 664 | return 1., 1., 1. 665 | 666 | if numpy.mod(uidx, dispFreq) == 0: 667 | with open(log_path + 'log_{0}_{0}.log'.format(dim_proj, n_iter), 'a') as log_file: 668 | log_file.write('Epoch {0} Update {1} Cost {2}\n'.format(eidx, uidx, cost)) 669 | 670 | #decay 671 | #TODO: CHECK THIS LEARNING RATE 672 | lrate = learning_rate_decay*lrate 673 | if numpy.mod(eidx, validFreq) == 0: 674 | use_noise.set_value(0.) 675 | #train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options) 676 | valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options) 677 | test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options) 678 | #bckfr_err = backforecast(f_pred_prob, test, model_options) 679 | #r2_score = R_score(f_pred_prob, prepare_data, test, kf_test, model_options) 680 | bckfr_err = 0. 681 | r2_score = 0. 682 | 683 | #history_errs.append([valid_err, test_err]) 684 | history_errs.append([valid_err, bckfr_err]) 685 | 686 | if (eidx == 0 or 687 | test_err <= numpy.array(history_errs)[:, 688 | 1].min()): 689 | #bckfr_err <= numpy.array(history_errs)[:, 690 | # 1].min()): 691 | 692 | best_p = unzip(tparams) 693 | bad_counter = 0 694 | 695 | with open(log_path + 'log_{0}_{0}.log'.format(dim_proj, n_iter), 'a') as log_file: 696 | log_file.write('Valid {0} Test {1}\n'.format(valid_err,test_err)) 697 | print('Valid',valid_err, 698 | 'Test ', test_err, 699 | 'Backfore ', bckfr_err, 700 | 'R2 score ', r2_score) 701 | 702 | if (len(history_errs) > patience and 703 | valid_err >= numpy.array(history_errs)[:-patience, 704 | 0].min()): 705 | bad_counter += 1 706 | if bad_counter > patience: 707 | print 'Early Stop!' 708 | estop = True 709 | break 710 | 711 | 712 | if numpy.mod(eidx, saveFreq) == 0: 713 | print 'Saving...', 714 | 715 | if best_p is not None: 716 | params = best_p 717 | else: 718 | params = unzip(tparams) 719 | numpy.savez(saveto, history_errs=history_errs, **params) 720 | pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1) 721 | print 'Done' 722 | 723 | 724 | print 'Seen %d samples' % n_samples 725 | 726 | if estop: 727 | break 728 | 729 | except KeyboardInterrupt: 730 | print "Training interupted" 731 | 732 | end_time = time.clock() 733 | if best_p is not None: 734 | zipp(best_p, tparams) 735 | else: 736 | best_p = unzip(tparams) 737 | 738 | use_noise.set_value(0.) 
739 |     #train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options)
740 |     valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options)
741 |     test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options)
742 |     #bckfr_err = backforecast(f_pred_prob, test, model_options)
743 |     #r2_score = R_score(f_pred_prob, prepare_data, test, kf_test, model_options)
744 |     r2_score = 0.
745 |     bckfr_err = 0.
746 |     train_err = 0.  # final train error is not recomputed; keep savez/return below from failing
747 |     print 'Valid ', valid_err, 'Test ', test_err, 'Backforecasting ', bckfr_err, ' R2 score: ', r2_score
748 | 
749 |     numpy.savez(saveto, train_err=train_err,
750 |                 valid_err=valid_err, test_err=test_err,
751 |                 history_errs=history_errs, **best_p)
752 |     print 'The code ran for %d epochs, with %f sec/epochs' % (
753 |         (eidx + 1), (end_time - start_time) / (1. * (eidx + 1)))
754 |     print >> sys.stderr, ('Training took %.1fs' %
755 |                           (end_time - start_time))
756 |     return train_err, valid_err, test_err
757 | 
758 | 
759 | if __name__ == '__main__':
760 | 
761 |     # We must have floatX=float32 for this tutorial to work correctly.
762 |     theano.config.floatX = "float32"
763 |     # The next line is the new Theano default. This is a speed up.
764 |     #theano.config.scan.allow_gc = False
765 |     exchange = 'AUDJPY'
766 |     tick = 'hour'
767 |     if len(sys.argv) >= 2:
768 |         exchange = sys.argv[1]
769 |     if len(sys.argv) >= 3:
770 |         tick = sys.argv[2]
771 | 
772 |     # See train_lstm for all possible parameters and their definitions.
773 |     train_lstm(
774 |         #reload_model="lstm_model.npz",
775 |         exchange=exchange,
776 |         max_epochs=20,
777 |         #max_epochs=162,
778 |         tick=tick
779 |     )
780 | 
781 | 
--------------------------------------------------------------------------------
/lstm_ts_2.py:
--------------------------------------------------------------------------------
1 | '''
2 | LSTM RNN for stock predictions
3 | Based on sentiment analysis lstm found in deeplearning tutorials
4 | '''
5 | from collections import OrderedDict
6 | import copy
7 | import cPickle as pkl
8 | import random
9 | import sys
10 | import time
11 | import pdb
12 | 
13 | import numpy
14 | import theano
15 | import theano.tensor as tensor
16 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
17 | from theano.ifelse import ifelse
18 | 
19 | 
20 | from quant import read_data, prepare_data
21 | 
22 | #### rectified linear unit
23 | def ReLU(x):
24 |     y = tensor.maximum(0.0, x)
25 |     return(y)
26 | 
27 | def get_minibatches_idx(n, minibatch_size, shuffle=False):
28 |     """
29 |     Used to shuffle the dataset at each iteration.
30 |     """
31 | 
32 |     idx_list = numpy.arange(n, dtype="int32")
33 | 
34 |     if shuffle:
35 |         random.shuffle(idx_list)
36 | 
37 |     minibatches = []
38 |     minibatch_start = 0
39 |     for i in range(n // minibatch_size):
40 |         minibatches.append(idx_list[minibatch_start:
41 |                                     minibatch_start + minibatch_size])
42 |         minibatch_start += minibatch_size
43 | 
44 |     if (minibatch_start != n):
45 |         # Make a minibatch out of what is left
46 |         minibatches.append(idx_list[minibatch_start:])
47 | 
48 |     return zip(range(len(minibatches)), minibatches)
49 | 
50 | def zipp(params, tparams):
51 |     """
52 |     When we reload the model. Needed for the GPU stuff.
53 |     """
54 |     for kk, vv in params.iteritems():
55 |         tparams[kk].set_value(vv)
56 | 
57 | 
58 | def unzip(zipped):
59 |     """
60 |     When we pickle the model. Needed for the GPU stuff.
61 | """ 62 | new_params = OrderedDict() 63 | for kk, vv in zipped.iteritems(): 64 | new_params[kk] = vv.get_value() 65 | return new_params 66 | 67 | 68 | def dropout_layer(state_before, use_noise, trng): 69 | proj = tensor.switch(use_noise, 70 | (state_before * 71 | trng.binomial(state_before.shape, 72 | p=0.5, n=1, 73 | dtype=state_before.dtype)), 74 | state_before * 0.5) 75 | return proj 76 | 77 | 78 | def _p(pp, name): 79 | return '%s_%s' % (pp, name) 80 | 81 | 82 | def init_params(options): 83 | """ 84 | Global (not LSTM) parameter. For the embeding and the classifier. 85 | """ 86 | params = OrderedDict() 87 | # embedding 88 | randn = numpy.random.rand(options['n_input'], 89 | options['dim_proj']) 90 | params['Wemb'] = (0.01 * randn).astype('float32') 91 | params = get_layer(options['encoder'])[0](options, 92 | params, 93 | prefix=options['encoder']) 94 | # classifier 95 | params['U'] = 0.01 * numpy.random.randn(options['dim_proj'], 96 | options['ydim']).astype('float32') 97 | params['b'] = numpy.zeros((options['ydim'],)).astype('float32') 98 | 99 | return params 100 | 101 | 102 | def load_params(path, params): 103 | pp = numpy.load(path) 104 | for kk, vv in params.iteritems(): 105 | if kk not in pp: 106 | raise Warning('%s is not in the archive' % kk) 107 | params[kk] = pp[kk] 108 | 109 | return params 110 | 111 | 112 | def init_tparams(params): 113 | tparams = OrderedDict() 114 | for kk, pp in params.iteritems(): 115 | tparams[kk] = theano.shared(params[kk], name=kk) 116 | return tparams 117 | 118 | 119 | def get_layer(name): 120 | fns = layers[name] 121 | return fns 122 | 123 | 124 | def ortho_weight(ndim): 125 | W = numpy.random.randn(ndim, ndim) 126 | u, s, v = numpy.linalg.svd(W) 127 | return u.astype('float32') 128 | 129 | 130 | def param_init_lstm(options, params, prefix='lstm'): 131 | """ 132 | Init the LSTM parameter: 133 | 134 | :see: init_params 135 | """ 136 | W = numpy.concatenate([ortho_weight(options['dim_proj']), 137 | ortho_weight(options['dim_proj']), 138 | ortho_weight(options['dim_proj']), 139 | ortho_weight(options['dim_proj'])], axis=1) 140 | params[_p(prefix, 'W')] = W.astype('float32') 141 | U = numpy.concatenate([ortho_weight(options['dim_proj']), 142 | ortho_weight(options['dim_proj']), 143 | ortho_weight(options['dim_proj']), 144 | ortho_weight(options['dim_proj'])], axis=1) 145 | params[_p(prefix, 'U')] = U.astype('float32') 146 | b = numpy.zeros((4 * options['dim_proj'],)) 147 | params[_p(prefix, 'b')] = b.astype('float32') 148 | 149 | return params 150 | 151 | 152 | def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None): 153 | nsteps = state_below.shape[0] 154 | if state_below.ndim == 3: 155 | n_samples = state_below.shape[1] 156 | else: 157 | n_samples = 1 158 | 159 | #assert mask is not None 160 | 161 | def _slice(_x, n, dim): 162 | if _x.ndim == 3: 163 | return _x[:, :, n*dim:(n+1)*dim] 164 | return _x[:, n*dim:(n+1)*dim] 165 | 166 | def _step(x_, h_, c_): 167 | preact = tensor.dot(h_, tparams[_p(prefix, 'U')]) 168 | preact += x_ 169 | preact += tparams[_p(prefix, 'b')] 170 | 171 | i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj'])) 172 | f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj'])) 173 | o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj'])) 174 | c = tensor.tanh(_slice(preact, 3, options['dim_proj'])) 175 | 176 | c = f * c_ + i * c 177 | #c = m_[:, None] * c + (1. 
- m_)[:, None] * c_ 178 | 179 | h = o * tensor.tanh(c) 180 | #TODO: I think this don't apply since is made to avoid sequences smaller tan max_len 181 | #h = m_[:, None] * h + (1. - m_)[:, None] * h_ 182 | 183 | return h, c 184 | 185 | state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) + 186 | tparams[_p(prefix, 'b')]) 187 | 188 | dim_proj = options['dim_proj'] 189 | rval, updates = theano.scan(_step, 190 | sequences=[state_below], 191 | outputs_info=[tensor.alloc(0., n_samples, 192 | dim_proj), 193 | tensor.alloc(0., n_samples, 194 | dim_proj)], 195 | name=_p(prefix, '_layers'), 196 | n_steps=nsteps) 197 | return rval[0] 198 | 199 | 200 | # ff: Feed Forward (normal neural net), only useful to put after lstm 201 | # before the classifier. 202 | layers = {'lstm': (param_init_lstm, lstm_layer)} 203 | 204 | def mom_sgd(lr, tparams, grads, x, y, cost): 205 | """ Stochastic Gradient Descent 206 | 207 | :note: A more complicated version of sgd then needed. This is 208 | done like that for adadelta and rmsprop. 209 | 210 | """ 211 | 212 | updates = OrderedDict() 213 | 214 | mom = tensor.scalar(name='mom') 215 | gmomshared = [theano.shared(p.get_value(), name='%s_mom_grad' %k) 216 | for k,p in tparams.iteritems()] 217 | 218 | # New set of shared variable that will contain the gradient 219 | # for a mini-batch. 220 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 221 | for k, p in tparams.iteritems()] 222 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 223 | 224 | # Function that computes gradients for a mini-batch, but do not 225 | # updates the weights. 226 | f_grad_shared = theano.function([x, y], cost, updates=gsup, 227 | name='sgd_f_grad_shared') 228 | 229 | for gm,gp in zip(gmomshared,gshared): 230 | updates[gm] = mom*gm - (1.0 - mom) * lr * gp 231 | #gmomup = [(gm, mom*gm - (1.0 - mom) * lr * gp) for gm,gp in 232 | # zip(gmomshared, gshared)] 233 | 234 | #pup = [(p, p + gm) for p, gm in zip(tparams.values(), gmomup)] 235 | for p,gm in zip(tparams.values(), gmomshared): 236 | updates[p] = p + updates[gm] 237 | 238 | # Function that updates the weights from the previously computed 239 | # gradient. 240 | f_update = theano.function([lr,mom], [], updates=updates, 241 | name='sgd_f_update') 242 | 243 | return f_grad_shared, f_update 244 | 245 | 246 | def sgd(lr, tparams, grads, x, y, cost): 247 | """ Stochastic Gradient Descent 248 | 249 | :note: A more complicated version of sgd then needed. This is 250 | done like that for adadelta and rmsprop. 251 | 252 | """ 253 | # New set of shared variable that will contain the gradient 254 | # for a mini-batch. 255 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 256 | for k, p in tparams.iteritems()] 257 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 258 | 259 | # Function that computes gradients for a mini-batch, but do not 260 | # updates the weights. 261 | f_grad_shared = theano.function([x, y], cost, updates=gsup, 262 | name='sgd_f_grad_shared') 263 | 264 | pup = [(p, p - lr * g) for p, g in zip(tparams.values(), gshared)] 265 | 266 | # Function that updates the weights from the previously computed 267 | # gradient. 
268 | f_update = theano.function([lr], [], updates=pup, 269 | name='sgd_f_update') 270 | 271 | return f_grad_shared, f_update 272 | 273 | 274 | def adadelta(lr, tparams, grads, x, y, cost): 275 | zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), 276 | name='%s_grad' % k) 277 | for k, p in tparams.iteritems()] 278 | running_up2 = [theano.shared(p.get_value() * numpy.float32(0.), 279 | name='%s_rup2' % k) 280 | for k, p in tparams.iteritems()] 281 | running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), 282 | name='%s_rgrad2' % k) 283 | for k, p in tparams.iteritems()] 284 | 285 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 286 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 287 | for rg2, g in zip(running_grads2, grads)] 288 | 289 | f_grad_shared = theano.function([x, y], cost, updates=zgup+rg2up, 290 | name='adadelta_f_grad_shared') 291 | 292 | updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg 293 | for zg, ru2, rg2 in zip(zipped_grads, 294 | running_up2, 295 | running_grads2)] 296 | ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) 297 | for ru2, ud in zip(running_up2, updir)] 298 | param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)] 299 | 300 | f_update = theano.function([lr], [], updates=ru2up+param_up, 301 | on_unused_input='ignore', 302 | name='adadelta_f_update', 303 | mode='DebugMode') 304 | 305 | return f_grad_shared, f_update 306 | 307 | 308 | def rmsprop(lr, tparams, grads, x, y, cost): 309 | zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.), 310 | name='%s_grad' % k) 311 | for k, p in tparams.iteritems()] 312 | running_grads = [theano.shared(p.get_value() * numpy.float32(0.), 313 | name='%s_rgrad' % k) 314 | for k, p in tparams.iteritems()] 315 | running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.), 316 | name='%s_rgrad2' % k) 317 | for k, p in tparams.iteritems()] 318 | 319 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 320 | rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)] 321 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 322 | for rg2, g in zip(running_grads2, grads)] 323 | 324 | f_grad_shared = theano.function([x, y], cost, 325 | updates=zgup + rgup + rg2up, 326 | name='rmsprop_f_grad_shared') 327 | 328 | updir = [theano.shared(p.get_value() * numpy.float32(0.), 329 | name='%s_updir' % k) 330 | for k, p in tparams.iteritems()] 331 | updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4)) 332 | for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, 333 | running_grads2)] 334 | param_up = [(p, p + udn[1]) 335 | for p, udn in zip(tparams.values(), updir_new)] 336 | f_update = theano.function([lr], [], updates=updir_new+param_up, 337 | on_unused_input='ignore', 338 | name='rmsprop_f_update') 339 | 340 | return f_grad_shared, f_update 341 | 342 | 343 | def build_model(tparams, options): 344 | trng = RandomStreams(1234) 345 | 346 | # Used for dropout. 
347 | use_noise = theano.shared(numpy.float32(0.)) 348 | 349 | x = tensor.tensor3('x', dtype='float32') 350 | #mask = tensor.matrix('mask', dtype='float32') 351 | y = tensor.vector('y', dtype='int64') 352 | 353 | n_timesteps = x.shape[0] 354 | n_samples = x.shape[1] 355 | n_dim = x.shape[2] 356 | 357 | emb = tensor.dot(x,tparams['Wemb']) 358 | #emb = tensor.nnet.sigmoid(emb) 359 | #emb = ReLU(emb) 360 | 361 | if options['use_dropout']: 362 | emb = dropout_layer(emb, use_noise, trng) 363 | 364 | proj = get_layer(options['encoder'])[1](tparams, emb, options, 365 | prefix=options['encoder'] 366 | ) 367 | 368 | 369 | if options['encoder'] == 'lstm' and options['sum_pool'] == True: 370 | proj = proj.sum(axis=0) 371 | proj = proj / options['n_iter'] 372 | else: 373 | proj = proj[-1] 374 | #if options['use_dropout']: 375 | # proj = dropout_layer(proj, use_noise, trng) 376 | 377 | #pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U'])+tparams['b']) 378 | #pred = tensor.nnet.sigmoid(tensor.dot(proj, tparams['U'])\ 379 | # + tparams['b']) 380 | pred = tensor.dot(proj, tparams['U']) + tparams['b'] 381 | pred = tensor.nnet.softmax(pred) 382 | 383 | f_pred_prob = theano.function([x], pred, name='f_pred_prob') 384 | #f_pred = theano.function(x, pred.argmax(axis=1), name='f_pred') 385 | 386 | #cost = tensor.mean((y-pred.T)**2) 387 | 388 | cost = -tensor.log(pred[tensor.arange(n_samples), y] + 1e-8).mean() 389 | #cost = tensor.mean(tensor.nnet.binary_crossentropy(pred.T, y)) 390 | 391 | 392 | return use_noise, x, y, f_pred_prob, cost 393 | 394 | 395 | def pred_probs(f_pred_prob, prepare_data, data, iterator, model_options, verbose=False): 396 | """ If you want to use a trained model, this is useful to compute 397 | the probabilities of new examples. 398 | """ 399 | n_samples = len(data[0]) 400 | probs = numpy.zeros((n_samples, 2)).astype('float32') 401 | 402 | n_done = 0 403 | 404 | for _, valid_index in iterator: 405 | x, y = prepare_data([data[0][t] for t in valid_index], 406 | numpy.array(data[1])[valid_index], 407 | model_options['n_iter'],model_options['n_input'],up=True) 408 | pred_probs = f_pred_prob(x) 409 | probs[valid_index, :] = pred_probs 410 | 411 | n_done += len(valid_index) 412 | if verbose: 413 | print '%d/%d samples classified' % (n_done, n_samples) 414 | 415 | return probs 416 | 417 | 418 | def pred_error(f_pred, prepare_data, data, iterator, model_options, verbose=False): 419 | """ 420 | Just compute the error 421 | f_pred: Theano fct computing the prediction 422 | prepare_data: usual prepare_data for that dataset. 423 | """ 424 | valid_err = 0 425 | for _, valid_index in iterator: 426 | # TODO: This is not very efficient I should check 427 | x, y = prepare_data([data[0][t] for t in valid_index], 428 | numpy.array(data[1])[valid_index], 429 | model_options['n_iter'],model_options['n_input'],up=True) 430 | 431 | 432 | preds_prob = f_pred(x) 433 | preds = preds_prob.argmax(axis=1) 434 | targets = numpy.array(data[1])[valid_index] 435 | valid_err += tensor.sum(tensor.neq(targets,preds)) 436 | #valid_err = 1. - numpy.float32(valid_err) / len(data[0]) 437 | valid_err = float(valid_err.eval()) 438 | return valid_err / float(len(data[0])) 439 | 440 | 441 | 442 | def train_lstm( 443 | dim_proj=32, # word embeding dimension and LSTM number of hidden units. 
444 | patience=10, # Number of epoch to wait before early stop if no progress 445 | max_epochs=150, # The maximum number of epoch to run 446 | dispFreq=10, # Display to stdout the training progress every N updates 447 | decay_c=0., # Weight decay for the classifier applied to the U weights. 448 | lrate=0.1, # Learning rate for sgd (not used for adadelta and rmsprop) 449 | n_input = 4, # Vocabulary size 450 | optimizer=mom_sgd, # sgd,mom_sgs, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate). 451 | encoder='lstm', # TODO: can be removed must be lstm. 452 | saveto='lstm_model.npz', # The best model will be saved there 453 | validFreq=170, # Compute the validation error after this number of update. 454 | saveFreq=1110, # Save the parameters after every saveFreq updates 455 | maxlen=100, # Sequence longer then this get ignored 456 | batch_size=16, # The batch size during training. 457 | valid_batch_size=64, # The batch size used for validation/test set. 458 | dataset='imdb', 459 | 460 | # Parameter for extra option 461 | noise_std=0., 462 | use_dropout=False, # if False slightly faster, but worst test error 463 | # This frequently need a bigger model. 464 | reload_model="", # Path to a saved model we want to start from. 465 | sum_pool = False, 466 | mom_start = 0.5, 467 | mom_end = 0.99, 468 | mom_epoch_interval = 300, 469 | learning_rate_decay=0.99995 470 | 471 | ): 472 | 473 | # Model options 474 | model_options = locals().copy() 475 | print "model options", model_options 476 | 477 | print 'Loading data' 478 | ydim = 2 479 | n_iter = 10 480 | 481 | train, valid, test, mean, std = read_data(max_len=n_iter,up=True) 482 | 483 | #YDIM?? 484 | #number of labels (output) 485 | 486 | model_options['ydim'] = ydim 487 | model_options['n_iter'] = n_iter 488 | 489 | theano.config.optimizer = 'None' 490 | 491 | print 'Building model' 492 | # This create the initial parameters as numpy ndarrays. 493 | # Dict name (string) -> numpy ndarray 494 | params = init_params(model_options) 495 | 496 | if reload_model: 497 | load_params('lstm_model.npz', params) 498 | 499 | # This create Theano Shared Variable from the parameters. 500 | # Dict name (string) -> Theano Tensor Shared Variable 501 | # params and tparams have different copy of the weights. 502 | tparams = init_tparams(params) 503 | 504 | # use_noise is for dropout 505 | (use_noise, x, 506 | y, f_pred_prob, cost) = build_model(tparams, model_options) 507 | 508 | if decay_c > 0.: 509 | decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') 510 | weight_decay = 0. 
511 | weight_decay += (tparams['U']**2).sum() 512 | weight_decay *= decay_c 513 | cost += weight_decay 514 | 515 | f_cost = theano.function([x, y], cost, name='f_cost') 516 | 517 | grads = tensor.grad(cost, wrt=tparams.values()) 518 | f_grad = theano.function([x, y], grads, name='f_grad') 519 | 520 | lr = tensor.scalar(name='lr') 521 | f_grad_shared, f_update = optimizer(lr, tparams, grads, 522 | x, y, cost) 523 | 524 | print 'Optimization' 525 | 526 | 527 | kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size, 528 | shuffle=True) 529 | kf_test = get_minibatches_idx(len(test[0]), valid_batch_size, 530 | shuffle=True) 531 | 532 | print "%d train examples" % len(train[0]) 533 | print "%d valid examples" % len(valid[0]) 534 | print "%d test examples" % len(test[0]) 535 | history_errs = [] 536 | best_p = None 537 | bad_count = 0 538 | 539 | if validFreq == -1: 540 | validFreq = len(train[0])/batch_size 541 | if saveFreq == -1: 542 | saveFreq = len(train[0])/batch_size 543 | 544 | uidx = 0 # the number of update done 545 | estop = False # early stop 546 | start_time = time.clock() 547 | mom = 0 548 | 549 | try: 550 | for eidx in xrange(max_epochs): 551 | n_samples = 0 552 | 553 | # Get new shuffled index for the training set. 554 | kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True) 555 | 556 | for _, train_index in kf: 557 | uidx += 1 558 | use_noise.set_value(1.) 559 | 560 | # Select the random examples for this minibatch 561 | y = [train[1][t] for t in train_index] 562 | x = [train[0][t]for t in train_index] 563 | 564 | # Get the data in numpy.ndarray formet. 565 | # It return something of the shape (minibatch maxlen, n samples) 566 | x, y = prepare_data(x, y, model_options['n_iter'],model_options['n_input'],up=True) 567 | 568 | if x is None: 569 | print 'Minibatch with zero sample under length ', maxlen 570 | continue 571 | n_samples += x.shape[1] 572 | if eidx < model_options['mom_epoch_interval']: 573 | mom = model_options['mom_start']*\ 574 | (1.0 - eidx/model_options['mom_epoch_interval'])\ 575 | + mom_end*(eidx/model_options['mom_epoch_interval']) 576 | else: 577 | mom = mom_end 578 | 579 | cost = f_grad_shared(x, y) 580 | f_update(lrate,mom) 581 | 582 | #decay 583 | lrate = learning_rate_decay*lrate 584 | 585 | if numpy.isnan(cost) or numpy.isinf(cost): 586 | print 'NaN detected' 587 | return 1., 1., 1. 588 | 589 | if numpy.mod(uidx, dispFreq) == 0: 590 | print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost 591 | 592 | if numpy.mod(uidx, saveFreq) == 0: 593 | print 'Saving...', 594 | 595 | if best_p is not None: 596 | params = best_p 597 | else: 598 | params = unzip(tparams) 599 | numpy.savez(saveto, history_errs=history_errs, **params) 600 | pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1) 601 | print 'Done' 602 | 603 | if numpy.mod(uidx, validFreq) == 0: 604 | use_noise.set_value(0.) 
605 | #train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options) 606 | valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options) 607 | test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options) 608 | 609 | 610 | history_errs.append([valid_err, test_err]) 611 | 612 | if (uidx == 0 or 613 | valid_err <= numpy.array(history_errs)[:, 614 | 0].min()): 615 | 616 | best_p = unzip(tparams) 617 | bad_counter = 0 618 | 619 | print ('Valid ', valid_err, 620 | 'Test ', test_err) 621 | 622 | if (len(history_errs) > patience and 623 | valid_err >= numpy.array(history_errs)[:-patience, 624 | 0].min()): 625 | bad_counter += 1 626 | if bad_counter > patience: 627 | print 'Early Stop!' 628 | estop = True 629 | break 630 | 631 | print 'Seen %d samples' % n_samples 632 | 633 | if estop: 634 | break 635 | 636 | except KeyboardInterrupt: 637 | print "Training interupted" 638 | 639 | end_time = time.clock() 640 | if best_p is not None: 641 | zipp(best_p, tparams) 642 | else: 643 | best_p = unzip(tparams) 644 | 645 | use_noise.set_value(0.) 646 | train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options) 647 | valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options) 648 | test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options) 649 | 650 | print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err 651 | 652 | numpy.savez(saveto, train_err=train_err, 653 | valid_err=valid_err, test_err=test_err, 654 | history_errs=history_errs, **best_p) 655 | print 'The code run for %d epochs, with %f sec/epochs' % ( 656 | (eidx + 1), (end_time - start_time) / (1. * (eidx + 1))) 657 | print >> sys.stderr, ('Training took %.1fs' % 658 | (end_time - start_time)) 659 | return train_err, valid_err, test_err 660 | 661 | 662 | if __name__ == '__main__': 663 | 664 | # We must have floatX=float32 for this tutorial to work correctly. 665 | theano.config.floatX = "float32" 666 | # The next line is the new Theano default. This is a speed up. 667 | #theano.config.scan.allow_gc = False 668 | 669 | # See function train for all possible parameter and there definition. 670 | train_lstm( 671 | #reload_model="lstm_model.npz", 672 | max_epochs=150, 673 | ) 674 | 675 | -------------------------------------------------------------------------------- /quant.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pdb 3 | import sys 4 | 5 | import numpy 6 | 7 | import theano 8 | from numpy import genfromtxt 9 | from pandas import Series 10 | 11 | #import matplotlib.pyplot as plt 12 | import sklearn.cross_validation as cv 13 | from sklearn import preprocessing 14 | 15 | def data_preprocessing(data): 16 | data = data[350:,:] 17 | # Standarization 18 | 19 | # Compute compound return serie 20 | #data = numpy.log(data / numpy.roll(data, 1, axis = 0)) 21 | #data = data[1:,:] 22 | #data = data / numpy.roll(data,1,axis=0) 23 | #data = data[1:,:] - 1. 24 | 25 | #print 26 | #ts = Series(numpy.ravel(data)) 27 | #ts.plot() 28 | #plt.show() 29 | 30 | mean = data.mean(axis=0) 31 | std = data.std(axis=0) 32 | 33 | data = data - mean 34 | data = data/std 35 | #Some kind of smoothing?? 
36 | 37 | #min_max = preprocessing.MinMaxScaler() 38 | #data = min_max.fit_transform(data) 39 | 40 | #Put between 1 and 0 41 | return data,mean,std 42 | 43 | def read_data(path="table_a.csv", dir="/user/j/jgpavez/rnn_trading/data/", 44 | max_len=30, valid_portion=0.1, columns=4, up=False ): 45 | path = os.path.join(dir, path) 46 | 47 | data = genfromtxt(path, delimiter=',') 48 | 49 | data = data[:,2:(2+columns)] 50 | 51 | data,mean,std = data_preprocessing(data) 52 | 53 | x_data = numpy.array([data[i:i+max_len,:] for i in xrange(len(data)-max_len)]) 54 | y_data = numpy.array([data[i][0] for i in xrange(max_len , len(data))]) 55 | 56 | if up is True: 57 | y_data = y_data > x_data[:,-1,0] 58 | y_data = numpy.asarray(y_data, dtype='int64') 59 | 60 | # split data into training and test 61 | train_set_x, test_set_x, train_set_y, test_set_y = cv.train_test_split(x_data, 62 | y_data, test_size=0.3, random_state=0) 63 | 64 | # split training set into validation set 65 | n_samples = len(train_set_x) 66 | sidx = numpy.random.permutation(n_samples) 67 | n_train = int(numpy.round(n_samples * (1. - valid_portion))) 68 | valid_set_x = [train_set_x[s] for s in sidx[n_train:]] 69 | valid_set_y = [train_set_y[s] for s in sidx[n_train:]] 70 | train_set_x = [train_set_x[s] for s in sidx[:n_train]] 71 | train_set_y = [train_set_y[s] for s in sidx[:n_train]] 72 | 73 | train = (train_set_x, train_set_y) 74 | valid = (valid_set_x, valid_set_y) 75 | test = (test_set_x, test_set_y) 76 | 77 | return train, valid, test, mean, std 78 | 79 | def prepare_data(seqs, labels, steps, x_dim, up=False): 80 | n_samples = len(seqs) 81 | max_len = steps 82 | x = numpy.zeros((max_len, n_samples, x_dim)).astype('float32') 83 | if up is True: 84 | y = numpy.asarray(labels, dtype='int64') 85 | else: 86 | y = numpy.asarray(labels, dtype='float32') 87 | 88 | for idx, s in enumerate(seqs): 89 | x[:,idx,:] = s 90 | 91 | return x, y 92 | 93 | -------------------------------------------------------------------------------- /svr_ts.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pdb 3 | import sys 4 | 5 | import numpy 6 | from sklearn import svm 7 | from quant import read_data 8 | 9 | def train_svr(dataset=''): 10 | train, valid, test, mean, std = read_data(columns=1, max_len=10) 11 | x_train = [[x[0] for x in row] for row in train[0]] 12 | x_test = [[x[0] for x in row] for row in test[0]] 13 | 14 | 15 | svr = svm.SVR() 16 | svr.fit(x_train,train[1]) 17 | 18 | pred = svr.predict(x_test) 19 | y = numpy.asarray(test[1], dtype='float32') 20 | pred = numpy.asarray(pred, dtype='float32') 21 | 22 | #y = y*std + mean 23 | #pred = pred*std + mean 24 | mean_y = y.mean() 25 | ssr = ((y - pred)**2).sum() 26 | sst = ((y - mean_y)**2).sum() 27 | 28 | r2 = 1. - (ssr/sst) 29 | cost = ((y-pred)**2).mean() 30 | print 'Cost on Test sample, size: %d, cost: %f, R score: %f'%(len(x_test),cost,r2) 31 | 32 | 33 | if __name__ == '__main__': 34 | train_svr(dataset='table_a.csv') 35 | --------------------------------------------------------------------------------
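Usage note (not part of the repository): the following is a minimal, hypothetical driver sketch showing how the entry points in lstm_forex.py could be called, assuming the hard-coded data/model directories under /user/j/jgpavez/rnn_trading/ exist and contain AUDJPY_hour.csv from histdata.com. The script itself, the placeholder window values, and the manual standardization step are illustrative assumptions, not code from the repository.

    # hypothetical driver script -- not part of the repository
    import numpy
    import theano

    from lstm_forex import train_lstm

    theano.config.floatX = "float32"

    # 1) Train on hourly AUDJPY data; this saves AUDJPY_model.npz (weights)
    #    and AUDJPY_params.npz (the mean/std used to standardize the series).
    train_lstm(exchange='AUDJPY', tick='hour', max_epochs=20)

    # 2) Reuse the saved model: predict=True skips training and runs pred_probs
    #    on input_pred, a list of windows of shape (n_iter, n_input) = (24, 4).
    #    Each window has to be standardized with the saved mean/std, because
    #    read_data() applies that preprocessing during training.
    params = numpy.load('/user/j/jgpavez/rnn_trading/data/AUDJPY_params.npz')
    raw_window = numpy.random.rand(24, 4)   # placeholder for the last 24 hourly bars
    window = ((raw_window - params['mean']) / params['std']).astype('float32')
    preds = train_lstm(exchange='AUDJPY', tick='hour', predict=True,
                       input_pred=[window])
    print preds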