├── PYTHON
│   ├── downloader.py
│   ├── my_regression.py
│   ├── portfolio.py
│   └── regression.py
├── README.md
└── matlab
    ├── est_A.m
    ├── labor.m
    ├── maxeig.m
    └── mr_opt_w.m

/PYTHON/downloader.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# Python 2 script built against the legacy Quandl client and pandas < 0.18.
import Quandl
import datetime as dt
import cPickle
import pandas
from numpy import log, polyfit, sqrt, std, subtract

def hurst(ts):
    """Estimate the Hurst exponent of a time series."""
    # Range of lags to evaluate
    lags = range(2, 100)
    # Standard deviation of the lagged differences at each lag
    tau = [sqrt(std(subtract(ts[lag:], ts[:-lag]))) for lag in lags]
    # Fit a line to the log-log plot; the Hurst exponent is twice the slope
    poly = polyfit(log(lags), log(tau), 1)
    return poly[0] * 2.0

instruments = ['EURUSD', 'JPYUSD', 'GBPUSD', 'AUDUSD', 'CHFUSD', 'CADUSD',
               'HKDUSD', 'SEKUSD', 'NZDUSD', 'KRWUSD', 'SGDUSD', 'NOKUSD',
               'MXNUSD', 'INRUSD', 'JPYEUR', 'GBPEUR', 'AUDEUR', 'CHFEUR',
               'CADEUR', 'HKDEUR', 'SEKEUR', 'NZDEUR', 'KRWEUR', 'SGDEUR',
               'NOKEUR', 'MXNEUR', 'INREUR', 'GBPJPY', 'AUDJPY', 'CHFJPY',
               'CADJPY', 'SEKJPY', 'NZDJPY', 'NOKJPY', 'INRJPY', 'AUDGBP',
               'CHFGBP', 'CADGBP', 'HKDGBP', 'SEKGBP', 'NZDGBP', 'KRWGBP',
               'SGDGBP', 'NOKGBP', 'MXNGBP', 'INRGBP', 'CHFAUD', 'CADAUD',
               'HKDAUD', 'SEKAUD', 'NZDAUD', 'KRWAUD', 'SGDAUD', 'NOKAUD',
               'MXNAUD', 'INRAUD', 'CADCHF', 'SEKCHF', 'NZDCHF', 'NOKCHF',
               'INRCHF', 'SEKCAD', 'NZDCAD', 'NOKCAD', 'INRCAD', 'INRHKD',
               'NZDSEK', 'NOKSEK', 'INRSEK', 'NOKNZD', 'INRNZD', 'INRKRW',
               'INRSGD', 'INRNOK', 'INRMXN']

start = dt.datetime(1990, 1, 1)
end = dt.datetime(2010, 12, 31)
data = pandas.DataFrame()

print 'Download started...'
number_of_assets = 0
for symbol in instruments:
    fx = Quandl.get("CURRFX/" + symbol, authtoken="DHRfTADW3mz8jee-sRcb",
                    trim_start=start, trim_end=end).Rate
    print ". " + symbol + " downloaded"
    # Keep only the series that look mean-reverting (Hurst exponent < 0.5)
    if hurst(fx) < 0.5:
        data[symbol] = fx
        number_of_assets += 1
        print ".. " + symbol + " passed Hurst-test"
print 'Download completed.'
data.to_csv("fx_data_mean_rev.csv", header=True)
print 'Data modification...'

# Exponential moving averages over three horizons
# (pandas.stats.moments.ewma is the pre-0.18 API; newer pandas spells it
# data.ewm(span=...).mean())
emas = []
for span in (200, 50, 10):
    ema = pandas.stats.moments.ewma(data, span=span)
    ema.columns = [name + '_ema%d' % span for name in ema.columns]
    emas.append(ema)
    print '... ema(%d) calculated' % span

for ema in emas:
    data = data.join(ema)

data = data.dropna()
print '.. NaN values dropped'
data = data.as_matrix()  # convert to a numpy array
# Targets are the next-day values of the asset columns; the inputs are
# trimmed by one row so that data[i] predicts target[i]
target = data[1:, 0:number_of_assets]
data = data[:-1, :]
print '.. target variable set-up'
print 'Data modification completed'
# Truncate both arrays to the same multiple-of-50 length so that the
# minibatches divide evenly
n = data.shape[0] - data.shape[0] % 50
data = data[:n, :]
target = target[:n, :]
print data.shape
print target.shape
dataset = (data, target)
# The same split is reused as train/validation/test
full_dataset = (dataset, dataset, dataset)
print 'Dump output...'
filename = 'full_data.save'
out_file = file(filename, 'wb')
cPickle.dump(full_dataset, out_file)
out_file.close()
print 'Dump completed to ' + filename
--------------------------------------------------------------------------------
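The Hurst filter above keeps only series that look mean-reverting (H < 0.5). A minimal, self-contained sketch of the same estimator on synthetic data — numpy only, not part of the repository — showing how it separates a diffusive random walk (H near 0.5) from a strongly mean-reverting series (H near 0):

import numpy as np

def hurst(ts, max_lag=100):
    # Same estimator as in downloader.py: regress log(tau) on log(lag);
    # twice the slope approximates the Hurst exponent
    lags = np.arange(2, max_lag)
    tau = [np.sqrt(np.std(ts[lag:] - ts[:-lag])) for lag in lags]
    slope = np.polyfit(np.log(lags), np.log(tau), 1)[0]
    return 2.0 * slope

rng = np.random.RandomState(0)
random_walk = np.cumsum(rng.randn(2000))   # diffusive: H should be near 0.5
white_noise = rng.randn(2000)              # mean-reverting: H should be near 0
print('random walk  H = %.2f' % hurst(random_walk))
print('white noise  H = %.2f' % hurst(white_noise))

For a random walk the lagged differences grow like sqrt(lag), so the fitted slope is about 0.25 and H is about 0.5; for white noise the differences do not grow with the lag at all, which is what makes the 0.5 threshold a usable mean-reversion filter.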
/PYTHON/my_regression.py:
--------------------------------------------------------------------------------
import cPickle
import numpy
import theano
import theano.tensor as T

class Regression(object):
    """Linear regression y = xW + b, trained by minibatch gradient descent."""
    def __init__(self, input, n_in, n_out):
        self.W = theano.shared(value=numpy.zeros((n_in, n_out), dtype=theano.config.floatX), name='W', borrow=True)
        self.b = theano.shared(value=numpy.zeros((n_out,), dtype=theano.config.floatX), name='b', borrow=True)
        self.y_pred = T.dot(input, self.W) + self.b
        self.params = [self.W, self.b]

    def errors(self, y):
        # Sum of squared errors; flatten the (batch, n_out) prediction so it
        # broadcasts correctly against the target vector
        return T.sum((y - self.y_pred.flatten()) ** 2)

def load_data(dataset):
    print '... loading data'

    f = file(dataset, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()
    # Each of train_set, valid_set and test_set is a tuple (input, target):
    # input is a 2-D numpy.ndarray whose rows are examples; target is a
    # numpy.ndarray of the same length giving the target for the example
    # with the same row index in the input.

    def shared_dataset(data_xy, borrow=True):
        # Load the dataset into Theano shared variables to reduce memory
        # read time
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
        return shared_x, shared_y

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)]
    return rval

def optimization(learning_rate=0.1, n_epochs=10, input_data='full_data.save', batch_size=10):
    datasets = load_data(input_data)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    print '... building the model'
    index = T.lscalar()  # index of the minibatch

    x = T.fmatrix('x')  # input data
    y = T.fvector('y')  # target

    # n_in must match the number of input columns in the dataset
    classifier = Regression(input=x, n_in=4, n_out=1)
    cost = classifier.errors(y)

    test_model = theano.function(inputs=[index], outputs=classifier.errors(y), givens={x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function(inputs=[index], outputs=classifier.errors(y), givens={x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # compute the gradient of the cost with respect to theta = (W, b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # gradient-descent updates for the weights
    updates = [(classifier.W, classifier.W - learning_rate * g_W), (classifier.b, classifier.b - learning_rate * g_b)]

    # train the model with the updated weights
    train_model = theano.function(inputs=[index], outputs=cost, updates=updates, givens={x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    print '... training the model'
    patience = 5000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience / 2)
    best_validation_loss = numpy.inf
    test_score = 0.0
    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print ('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    # a significant improvement buys more patience
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.))
            if patience <= iter:
                done_looping = True
                break
    print(('Optimization complete with best validation score of %f %%, ' 'with test performance %f %%') % (best_validation_loss * 100., test_score * 100.))

if __name__ == '__main__':
    optimization()
--------------------------------------------------------------------------------
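The Theano graph above compiles down to the plain gradient-descent updates W ← W − η·∇W and b ← b − η·∇b on a summed squared-error cost. A NumPy sketch of the same minibatch loop on synthetic data (function names and shapes here are illustrative, not the repository's API):

import numpy as np

def sgd_epoch(X, y, W, b, learning_rate=0.01, batch_size=10):
    """One pass of minibatch gradient descent on sum((XW + b - y)^2)."""
    n_batches = X.shape[0] // batch_size
    for i in range(n_batches):
        xb = X[i * batch_size:(i + 1) * batch_size]
        yb = y[i * batch_size:(i + 1) * batch_size]
        err = xb.dot(W) + b - yb            # forward pass, residuals
        g_W = 2.0 * xb.T.dot(err)           # d cost / d W
        g_b = 2.0 * err.sum(axis=0)         # d cost / d b
        W -= learning_rate * g_W
        b -= learning_rate * g_b
    return W, b

rng = np.random.RandomState(0)
X = rng.randn(100, 4)
true_W = rng.randn(4, 1)
y = X.dot(true_W) + 0.01 * rng.randn(100, 1)
W, b = np.zeros((4, 1)), np.zeros(1)
for epoch in range(10):
    W, b = sgd_epoch(X, y, W, b)
print('max |W - true_W| = %.4f' % np.abs(W - true_W).max())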
/PYTHON/portfolio.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# MATLAB reference implementation:
#
# load('s.mat');
# G = cov(s);
# [T, N] = size(s);
# A = zeros(N, N);
# for t=2:1:T
#     A = A + pinv(s(t-1,:).'*s(t-1,:))*(s(t-1,:).'*s(t,:));
# end
# pinvg = pinv(G)^0.5;
# [v1, l1] = maxeig(pinvg.'*A*G*A.'*pinvg);
# w = real(pinvg*v1);

import csv
import time

import numpy as np
import pandas
from scipy import linalg

start_time = time.clock()

portfolio = 1000
fee = 1
reader = csv.reader(open(r"C:\s980.csv", "rb"), delimiter=',')
S = np.matrix(list(reader)).astype('double')
[T, N] = S.shape
G = np.cov(S, rowvar=False)

# Least-squares estimate of the VAR(1) transition matrix A,
# summed over t = 2..T
A = np.zeros((N, N))
for t in range(1, T):
    A = A + (linalg.pinv2((S[t-1, :].T).dot(S[t-1, :]))).dot((S[t-1, :].T).dot(S[t, :]))

# Whiten by G^(-1/2); the top eigenvector of the symmetric matrix X then
# gives the most predictable (mean-reverting) portfolio weights
pinvg = linalg.sqrtm(linalg.pinv2(G))
X = (pinvg.T).dot(A).dot(G).dot(A.T).dot(pinvg)
[maxEigenValue, maxEigenVector] = linalg.eigh(X, eigvals_only=False, eigvals=(N-1, N-1))
w = pinvg.real.dot(maxEigenVector.real)

# Trade on the predicted move of the weighted portfolio relative to the most
# recent value of its 10-period exponential moving average, paying a flat
# fee per trade
signal = float(w.T.dot(A[:, N-1]))
ema10 = pandas.stats.moments.ewma(A[:, N-1], span=10)[-1]
if signal > ema10:
    portfolio = portfolio + portfolio * signal - fee
else:
    portfolio = portfolio + portfolio * -signal - fee
end_time = time.clock()
print end_time - start_time
--------------------------------------------------------------------------------
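The eigen-step above maximizes the predictability ratio nu(w) = (w' A G A' w) / (w' G w): substituting v = G^(1/2) w turns this into an ordinary eigenproblem for X = G^(-1/2) A G A' G^(-1/2), which is what the eigh call solves. A self-contained numpy/scipy sketch on synthetic data (no CSV file needed; all names here are illustrative) checking that this whitened route agrees with solving the generalized eigenproblem B w = lambda G w directly:

import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
s = rng.randn(500, 5)                  # synthetic stand-in for the price matrix
G = np.cov(s, rowvar=False)
# VAR(1) transition estimate, mirroring est_A.m and the loop in portfolio.py
A = sum(np.linalg.pinv(np.outer(s[t-1], s[t-1])).dot(np.outer(s[t-1], s[t]))
        for t in range(1, s.shape[0]))

B = A.dot(G).dot(A.T)
# Direct generalized eigenproblem B w = lambda G w (eigh sorts ascending)
lam, W = linalg.eigh(B, G)
w_direct = W[:, -1]
# Whitened route: top eigenvector of G^(-1/2) B G^(-1/2), mapped back
pinvg = linalg.sqrtm(np.linalg.pinv(G)).real
v = linalg.eigh(pinvg.dot(B).dot(pinvg))[1][:, -1]
w_whitened = pinvg.dot(v)
# Both maximize the same predictability ratio (up to scale and sign)
nu = lambda w: w.dot(B).dot(w) / w.dot(G).dot(w)
print(nu(w_direct), nu(w_whitened))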
/PYTHON/regression.py:
--------------------------------------------------------------------------------
import theano
import numpy as N
from theano import tensor as T
from theano.tensor import nnet as NN
from theano.compile import module as M

class RegressionLayer(M.Module):
    def __init__(self, input = None, target = None, regularize = True):
        super(RegressionLayer, self).__init__()  # boilerplate
        # MODEL CONFIGURATION
        self.regularize = regularize
        # ACQUIRE/MAKE INPUT AND TARGET
        if not input:
            input = T.matrix('input')
        if not target:
            target = T.matrix('target')
        # HYPER-PARAMETERS
        self.stepsize = T.scalar()  # a stepsize for gradient descent
        # PARAMETERS
        self.w = T.matrix()  # the linear transform to apply to our input points
        self.b = T.vector()  # a vector of biases, which makes our transform affine instead of linear
        # REGRESSION MODEL
        self.activation = T.dot(input, self.w) + self.b
        self.prediction = self.build_prediction()
        # CLASSIFICATION COST
        self.classification_cost = self.build_classification_cost(target)
        # REGULARIZATION COST
        self.regularization = self.build_regularization()
        # TOTAL COST
        self.cost = self.classification_cost
        if self.regularize:
            self.cost = self.cost + self.regularization
        # GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
        self.grad_w, self.grad_b, grad_act = T.grad(self.cost, [self.w, self.b, self.prediction])
        print 'grads', self.grad_w, self.grad_b
        # INTERFACE METHODS
        self.update = M.Method([input, target], [self.cost, self.grad_w, self.grad_b, grad_act], updates={self.w: self.w - self.stepsize * self.grad_w, self.b: self.b - self.stepsize * self.grad_b})
        self.apply = M.Method(input, self.prediction)

    def params(self):
        return self.w, self.b

    def _instance_initialize(self, obj, input_size = None, target_size = None, seed = 1827, **init):
        # obj is an "instance" of this module holding values for each member
        # and functions for each method
        if input_size and target_size:
            # initialize w and b in a special way using input_size and target_size
            sz = (input_size, target_size)
            rng = N.random.RandomState(seed)
            obj.w = rng.uniform(size = sz, low = -0.5, high = 0.5)
            obj.b = N.zeros(target_size)
            obj.stepsize = 0.01
        # default_initialize takes all the name: value pairs in init and sets
        # the property with that name to the provided value; this covers
        # setting stepsize and l2_coef (w and b can be set that way too).
        # We call it last so that the passed-in parameters supersede the
        # defaults above.
        M.default_initialize(obj, **init)

    def build_regularization(self):
        return T.zero()  # no regularization!


class SpecifiedRegressionLayer(RegressionLayer):
    """Sigmoid regression with a summed squared-error cost."""
    def build_prediction(self):
        # return NN.softmax(self.activation)  # use this line to expose a
        # slow subtensor implementation
        return NN.sigmoid(self.activation)
    def build_classification_cost(self, target):
        self.classification_cost_matrix = (target - self.prediction) ** 2
        self.classification_costs = T.sum(self.classification_cost_matrix, axis=1)
        return T.sum(self.classification_costs)
    def build_regularization(self):
        self.l2_coef = T.scalar()  # we can add a hyper-parameter if we need to
        return self.l2_coef * T.sum(self.w * self.w)


class PrintEverythingMode(theano.Mode):
    def __init__(self, linker, optimizer=None):
        def print_eval(i, node, fn):
            print i, node, [input[0] for input in fn.inputs],
            fn()
            print [output[0] for output in fn.outputs]
        wrap_linker = theano.gof.WrapLinkerMany([linker], [print_eval])
        super(PrintEverythingMode, self).__init__(wrap_linker, optimizer)


def test_module_advanced_example():
    profmode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
    # the verbose mode below overrides the profiling mode set above
    profmode = PrintEverythingMode(theano.gof.OpWiseCLinker(), 'fast_run')

    data_x = N.random.randn(4, 10)
    data_y = [[int(x)] for x in (N.random.randn(4) > 0)]

    model = SpecifiedRegressionLayer(regularize = False).make(input_size = 10, target_size = 1, stepsize = 0.1, mode=profmode)

    for i in xrange(1000):
        xe, gw, gb, ga = model.update(data_x, data_y)
        if i % 100 == 0:
            print i, xe
    # for inputs, targets in my_training_set():
    #     print "cost:", model.update(inputs, targets)

    print "final weights:", model.w
    print "final biases:", model.b

    profmode.print_summary()

if __name__ == '__main__':
    test_module_advanced_example()
--------------------------------------------------------------------------------
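SpecifiedRegressionLayer's cost is the summed squared error of a sigmoid output plus an optional L2 penalty; Theano derives the gradients symbolically. A NumPy sketch of the same cost with hand-derived gradients, using the module's default shapes and seed (the helper names here are illustrative, not the module's API):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def cost_and_grads(X, y, w, b, l2_coef=0.0):
    """sum((y - sigmoid(Xw + b))^2) + l2_coef * sum(w^2) and its gradients."""
    p = sigmoid(X.dot(w) + b)              # prediction, as in build_prediction
    err = p - y
    cost = (err ** 2).sum() + l2_coef * (w ** 2).sum()
    # chain rule: d cost / d activation = 2 * err * p * (1 - p)
    d_act = 2.0 * err * p * (1.0 - p)
    g_w = X.T.dot(d_act) + 2.0 * l2_coef * w
    g_b = d_act.sum(axis=0)
    return cost, g_w, g_b

rng = np.random.RandomState(1827)          # same seed as the module default
X = rng.randn(4, 10)
y = (rng.randn(4, 1) > 0).astype(float)
w = rng.uniform(-0.5, 0.5, size=(10, 1))
b = np.zeros(1)
for i in range(1000):
    cost, g_w, g_b = cost_and_grads(X, y, w, b)
    w -= 0.1 * g_w                         # stepsize 0.1, as in the test
    b -= 0.1 * g_b
    if i % 100 == 0:
        print(i, cost)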
/README.md:
--------------------------------------------------------------------------------
# Stock-Market-Time-Series-Data-Mining-Using-Deep-Learning-Algorithms
In this project we try to predict future prices from historical prices using data mining and time-series analysis methods. Based on the retrieved information, a portfolio is then assembled that provides the maximal expected profit. Deep neural networks are used for prediction.
--------------------------------------------------------------------------------
/matlab/est_A.m:
--------------------------------------------------------------------------------
function A = est_A(s)
% Least-squares estimate of the VAR(1) transition matrix of the series s
[T, N] = size(s);
A = zeros(N, N);
for t=2:1:T
    A = A + pinv(s(t-1,:).'*s(t-1,:))*(s(t-1,:).'*s(t,:));
end
end
--------------------------------------------------------------------------------
/matlab/labor.m:
--------------------------------------------------------------------------------
tic;
load('s.mat');
G = cov(s);          % asset covariance
A = est_A(s);        % VAR(1) transition matrix
w = mr_opt_w(A, G);  % most predictable portfolio weights
toc
--------------------------------------------------------------------------------
/matlab/maxeig.m:
--------------------------------------------------------------------------------
function [v1, l1] = maxeig(M)
% Largest eigenvalue l1 of M and its eigenvector v1
[V, D] = eig(M);
[l1, l1place] = max(diag(D));
v1 = V(:, l1place);
end
--------------------------------------------------------------------------------
/matlab/mr_opt_w.m:
--------------------------------------------------------------------------------
function w = mr_opt_w(A, G)
% Portfolio weights maximizing predictability: the top eigenvector of
% G^(-1/2)*A*G*A.'*G^(-1/2), mapped back through G^(-1/2)
pinvg = pinv(G)^0.5;
[v1, l1] = maxeig(pinvg.'*A*G*A.'*pinvg);
w = real(pinvg*v1);
end
--------------------------------------------------------------------------------
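maxeig.m picks the largest eigenvalue off the diagonal of D and returns the matching column of V. A quick NumPy counterpart on a synthetic symmetric matrix — an illustration only, not part of the repository — verifying that the selected pair satisfies the eigenvector equation:

import numpy as np

rng = np.random.RandomState(0)
M = rng.randn(4, 4)
M = (M + M.T) / 2.0                   # symmetric, so the eigenvalues are real
vals, vecs = np.linalg.eig(M)
idx = np.argmax(vals.real)            # counterpart of max(diag(D)) in maxeig.m
l1, v1 = vals[idx].real, vecs[:, idx].real
print(np.allclose(M.dot(v1), l1 * v1))   # the pair satisfies M*v1 = l1*v1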