├── misc ├── __init__.py ├── graph.py └── arg_handler.py ├── pipeline ├── __init__.py ├── strategies │ ├── __init__.py │ ├── .ff_strat.py.swo │ ├── .ff_strat.py.swp │ └── ff_strat.py └── backtest.py ├── scripts ├── __init__.py ├── oil.png ├── .feedforward_nn.py.swp ├── constants.py ├── data_process.py ├── feedforward_nn.py ├── recurrent_lstm.py └── ConvNet.py ├── data ├── lag.p ├── save.p └── stock_close.p ├── generic-data-system-finance-FSI_shutterstock-545018428-370x290.jpg ├── .idea ├── vcs.xml ├── inspectionProfiles │ └── profiles_settings.xml ├── modules.xml ├── Quant_stock.iml ├── misc.xml └── workspace.xml ├── driver.py ├── examples ├── test.py ├── ex2.py ├── international-airline-passengers.csv └── passenger.py ├── README.md ├── LICENSE └── .gitignore /misc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pipeline/strategies/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/lag.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethic-ai-dev/bloomberg-gpt-stock-trader/HEAD/data/lag.p -------------------------------------------------------------------------------- /data/save.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethic-ai-dev/bloomberg-gpt-stock-trader/HEAD/data/save.p 
-------------------------------------------------------------------------------- /scripts/oil.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethic-ai-dev/bloomberg-gpt-stock-trader/HEAD/scripts/oil.png -------------------------------------------------------------------------------- /data/stock_close.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethic-ai-dev/bloomberg-gpt-stock-trader/HEAD/data/stock_close.p -------------------------------------------------------------------------------- /scripts/.feedforward_nn.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethic-ai-dev/bloomberg-gpt-stock-trader/HEAD/scripts/.feedforward_nn.py.swp -------------------------------------------------------------------------------- /pipeline/strategies/.ff_strat.py.swo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethic-ai-dev/bloomberg-gpt-stock-trader/HEAD/pipeline/strategies/.ff_strat.py.swo -------------------------------------------------------------------------------- /pipeline/strategies/.ff_strat.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethic-ai-dev/bloomberg-gpt-stock-trader/HEAD/pipeline/strategies/.ff_strat.py.swp -------------------------------------------------------------------------------- /generic-data-system-finance-FSI_shutterstock-545018428-370x290.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ethic-ai-dev/bloomberg-gpt-stock-trader/HEAD/generic-data-system-finance-FSI_shutterstock-545018428-370x290.jpg -------------------------------------------------------------------------------- /.idea/vcs.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /driver.py: -------------------------------------------------------------------------------- 1 | from misc.arg_handler import arg_parser, InputHandler 2 | 3 | 4 | def main(): 5 | inputs = arg_parser() 6 | InputHandler(inputs) 7 | 8 | 9 | if __name__ == "__main__": 10 | main() 11 | -------------------------------------------------------------------------------- /scripts/constants.py: -------------------------------------------------------------------------------- 1 | hm_epoch = 5 # Number of epoch in training 2 | lag_range = 1 # Lag range 3 | lag_epoch_num = 1 # Number of epoch while finding lag 4 | 5 | learning_rate = 0.001 # Default learning rate 6 | 7 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/Quant_stock.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /examples/test.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 
import matplotlib as mpl 4 | import matplotlib.pyplot as plt 5 | 6 | url = "https://www.quandl.com/api/v3/datasets/CHRIS/CME_CL1.csv" 7 | wticl1 = pd.read_csv(url, index_col=0, parse_dates=True) 8 | wticl1.sort_index(inplace=True) 9 | wticl1_last = wticl1['Last'] 10 | wticl1['PctCh'] = wticl1.Last.pct_change() 11 | 12 | print(type(wticl1_last)) 13 | print(wticl1_last) 14 | #fig = plt.figure(figsize=[7,5]) 15 | #ax1 = plt.subplot(111) 16 | #line = wticl1_last.plot(color='red',linewidth=3) 17 | #ax1.set_ylabel('USD per barrel') 18 | #ax1.set_xlabel('') 19 | #ax1.set_title('WTI Crude Oil Price', fontsize=18) 20 | #ax1.spines["top"].set_visible(False) 21 | #ax1.spines["right"].set_visible(False) 22 | #ax1.get_xaxis().tick_bottom() 23 | #ax1.get_yaxis().tick_left() 24 | #ax1.tick_params(axis='x', which='major', labelsize=8) 25 | #plt.savefig('oil.png', dpi=1000) 26 | #plt.show() 27 | 28 | 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BloombergGPT-based Stock Market Trader POC 2 | Stock analysis/prediction model that uses machine learning to capture the impact of out-of-the-market factors (weather, etc.) on stock prices. 3 | 4 | --- 5 | 6 | ![Architecture Diagram](generic-data-system-finance-FSI_shutterstock-545018428-370x290.jpg?raw=true "Architecture Diagram") 7 | 8 | ## Models used 9 | There are three ML models being implemented: 10 | * A simple feedforward neural network 11 | * A recurrent neural network with LSTM (long short term memory) 12 | * BloombergGPT for processing raw text 13 | 14 | ## Accuracy measurements 15 | The pipeline uses [backtrader](https://www.backtrader.com) to backtest each individual strategy. In the future, it would be worthwhile to try a genetic algorithm to better gauge the accuracy of the model. 
16 | 17 | ## Usage 18 | There are three main usages for this project: 19 | 20 | run `python driver.py -t model_name` to train 21 | 22 | run `python driver.py -b model_name` to backtest the model 23 | 24 | run `python driver.py -g model_name` to graph the model's predictions against the stock prices 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Lam Nguyen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
# 22 | -------------------------------------------------------------------------------- /scripts/data_process.py: --------------------------------------------------------------------------------
"""Load crude-oil and stock closing prices and prepare train/test splits."""
import pandas as pd
import pickle

try:
    from googlefinance.client import get_price_data, get_prices_data, get_prices_time_data
except ImportError:
    # googlefinance is only needed to refresh the cached stock prices.
    # Without it, create_data() falls back to the pickled copy instead of
    # making the whole module unimportable.
    get_price_data = get_prices_data = get_prices_time_data = None


def create_data(oil_csv="data/CME_CL1.csv", stock_cache="data/stock_close.p"):
    """Return date-aligned oil and stock price series plus a 75/25 split.

    The oil series is the ``Last`` column of ``oil_csv``, sorted by its
    index. The stock series is the ``Close`` column fetched through
    ``googlefinance`` for ``q='XOM'``, ``x='NYSE'``, ``i=86400``, ``p='40Y'``
    (presumably daily bars over 40 years -- confirm against the googlefinance
    client). A successful fetch is written to ``stock_cache``; on any
    failure the previously cached series is loaded from there instead.

    Args:
        oil_csv: Path of the CSV file holding the oil prices.
        stock_cache: Path of the pickle used to cache the stock closes.

    Returns:
        A 6-tuple ``(oil_train, stock_train, oil_test, stock_test,
        oil_price, stock_price)``. ``oil_price``/``stock_price`` are the two
        series inner-joined on their index; the train parts are the first
        three quarters of each series and the test parts the remainder.

    Raises:
        OSError: If ``oil_csv`` cannot be read, or if the fetch fails and
            ``stock_cache`` does not exist.
    """
    crude_oil = pd.read_csv(oil_csv, index_col=0, parse_dates=True).sort_index()
    crude_oil_last = crude_oil['Last']

    param = {
        'q': 'XOM',
        'i': 86400,
        'x': "NYSE",
        'p': '40Y',
    }

    try:
        print("Retrieving data")
        if get_price_data is None:
            raise ImportError("googlefinance is not installed")
        df = get_price_data(param)
        # Drop the time-of-day so the index lines up with the oil dates.
        df.set_index(df.index.normalize(), inplace=True)
        stock_close = df['Close']
        with open(stock_cache, "wb") as cache_file:
            pickle.dump(stock_close, cache_file)
    except Exception:
        # Best-effort refresh: any retrieval problem falls back to the cache.
        # `Exception` (not a bare except) keeps Ctrl-C / SystemExit working.
        print("Error in retrieving data... Loading previous saved stock data.")
        # NOTE: pickle executes arbitrary code on load; the cache must only
        # ever be a file this project wrote itself.
        with open(stock_cache, "rb") as cache_file:
            stock_close = pickle.load(cache_file)

    # Keep only the dates present in both series.
    oil_price, stock_price = crude_oil_last.align(stock_close, join='inner')

    split_index = int(3 * len(oil_price) / 4)
    oil_train = oil_price.iloc[:split_index]
    # Targets must come from the stock series; the original sliced
    # `oil_price` here, so models were trained to map oil onto itself.
    stock_train = stock_price.iloc[:split_index]

    oil_test = oil_price.iloc[split_index:]
    stock_test = stock_price.iloc[split_index:]

    return oil_train, stock_train, oil_test, stock_test, oil_price, stock_price


def add_lag(dataset_1, dataset_2, lag):
    """Shift ``dataset_2`` ``lag`` positions ahead of ``dataset_1``.

    After the call, element ``t`` of the first result is paired with the
    element that was at position ``t + lag`` of ``dataset_2``. Both results
    are ``lag`` items shorter than the inputs.

    Args:
        dataset_1: Sliceable sequence; its last ``lag`` items are dropped.
        dataset_2: Sliceable sequence; its first ``lag`` items are dropped.
        lag: Number of positions to shift; ``0`` returns both unchanged.

    Returns:
        The ``(dataset_1, dataset_2)`` pair after trimming.
    """
    # lag == 0 is special-cased because `[:-0]` would yield an empty slice.
    if lag != 0:
        dataset_2 = dataset_2[lag:]
        dataset_1 = dataset_1[:-lag]

    return dataset_1, dataset_2


# -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 |
dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /misc/graph.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import matplotlib 3 | import tensorflow as tf 4 | from scripts import data_process as dp 5 | 6 | 7 | n_classes = 2 8 | chunk_size = 1 9 | rnn_size = 512 10 | n_chunks = 2 11 | total_chunk_size = chunk_size*n_chunks 12 | 13 | 14 | def graph(models): 15 | for model in models: 16 | print("Loading pre-trained model...") 17 | sess = tf.Session() 18 | saver = 
tf.train.import_meta_graph("data/model/"+str(model)+'/'+str(model)+'.ckpt.meta') 19 | saver.restore(sess, tf.train.latest_checkpoint('data/model/'+str(model))) 20 | print("Model loaded...") 21 | 22 | graph = tf.get_default_graph() 23 | if model == 'feedforward': 24 | x = graph.get_tensor_by_name('input:0') 25 | prediction = graph.get_tensor_by_name('output:0') 26 | elif model == 'recurrent': 27 | x = graph.get_tensor_by_name('input_recurrent:0') 28 | prediction = graph.get_tensor_by_name('output_recurrent:0') 29 | _, _, _, _, oil_price, stock_price = dp.create_data() 30 | 31 | predictions = [] 32 | if model == 'feedforward': 33 | date_labels = oil_price.index 34 | date_labels = matplotlib.dates.date2num(date_labels.to_pydatetime()) 35 | for i in oil_price: 36 | predictions.append(sess.run(prediction, feed_dict={x: [[i]]})[0][0]) 37 | elif model == 'recurrent': 38 | predictions = [] 39 | for index in range(int(len(oil_price.values) / total_chunk_size)): 40 | x_in = oil_price.values[index * total_chunk_size:index * total_chunk_size + total_chunk_size].reshape( 41 | (1, n_chunks, chunk_size)) 42 | predictions += sess.run(prediction, feed_dict={x: x_in})[0].reshape(total_chunk_size).tolist() 43 | 44 | plt.plot_date(date_labels, predictions, 'b-', label="Feedforward Predictions") 45 | plt.plot_date(date_labels, stock_price.values, 'r-', label='Stock Prices') 46 | plt.legend() 47 | plt.ylabel('Price') 48 | plt.xlabel('Year') 49 | plt.show() 50 | -------------------------------------------------------------------------------- /misc/arg_handler.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pipeline import backtest 3 | from pipeline.backtest import TestStrategy 4 | from pipeline.strategies.ff_strat import FeedforwardStrategy 5 | from scripts import feedforward_nn, recurrent_lstm, ConvNet 6 | from scripts import data_process as dp 7 | from misc.graph import graph 8 | 9 | 10 | def arg_parser(): 11 | parser = 
argparse.ArgumentParser(description="Stock prediction model", formatter_class=argparse.RawTextHelpFormatter) 12 | parser.add_argument('-b', '--btest', help='Run backtest with the model', 13 | default=None, choices=['test', 'feedforward', 'recurrent', 'cnn']) 14 | parser.add_argument('-t', '--train', help='Train a model', default=None, 15 | choices=['feedforward', 'recurrent', 'cnn']) 16 | parser.add_argument('-g', '--graph', help='Graph', nargs='*', choices=['test', 'feedforward', 'recurrent', 'cnn']) 17 | 18 | args = parser.parse_args() 19 | return args 20 | 21 | 22 | class InputHandler: 23 | def __init__(self, inputs): 24 | self.inputs = inputs 25 | if self.inputs.train: 26 | self.train(self.inputs.train) 27 | if self.inputs.btest: 28 | if self.inputs.btest == "test": 29 | self.run(TestStrategy) 30 | elif self.inputs.btest == "feedforward": 31 | self.run(FeedforwardStrategy) 32 | if self.inputs.graph: 33 | if len(self.inputs.graph) != 0: 34 | graph(self.inputs.graph) 35 | else: 36 | graph(['feedforward', 'recurrent', 'cnn']) 37 | 38 | @staticmethod 39 | def run(strategy): 40 | backtest_obj = backtest.Backtest(stock_symbol='XOM', strategy=strategy) 41 | backtest_obj.run(plot=True) 42 | 43 | @staticmethod 44 | def train(model): 45 | inputs = dp.create_data() 46 | if model == "feedforward": 47 | feedforward_nn.feedforward_neural_network(inputs) 48 | elif model == "recurrent": 49 | recurrent_lstm.recurrent_neural_network(inputs) 50 | elif model == 'cnn': 51 | ConvNet.conv_neural_network(inputs) 52 | -------------------------------------------------------------------------------- /examples/ex2.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.examples.tutorials.mnist import input_data 3 | from tensorflow.contrib import rnn 4 | 5 | mnist = input_data.read_data_sets("/tmp/data/", one_hot=True) 6 | 7 | n_classes = 10 8 | batch_size = 128 9 | epochs_num = 10 10 | 11 | chunk_size = 28 12 | 
chunk_num = 28 13 | rnn_size = 128 14 | 15 | x = tf.placeholder('float', [None, chunk_num, chunk_size]) 16 | y = tf.placeholder('float') 17 | 18 | learning_rate = 0.001 19 | 20 | def recurrent_neural_network(x): 21 | layer = {'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])), 22 | 'biases': tf.Variable(tf.random_normal([n_classes]))} 23 | 24 | x = tf.transpose(x, [1, 0, 2]) 25 | x = tf.reshape(x, [-1, chunk_size]) 26 | x = tf.split(x, chunk_num, 0) 27 | 28 | lstm_cell = rnn.BasicLSTMCell(rnn_size) 29 | outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32) 30 | 31 | output = tf.matmul(outputs[-1], layer['weights']) + layer['biases'] 32 | 33 | return output 34 | 35 | def train_neural_network(x): 36 | prediction = recurrent_neural_network(x) 37 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction) ) 38 | 39 | optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost) 40 | 41 | with tf.Session() as sess: 42 | sess.run(tf.global_variables_initializer()) 43 | for epoch in range(epochs_num): 44 | epoch_loss = 0 45 | for _ in range(int(mnist.train.num_examples/batch_size)): 46 | epoch_x, epoch_y = mnist.train.next_batch(batch_size) 47 | epoch_x = epoch_x.reshape((batch_size, chunk_num, chunk_size )) 48 | _, c = sess.run([optimizer, cost], feed_dict = {x:epoch_x, y:epoch_y}) 49 | epoch_loss += c 50 | print ('Epoch', epoch, 'completed out of', epochs_num, 'loss:', epoch_loss) 51 | 52 | correct = tf.equal(tf.argmax(prediction,1), tf.argmax(y,1)) 53 | accuracy = tf.reduce_mean(tf.cast(correct, 'float')) 54 | print('Accuracy:', accuracy.eval({x:mnist.test.images.reshape((-1,chunk_num,chunk_size)), y:mnist.test.labels })) 55 | 56 | train_neural_network(x) 57 | -------------------------------------------------------------------------------- /examples/international-airline-passengers.csv: -------------------------------------------------------------------------------- 1 | "Month","International airline 
passengers: monthly totals in thousands. Jan 49 ? Dec 60" 2 | "1949-01",112 3 | "1949-02",118 4 | "1949-03",132 5 | "1949-04",129 6 | "1949-05",121 7 | "1949-06",135 8 | "1949-07",148 9 | "1949-08",148 10 | "1949-09",136 11 | "1949-10",119 12 | "1949-11",104 13 | "1949-12",118 14 | "1950-01",115 15 | "1950-02",126 16 | "1950-03",141 17 | "1950-04",135 18 | "1950-05",125 19 | "1950-06",149 20 | "1950-07",170 21 | "1950-08",170 22 | "1950-09",158 23 | "1950-10",133 24 | "1950-11",114 25 | "1950-12",140 26 | "1951-01",145 27 | "1951-02",150 28 | "1951-03",178 29 | "1951-04",163 30 | "1951-05",172 31 | "1951-06",178 32 | "1951-07",199 33 | "1951-08",199 34 | "1951-09",184 35 | "1951-10",162 36 | "1951-11",146 37 | "1951-12",166 38 | "1952-01",171 39 | "1952-02",180 40 | "1952-03",193 41 | "1952-04",181 42 | "1952-05",183 43 | "1952-06",218 44 | "1952-07",230 45 | "1952-08",242 46 | "1952-09",209 47 | "1952-10",191 48 | "1952-11",172 49 | "1952-12",194 50 | "1953-01",196 51 | "1953-02",196 52 | "1953-03",236 53 | "1953-04",235 54 | "1953-05",229 55 | "1953-06",243 56 | "1953-07",264 57 | "1953-08",272 58 | "1953-09",237 59 | "1953-10",211 60 | "1953-11",180 61 | "1953-12",201 62 | "1954-01",204 63 | "1954-02",188 64 | "1954-03",235 65 | "1954-04",227 66 | "1954-05",234 67 | "1954-06",264 68 | "1954-07",302 69 | "1954-08",293 70 | "1954-09",259 71 | "1954-10",229 72 | "1954-11",203 73 | "1954-12",229 74 | "1955-01",242 75 | "1955-02",233 76 | "1955-03",267 77 | "1955-04",269 78 | "1955-05",270 79 | "1955-06",315 80 | "1955-07",364 81 | "1955-08",347 82 | "1955-09",312 83 | "1955-10",274 84 | "1955-11",237 85 | "1955-12",278 86 | "1956-01",284 87 | "1956-02",277 88 | "1956-03",317 89 | "1956-04",313 90 | "1956-05",318 91 | "1956-06",374 92 | "1956-07",413 93 | "1956-08",405 94 | "1956-09",355 95 | "1956-10",306 96 | "1956-11",271 97 | "1956-12",306 98 | "1957-01",315 99 | "1957-02",301 100 | "1957-03",356 101 | "1957-04",348 102 | "1957-05",355 103 | "1957-06",422 104 | 
"1957-07",465 105 | "1957-08",467 106 | "1957-09",404 107 | "1957-10",347 108 | "1957-11",305 109 | "1957-12",336 110 | "1958-01",340 111 | "1958-02",318 112 | "1958-03",362 113 | "1958-04",348 114 | "1958-05",363 115 | "1958-06",435 116 | "1958-07",491 117 | "1958-08",505 118 | "1958-09",404 119 | "1958-10",359 120 | "1958-11",310 121 | "1958-12",337 122 | "1959-01",360 123 | "1959-02",342 124 | "1959-03",406 125 | "1959-04",396 126 | "1959-05",420 127 | "1959-06",472 128 | "1959-07",548 129 | "1959-08",559 130 | "1959-09",463 131 | "1959-10",407 132 | "1959-11",362 133 | "1959-12",405 134 | "1960-01",417 135 | "1960-02",391 136 | "1960-03",419 137 | "1960-04",461 138 | "1960-05",472 139 | "1960-06",535 140 | "1960-07",622 141 | "1960-08",606 142 | "1960-09",508 143 | "1960-10",461 144 | "1960-11",390 145 | "1960-12",432 146 | 147 | International airline passengers: monthly totals in thousands. Jan 49 ? Dec 60 148 | 149 | -------------------------------------------------------------------------------- /examples/passenger.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import matplotlib.pyplot as plt 3 | from pandas import read_csv 4 | import math 5 | from keras.models import Sequential 6 | from keras.layers import Dense 7 | from keras.layers import LSTM 8 | from sklearn.preprocessing import MinMaxScaler 9 | from sklearn.metrics import mean_squared_error 10 | # convert an array of values into a dataset matrix 11 | def create_dataset(dataset, look_back=1): 12 | dataX, dataY = [], [] 13 | for i in range(len(dataset)-look_back-1): 14 | a = dataset[i:(i+look_back), 0] 15 | dataX.append(a) 16 | dataY.append(dataset[i + look_back, 0]) 17 | return numpy.array(dataX), numpy.array(dataY) 18 | # fix random seed for reproducibility 19 | numpy.random.seed(7) 20 | # load the dataset 21 | dataframe = read_csv('international-airline-passengers.csv', usecols=[1], engine='python', skipfooter=3) 22 | dataset = 
dataframe.values 23 | dataset = dataset.astype('float32') 24 | # normalize the dataset 25 | scaler = MinMaxScaler(feature_range=(0, 1)) 26 | dataset = scaler.fit_transform(dataset) 27 | # split into train and test sets 28 | train_size = int(len(dataset) * 0.67) 29 | test_size = len(dataset) - train_size 30 | train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:] 31 | # reshape into X=t and Y=t+1 32 | look_back = 3 33 | trainX, trainY = create_dataset(train, look_back) 34 | testX, testY = create_dataset(test, look_back) 35 | # reshape input to be [samples, time steps, features] 36 | trainX = numpy.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1)) 37 | testX = numpy.reshape(testX, (testX.shape[0], testX.shape[1], 1)) 38 | # create and fit the LSTM network 39 | model = Sequential() 40 | model.add(LSTM(4, input_shape=(look_back, 1))) 41 | model.add(Dense(1)) 42 | model.compile(loss='mean_squared_error', optimizer='adam') 43 | model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2) 44 | # make predictions 45 | trainPredict = model.predict(trainX) 46 | testPredict = model.predict(testX) 47 | # invert predictions 48 | trainPredict = scaler.inverse_transform(trainPredict) 49 | trainY = scaler.inverse_transform([trainY]) 50 | testPredict = scaler.inverse_transform(testPredict) 51 | testY = scaler.inverse_transform([testY]) 52 | # calculate root mean squared error 53 | trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0])) 54 | print('Train Score: %.2f RMSE' % (trainScore)) 55 | testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0])) 56 | print('Test Score: %.2f RMSE' % (testScore)) 57 | # shift train predictions for plotting 58 | trainPredictPlot = numpy.empty_like(dataset) 59 | trainPredictPlot[:, :] = numpy.nan 60 | trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict 61 | # shift test predictions for plotting 62 | testPredictPlot = numpy.empty_like(dataset) 63 | testPredictPlot[:, :] = 
numpy.nan 64 | testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict 65 | # plot baseline and predictions 66 | plt.plot(scaler.inverse_transform(dataset)) 67 | plt.plot(trainPredictPlot) 68 | plt.plot(testPredictPlot) 69 | plt.show() 70 | -------------------------------------------------------------------------------- /pipeline/backtest.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import backtrader as bt 3 | 4 | 5 | class SmaCross(bt.SignalStrategy): 6 | params = (('pfast', 10), ('pslow', 30),) 7 | def __init__(self): 8 | sma1, sma2 = bt.ind.SMA(period=self.p.pfast), bt.ind.SMA(period=self.p.pslow) 9 | self.signal_add(bt.SIGNAL_LONG, bt.ind.CrossOver(sma1, sma2)) 10 | 11 | 12 | # Create a Stratey 13 | class TestStrategy(bt.Strategy): 14 | def log(self, txt, dt=None): 15 | ''' Logging function fot this strategy''' 16 | dt = dt or self.datas[0].datetime.date(0) 17 | print('%s, %s' % (dt.isoformat(), txt)) 18 | 19 | def __init__(self): 20 | # Keep a reference to the "close" line in the data[0] dataseries 21 | self.dataclose = self.datas[0].close 22 | 23 | # To keep track of pending orders and buy price/commission 24 | self.order = None 25 | self.buyprice = None 26 | self.buycomm = None 27 | 28 | def notify_order(self, order): 29 | if order.status in [order.Submitted, order.Accepted]: 30 | # Buy/Sell order submitted/accepted to/by broker - Nothing to do 31 | return 32 | 33 | # Check if an order has been completed 34 | # Attention: broker could reject order if not enougth cash 35 | if order.status in [order.Completed]: 36 | if order.isbuy(): 37 | self.log( 38 | 'BUY EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' % 39 | (order.executed.price, 40 | order.executed.value, 41 | order.executed.comm)) 42 | 43 | self.buyprice = order.executed.price 44 | self.buycomm = order.executed.comm 45 | else: # Sell 46 | self.log('SELL EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' % 
47 | (order.executed.price, 48 | order.executed.value, 49 | order.executed.comm)) 50 | 51 | self.bar_executed = len(self) 52 | 53 | elif order.status in [order.Canceled, order.Margin, order.Rejected]: 54 | self.log('Order Canceled/Margin/Rejected') 55 | 56 | self.order = None 57 | 58 | def notify_trade(self, trade): 59 | if not trade.isclosed: 60 | return 61 | 62 | self.log('OPERATION PROFIT, GROSS %.2f, NET %.2f' % 63 | (trade.pnl, trade.pnlcomm)) 64 | 65 | def next(self): 66 | # Simply log the closing price of the series from the reference 67 | self.log('Close, %.2f' % self.dataclose[0]) 68 | 69 | # Check if an order is pending ... if yes, we cannot send a 2nd one 70 | if self.order: 71 | return 72 | 73 | # Check if we are in the market 74 | if not self.position: 75 | 76 | # Not yet ... we MIGHT BUY if ... 77 | if self.dataclose[0] < self.dataclose[-1]: 78 | # current close less than previous close 79 | 80 | if self.dataclose[-1] < self.dataclose[-2]: 81 | # previous close less than the previous close 82 | 83 | # BUY, BUY, BUY!!! (with default parameters) 84 | self.log('BUY CREATE, %.2f' % self.dataclose[0]) 85 | 86 | # Keep track of the created order to avoid a 2nd order 87 | self.order = self.buy() 88 | 89 | else: 90 | 91 | # Already in the market ... we might sell 92 | if len(self) >= (self.bar_executed + 5): 93 | # SELL, SELL, SELL!!! 
(with all possible default parameters) 94 | self.log('SELL CREATE, %.2f' % self.dataclose[0]) 95 | 96 | # Keep track of the created order to avoid a 2nd order 97 | self.order = self.sell() 98 | 99 | 100 | class Backtest: 101 | def __init__(self, stock_symbol, start=datetime(2010, 1, 1), 102 | end=datetime(2017, 1, 1), strategy=TestStrategy): 103 | self.cerebro = bt.Cerebro() 104 | self.cerebro.broker.setcommission(commission=0.001) 105 | self.cerebro.broker.setcash(100000) 106 | data = bt.feeds.YahooFinanceData(dataname=stock_symbol, fromdate=start, 107 | todate=end) 108 | self.cerebro.adddata(data) 109 | print("Value before transactions:", self.cerebro.broker.getvalue()) 110 | self.cerebro.addstrategy(strategy) 111 | 112 | def run(self, plot=False): 113 | self.cerebro.run() 114 | print("Value after transactions:", self.cerebro.broker.getvalue()) 115 | if plot: 116 | self.cerebro.plot() 117 | -------------------------------------------------------------------------------- /pipeline/strategies/ff_strat.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import backtrader as bt 3 | import tensorflow as tf 4 | from scripts import data_process as dp 5 | import pickle 6 | 7 | 8 | class FeedforwardStrategy(bt.Strategy): 9 | def log(self, txt, dt=None): 10 | ''' Logging function fot this strategy''' 11 | dt = dt or self.datas[0].datetime.date(0) 12 | print('%s, %s' % (dt.isoformat(), txt)) 13 | 14 | def __init__(self): 15 | # Keep a reference to the "close" line in the data[0] dataseries 16 | self.dataclose = self.datas[0].close 17 | 18 | # To keep track of pending orders and buy price/commission 19 | self.order = None 20 | self.buyprice = None 21 | self.buycomm = None 22 | 23 | print("Loading pre-trained model...") 24 | self.sess = tf.Session() 25 | self.saver = tf.train.import_meta_graph("data/model/feedforward/feedforward.ckpt.meta") 26 | self.saver.restore(self.sess, 
tf.train.latest_checkpoint('data/model/feedforward')) 27 | print("Model loaded...") 28 | 29 | self.graph = tf.get_default_graph() 30 | self.x = self.graph.get_tensor_by_name('input:0') 31 | self.prediction = self.graph.get_tensor_by_name('output:0') 32 | _, _, _, _, self.oil_price, self.stock_price = dp.create_data() 33 | 34 | self.prediction_graph() 35 | 36 | def notify_order(self, order): 37 | if order.status in [order.Submitted, order.Accepted]: 38 | # Buy/Sell order submitted/accepted to/by broker - Nothing to do 39 | return 40 | # Check if an order has been completed 41 | # Attention: broker could reject order if not enougth cash 42 | if order.status in [order.Completed]: 43 | if order.isbuy(): 44 | self.log( 45 | 'BUY EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' % 46 | (order.executed.price, 47 | order.executed.value, 48 | order.executed.comm)) 49 | self.buyprice = order.executed.price 50 | self.buycomm = order.executed.comm 51 | else: # Sell 52 | self.log('SELL EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' % 53 | (order.executed.price, 54 | order.executed.value, 55 | order.executed.comm)) 56 | self.bar_executed = len(self) 57 | elif order.status in [order.Canceled, order.Margin, order.Rejected]: 58 | self.log('Order Canceled/Margin/Rejected') 59 | self.order = None 60 | 61 | def notify_trade(self, trade): 62 | if not trade.isclosed: 63 | return 64 | self.log('OPERATION PROFIT, GROSS %.2f, NET %.2f' % 65 | (trade.pnl, trade.pnlcomm)) 66 | 67 | def prediction_graph(self): 68 | predictions = [] 69 | for i in self.oil_price: 70 | predictions.append(self.sess.run(self.prediction, feed_dict={self.x: [[i]]})[0][0]) 71 | plt.plot(self.oil_price.values, label='Oil Prices') 72 | plt.plot(self.stock_price.values, label='Stock Prices') 73 | plt.plot(predictions, label="Predictions") 74 | plt.legend() 75 | plt.ylabel('Price') 76 | plt.xlabel('Date') 77 | plt.show() 78 | 79 | def next(self): 80 | # Simply log the closing price of the series from the reference 81 | 
#self.log('Close, %.2f' % self.dataclose[0]) 82 | # Check if an order is pending ... if yes, we cannot send a 2nd one 83 | if self.order: 84 | return 85 | # Check if we are in the market 86 | if not self.position: 87 | # Not yet ... we MIGHT BUY if ... 88 | if self.datas[0].datetime.date(0) in self.oil_price: 89 | if self.sess.run(self.prediction, 90 | feed_dict={self.x: [[self.oil_price[self.datas[0].datetime.date(0)]]]}) > self.dataclose[0]: 91 | # previous close less than the previous close 92 | # BUY, BUY, BUY!!! (with default parameters) 93 | self.log('BUY CREATE, %.2f' % self.dataclose[0]) 94 | # Keep track of the created order to avoid a 2nd order 95 | self.order = self.buy() 96 | 97 | else: 98 | # Already in the market ... we might sell 99 | if len(self) >= (self.bar_executed + 2): 100 | # SELL, SELL, SELL!!! (with all possible default parameters) 101 | self.log('SELL CREATE, %.2f' % self.dataclose[0]) 102 | # Keep track of the created order to avoid a 2nd order 103 | self.order = self.sell() -------------------------------------------------------------------------------- /scripts/feedforward_nn.py: -------------------------------------------------------------------------------- 1 | from scripts import data_process as dp 2 | import matplotlib 3 | from scripts.constants import * 4 | import tensorflow as tf 5 | import matplotlib.pyplot as plt 6 | import pickle 7 | 8 | n_nodes_hl1 = 500 9 | n_nodes_hl2 = 500 10 | n_nodes_hl3 = 500 11 | 12 | n_classes = 1 13 | batch_size = 100 14 | 15 | x = tf.placeholder('float') 16 | y = tf.placeholder('float') 17 | 18 | 19 | def neural_network_model(data): 20 | hidden_1_layer = {'weights': tf.Variable(tf.random_normal([1, n_nodes_hl1])), 21 | 'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))} 22 | 23 | hidden_2_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])), 24 | 'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))} 25 | 26 | hidden_3_layer = {'weights': 
tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])), 27 | 'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))} 28 | 29 | output_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])), 30 | 'biases': tf.Variable(tf.random_normal([n_classes])),} 31 | 32 | l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases']) 33 | l1 = tf.nn.relu(l1) 34 | 35 | l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases']) 36 | l2 = tf.nn.relu(l2) 37 | 38 | l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases']) 39 | l3 = tf.nn.relu(l3) 40 | 41 | output = tf.add(tf.matmul(l3, output_layer['weights']), 42 | output_layer['biases'], name="output") 43 | 44 | return output 45 | 46 | 47 | def refine_input_with_lag(oil_train, stock_train, oil_test, stock_test): 48 | prediction = neural_network_model(x) 49 | cost = tf.reduce_mean(tf.square(tf.transpose(prediction)-y)) 50 | optimizer = tf.train.AdamOptimizer().minimize(cost) 51 | #Adding lag 52 | all_lag_losses = [] 53 | for i in range(lag_range): 54 | with tf.Session() as sess: 55 | sess.run(tf.global_variables_initializer()) 56 | oil_lag, stock_lag = dp.add_lag(oil_train, stock_train, i) 57 | for epoch in range(lag_epoch_num): 58 | lag_loss = 0 59 | for (X, Y) in zip(oil_lag.values, stock_lag.values): 60 | _, c = sess.run([optimizer, cost], feed_dict={x: [[X]], y: [[Y]]}) 61 | lag_loss += c 62 | print('Lag', i, 'epoch', epoch, 'loss:', lag_loss) 63 | all_lag_losses.append(lag_loss) 64 | lag = all_lag_losses.index(min(all_lag_losses)) 65 | oil_train, stock_train = dp.add_lag(oil_train, stock_train, lag) 66 | oil_test, stock_test = dp.add_lag(oil_test, stock_test, lag) 67 | print("The best lag is:", lag) 68 | pickle.dump(lag, open("data/save.p", "wb")) 69 | return oil_train, stock_train, oil_test, stock_test 70 | 71 | 72 | def feedforward_neural_network(inputs): 73 | x = tf.placeholder('float', name='input') 74 | oil_train, stock_train, oil_test, 
stock_test, oil_price, stock_price = inputs 75 | prediction = neural_network_model(x) 76 | cost = tf.reduce_mean(tf.square(tf.transpose(prediction)-y)) 77 | optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost) 78 | #oil_train, stock_train, oil_test, stock_test = inputs 79 | 80 | oil_train, stock_train, oil_test, stock_test = refine_input_with_lag(oil_train, stock_train, oil_test, stock_test) 81 | saver = tf.train.Saver() 82 | with tf.Session() as sess: 83 | sess.run(tf.global_variables_initializer()) 84 | #Running neural net 85 | for epoch in range(hm_epoch): 86 | epoch_loss = 0 87 | for (X, Y) in zip(oil_train.values, stock_train.values): 88 | _, c = sess.run([optimizer, cost], feed_dict={x: [[X]], y: [[Y]]}) 89 | epoch_loss += c 90 | print('Epoch', epoch, 'completed out of', hm_epoch, 'loss:', epoch_loss) 91 | correct = tf.subtract(prediction, y) 92 | total = 0 93 | cor = 0 94 | for (X,Y) in zip(oil_test.values, stock_test.values): 95 | total += 1 96 | if abs(correct.eval({x: [[X]], y: [[Y]]})) < 5: 97 | cor += 1 98 | print('Accuracy:', cor/total) 99 | save_path = saver.save(sess, "data/model/feedforward/feedforward.ckpt") 100 | print("Model saved in file: %s" % save_path) 101 | 102 | date_labels = oil_price.index 103 | date_labels = matplotlib.dates.date2num(date_labels.to_pydatetime()) 104 | 105 | predictions = [] 106 | for i in oil_price: 107 | predictions.append(sess.run(prediction, feed_dict={x: [[i]]})[0][0]) 108 | plt.plot_date(date_labels, predictions, 'b-', label="Feedforward Predictions") 109 | plt.plot_date(date_labels, stock_price.values, 'r-', label='Stock Prices') 110 | plt.legend() 111 | plt.ylabel('Price') 112 | plt.xlabel('Year') 113 | plt.show() 114 | 115 | 116 | if __name__ == "__main__": 117 | feedforward_neural_network(x) -------------------------------------------------------------------------------- /scripts/recurrent_lstm.py: -------------------------------------------------------------------------------- 1 | from scripts 
import data_process as dp 2 | from scripts.constants import * 3 | import tensorflow as tf 4 | from tensorflow.contrib import rnn 5 | import matplotlib.pyplot as plt 6 | import matplotlib 7 | import pickle 8 | 9 | # Shape of output matrix 10 | n_classes = 5 11 | rnn_size = 512 12 | chunk_size = 1 13 | n_chunks = 5 14 | # Shape of matrix input 15 | total_chunk_size = chunk_size*n_chunks 16 | 17 | x = tf.placeholder('float', name='input_recurrent') 18 | y = tf.placeholder('float', name='train_output_recurrent') 19 | 20 | 21 | def rnn_model(data): 22 | layer = {'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])), 23 | 'biases': tf.Variable(tf.random_normal([n_classes]))} 24 | 25 | data = tf.transpose(data, [1, 0, 2]) 26 | data = tf.reshape(data, [-1, chunk_size]) 27 | data = tf.split(data, n_chunks, 0) 28 | 29 | lstm_cell = rnn.BasicLSTMCell(rnn_size, state_is_tuple=True) 30 | outputs, states = rnn.static_rnn(lstm_cell, data, dtype=tf.float32) 31 | 32 | output = tf.add(tf.matmul(outputs[-1], layer['weights']), layer['biases'], name='output_recurrent') 33 | 34 | return output 35 | 36 | 37 | prediction = rnn_model(x) 38 | 39 | 40 | def refine_input_with_lag(oil_train, stock_train, oil_test, stock_test): 41 | cost = tf.reduce_mean(tf.square(prediction-y)) 42 | optimizer = tf.train.AdamOptimizer().minimize(cost) 43 | #Adding lag 44 | all_lag_losses = [] 45 | for i in range(lag_range): 46 | with tf.Session() as sess: 47 | sess.run(tf.global_variables_initializer()) 48 | oil_lag, stock_lag = dp.add_lag(oil_train, stock_train, i) 49 | for epoch in range(lag_epoch_num): 50 | lag_loss = 0 51 | for index in range(int(len(oil_lag.values)/total_chunk_size)): 52 | x_in = oil_lag.values[index * total_chunk_size:index * total_chunk_size + total_chunk_size].reshape((1, n_chunks, chunk_size)) 53 | y_in = stock_lag.values[index * total_chunk_size:index * total_chunk_size + total_chunk_size].reshape((1, n_chunks, chunk_size)) 54 | _, c = sess.run([optimizer, cost], 
feed_dict={x: x_in, y: y_in}) 55 | lag_loss += c 56 | print('Lag', i, 'epoch', epoch, 'loss:', lag_loss) 57 | all_lag_losses.append(lag_loss) 58 | lag = all_lag_losses.index(min(all_lag_losses)) 59 | oil_train, stock_train = dp.add_lag(oil_train, stock_train, lag) 60 | oil_test, stock_test = dp.add_lag(oil_test, stock_test, lag) 61 | print("The best lag is:", lag) 62 | pickle.dump(lag, open("data/lag.p", "wb")) 63 | return oil_train, stock_train, oil_test, stock_test 64 | 65 | 66 | def recurrent_neural_network(inputs): 67 | oil_train, stock_train, oil_test, stock_test, oil_price, stock_price = inputs 68 | cost = tf.reduce_mean(tf.square(prediction-y)) 69 | optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost) 70 | #oil_train, stock_train, oil_test, stock_test = inputs 71 | 72 | oil_train, stock_train, oil_test, stock_test = refine_input_with_lag(oil_train, stock_train, oil_test, stock_test) 73 | with tf.Session() as sess: 74 | sess.run(tf.global_variables_initializer()) 75 | #Running neural net 76 | for epoch in range(hm_epoch): 77 | epoch_loss = 0 78 | for index in range(int(len(oil_train.values) / total_chunk_size)): 79 | x_in = oil_train.values[index * total_chunk_size:index * total_chunk_size + total_chunk_size].reshape((1, n_chunks, chunk_size)) 80 | y_in = stock_train.values[index * total_chunk_size:index * total_chunk_size + total_chunk_size].reshape((1, n_chunks, chunk_size)) 81 | _, c = sess.run([optimizer, cost], feed_dict={x: x_in, y: y_in}) 82 | epoch_loss += c 83 | print('Epoch', epoch, 'completed out of', hm_epoch, 'loss:', epoch_loss) 84 | correct = tf.reduce_mean(tf.square(tf.subtract(prediction, y))) 85 | total = 0 86 | cor = 0 87 | for index in range(int(len(oil_test.values) / total_chunk_size)): 88 | x_in = oil_test.values[index * total_chunk_size:index * total_chunk_size + total_chunk_size].reshape((1, n_chunks, chunk_size)) 89 | y_in = stock_test.values[index * total_chunk_size:index * total_chunk_size + total_chunk_size].reshape((1, 
n_chunks, chunk_size)) 90 | total += total_chunk_size 91 | if abs(correct.eval(feed_dict={x: x_in, y: y_in})) < 5: 92 | cor += total_chunk_size 93 | 94 | saver = tf.train.Saver() 95 | print('Accuracy:', cor/total) 96 | save_path = saver.save(sess, "data/model/recurrent/recurrent.ckpt") 97 | print("Model saved in file: %s" % save_path) 98 | 99 | date_labels = oil_price.index 100 | date_labels = matplotlib.dates.date2num(date_labels.to_pydatetime())[:-4] 101 | 102 | predictions = [] 103 | for index in range(int(len(oil_price.values) / total_chunk_size)): 104 | x_in = oil_price.values[index * total_chunk_size:index * total_chunk_size + total_chunk_size].reshape((1, n_chunks, chunk_size)) 105 | predictions += sess.run(prediction, feed_dict={x: x_in})[0].reshape(total_chunk_size).tolist() 106 | print(len(predictions), len(date_labels)) 107 | plt.plot_date(date_labels, predictions, 'b-', label="RNN Predictions") 108 | plt.plot_date(date_labels, stock_price.values[:-4], 'r-', label='Stock Prices') 109 | plt.legend() 110 | plt.ylabel('Price') 111 | plt.xlabel('Year') 112 | plt.show() 113 | 114 | 115 | if __name__ == "__main__": 116 | recurrent_neural_network(x) 117 | -------------------------------------------------------------------------------- /scripts/ConvNet.py: -------------------------------------------------------------------------------- 1 | from scripts import data_process as dp 2 | from scripts.constants import * 3 | import tensorflow as tf 4 | import matplotlib.pyplot as plt 5 | import matplotlib 6 | import numpy as np 7 | import pickle 8 | 9 | n_classes = 20 10 | batch_size = 128 11 | input_size = (20, 160) 12 | keep_rate = 0.8 13 | keep_prob = tf.placeholder(tf.float32) 14 | 15 | 16 | def conv2d(x, W): 17 | return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME') 18 | 19 | 20 | def maxpool2d(x): 21 | # size of window movement of window 22 | return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME') 23 | 24 | 25 | x = 
tf.placeholder('float', name='input_cnn') 26 | y = tf.placeholder('float', name='train_output_cnn') 27 | 28 | 29 | def cnn_model(data): 30 | weights = {'W_conv1': tf.Variable(tf.random_normal([5, 5, 1, 32])), 31 | 'W_conv2': tf.Variable(tf.random_normal([5, 5, 32, 64])), 32 | 'W_fc': tf.Variable(tf.random_normal([5 * 40 * 64, 1024])), 33 | 'out': tf.Variable(tf.random_normal([1024, n_classes]))} 34 | 35 | biases = {'b_conv1': tf.Variable(tf.random_normal([32])), 36 | 'b_conv2': tf.Variable(tf.random_normal([64])), 37 | 'b_fc': tf.Variable(tf.random_normal([1024])), 38 | 'out': tf.Variable(tf.random_normal([n_classes]))} 39 | 40 | data = tf.reshape(data, shape=[-1, 20, 160, 1]) 41 | 42 | conv1 = tf.nn.relu(conv2d(data, weights['W_conv1']) + biases['b_conv1']) 43 | conv1 = maxpool2d(conv1) 44 | 45 | conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2']) 46 | conv2 = maxpool2d(conv2) 47 | 48 | fc = tf.reshape(conv2, [-1, 5 * 40 * 64]) 49 | fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc']) 50 | fc = tf.nn.dropout(fc, keep_rate) 51 | 52 | output = tf.add(tf.matmul(fc, weights['out']), biases['out'], name='cnn_output') 53 | 54 | return output 55 | 56 | 57 | prediction = cnn_model(x) 58 | 59 | 60 | def refine_input_with_lag(oil_train, stock_train, oil_test, stock_test): 61 | cost = tf.reduce_mean(tf.square(prediction-y)) 62 | optimizer = tf.train.AdamOptimizer().minimize(cost) 63 | #Adding lag 64 | all_lag_losses = [] 65 | for i in range(lag_range): 66 | with tf.Session() as sess: 67 | sess.run(tf.global_variables_initializer()) 68 | oil_lag, stock_lag = dp.add_lag(oil_train, stock_train, i) 69 | for epoch in range(lag_epoch_num): 70 | lag_loss = 0 71 | for index in range(int(len(oil_lag.values)/input_size[0])): 72 | x_in = np.zeros((input_size[1], input_size[0], 1, 1)) 73 | for index_in, value in enumerate(oil_lag.values[index * input_size[0]:index * input_size[0] + input_size[0]]): 74 | x_in[int(value), index_in, 0, 0] = 1 75 | y_in = 
stock_lag.values[index * input_size[0]:index * input_size[0] + input_size[0]] 76 | _, c = sess.run([optimizer, cost], feed_dict={x: x_in, y: y_in}) 77 | lag_loss += c 78 | print('Lag', i, 'epoch', epoch, 'loss:', lag_loss) 79 | all_lag_losses.append(lag_loss) 80 | lag = all_lag_losses.index(min(all_lag_losses)) 81 | oil_train, stock_train = dp.add_lag(oil_train, stock_train, lag) 82 | oil_test, stock_test = dp.add_lag(oil_test, stock_test, lag) 83 | print("The best lag is:", lag) 84 | pickle.dump(lag, open("data/lag.p", "wb")) 85 | return oil_train, stock_train, oil_test, stock_test 86 | 87 | 88 | def conv_neural_network(inputs): 89 | oil_train, stock_train, oil_test, stock_test, oil_price, stock_price = inputs 90 | cost = tf.reduce_mean(tf.square(prediction-y)) 91 | optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost) 92 | #oil_train, stock_train, oil_test, stock_test = inputs 93 | 94 | oil_train, stock_train, oil_test, stock_test = refine_input_with_lag(oil_train, stock_train, oil_test, stock_test) 95 | with tf.Session() as sess: 96 | sess.run(tf.global_variables_initializer()) 97 | #Running neural net 98 | for epoch in range(hm_epoch): 99 | epoch_loss = 0 100 | for index in range(int(len(oil_train.values) / input_size[0])): 101 | x_in = np.zeros((input_size[1], input_size[0], 1, 1)) 102 | for index_in, value in enumerate( 103 | oil_train.values[index * input_size[0]:index * input_size[0] + input_size[0]]): 104 | x_in[int(value), index_in, 0, 0] = 1 105 | y_in = stock_train.values[index * input_size[0]:index * input_size[0] + input_size[0]] 106 | _, c = sess.run([optimizer, cost], feed_dict={x: x_in, y: y_in}) 107 | epoch_loss += c 108 | print('Epoch', epoch, 'completed out of', hm_epoch, 'loss:', epoch_loss) 109 | correct = tf.reduce_mean(tf.square(tf.subtract(prediction, y))) 110 | total = 0 111 | cor = 0 112 | for index in range(int(len(oil_test.values) / input_size[0])): 113 | x_in = np.zeros((input_size[1], input_size[0], 1, 1)) 114 | for 
index_in, value in enumerate( 115 | oil_test.values[index * input_size[0]:index * input_size[0] + input_size[0]]): 116 | x_in[int(value), index_in, 0, 0] = 1 117 | y_in = stock_test.values[index * input_size[0]:index * input_size[0] + input_size[0]] 118 | total += input_size[0] 119 | if abs(correct.eval(feed_dict={x: x_in, y: y_in})) < 5: 120 | cor += input_size[0] 121 | 122 | saver = tf.train.Saver() 123 | print('Accuracy:', cor/total) 124 | save_path = saver.save(sess, "data/model/recurrent/recurrent.ckpt") 125 | print("Model saved in file: %s" % save_path) 126 | 127 | predictions = [] 128 | for index in range(int(len(oil_price.values) / input_size[0])): 129 | x_in = np.zeros((input_size[1], input_size[0], 1, 1)) 130 | for index_in, value in enumerate( 131 | oil_price.values[index * input_size[0]:index * input_size[0] + input_size[0]]): 132 | x_in[int(value), index_in, 0, 0] = 1 133 | predictions += sess.run(prediction, feed_dict={x: x_in})[0].tolist() 134 | 135 | date_labels = oil_price.index 136 | date_labels = matplotlib.dates.date2num(date_labels.to_pydatetime())[:-14] 137 | 138 | plt.plot_date(date_labels, predictions, 'b-', label="RNN Predictions") 139 | plt.plot_date(date_labels, stock_price.values[:-14], 'r-', label='Stock Prices') 140 | plt.legend() 141 | plt.ylabel('Price') 142 | plt.xlabel('Year') 143 | plt.show() 144 | 145 | 146 | if __name__ == "__main__": 147 | conv_neural_network(x) 148 | -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 
86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 151 | 152 | 153 | 154 | input 155 | save 156 | 157 | 158 | 159 | 161 | 162 | 178 | 179 | 180 | 181 | 182 | true 183 | DEFINITION_ORDER 184 | 185 | 186 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 |