├── misc
├── __init__.py
├── graph.py
└── arg_handler.py
├── pipeline
├── __init__.py
├── strategies
│ ├── __init__.py
│ ├── .ff_strat.py.swo
│ ├── .ff_strat.py.swp
│ └── ff_strat.py
└── backtest.py
├── scripts
├── __init__.py
├── oil.png
├── .feedforward_nn.py.swp
├── constants.py
├── data_process.py
├── feedforward_nn.py
├── recurrent_lstm.py
└── ConvNet.py
├── data
├── lag.p
├── save.p
└── stock_close.p
├── generic-data-system-finance-FSI_shutterstock-545018428-370x290.jpg
├── .idea
├── vcs.xml
├── inspectionProfiles
│ └── profiles_settings.xml
├── modules.xml
├── Quant_stock.iml
├── misc.xml
└── workspace.xml
├── driver.py
├── examples
├── test.py
├── ex2.py
├── international-airline-passengers.csv
└── passenger.py
├── README.md
├── LICENSE
└── .gitignore
/misc/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pipeline/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pipeline/strategies/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/data/lag.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ethic-ai-dev/bloomberg-gpt-stock-trader/HEAD/data/lag.p
--------------------------------------------------------------------------------
/data/save.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ethic-ai-dev/bloomberg-gpt-stock-trader/HEAD/data/save.p
--------------------------------------------------------------------------------
/scripts/oil.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ethic-ai-dev/bloomberg-gpt-stock-trader/HEAD/scripts/oil.png
--------------------------------------------------------------------------------
/data/stock_close.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ethic-ai-dev/bloomberg-gpt-stock-trader/HEAD/data/stock_close.p
--------------------------------------------------------------------------------
/scripts/.feedforward_nn.py.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ethic-ai-dev/bloomberg-gpt-stock-trader/HEAD/scripts/.feedforward_nn.py.swp
--------------------------------------------------------------------------------
/pipeline/strategies/.ff_strat.py.swo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ethic-ai-dev/bloomberg-gpt-stock-trader/HEAD/pipeline/strategies/.ff_strat.py.swo
--------------------------------------------------------------------------------
/pipeline/strategies/.ff_strat.py.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ethic-ai-dev/bloomberg-gpt-stock-trader/HEAD/pipeline/strategies/.ff_strat.py.swp
--------------------------------------------------------------------------------
/generic-data-system-finance-FSI_shutterstock-545018428-370x290.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ethic-ai-dev/bloomberg-gpt-stock-trader/HEAD/generic-data-system-finance-FSI_shutterstock-545018428-370x290.jpg
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/driver.py:
--------------------------------------------------------------------------------
1 | from misc.arg_handler import arg_parser, InputHandler
2 |
3 |
def main():
    """Parse the command-line arguments and hand them to ``InputHandler``."""
    InputHandler(arg_parser())


if __name__ == "__main__":
    main()
11 |
--------------------------------------------------------------------------------
/scripts/constants.py:
--------------------------------------------------------------------------------
# Hyper-parameters star-imported by the model scripts (e.g. feedforward_nn).
hm_epoch = 5  # number of epochs used in training
lag_range = 1  # lag range
lag_epoch_num = 1  # number of epochs used while finding the lag

learning_rate = 0.001  # default learning rate
6 |
7 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/Quant_stock.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/examples/test.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import matplotlib as mpl
4 | import matplotlib.pyplot as plt
5 |
# Fetch the CHRIS/CME_CL1 dataset from Quandl as CSV; the first column
# becomes a parsed date index.
url = "https://www.quandl.com/api/v3/datasets/CHRIS/CME_CL1.csv"
wticl1 = pd.read_csv(url, index_col=0, parse_dates=True)
wticl1 = wticl1.sort_index()  # order the rows by index

# Keep the 'Last' column on its own and store its percentage change.
wticl1_last = wticl1['Last']
wticl1['PctCh'] = wticl1_last.pct_change()

print(type(wticl1_last))
print(wticl1_last)

# Disabled: plots the 'Last' series and saves the figure as oil.png.
#fig = plt.figure(figsize=[7,5])
#ax1 = plt.subplot(111)
#line = wticl1_last.plot(color='red',linewidth=3)
#ax1.set_ylabel('USD per barrel')
#ax1.set_xlabel('')
#ax1.set_title('WTI Crude Oil Price', fontsize=18)
#ax1.spines["top"].set_visible(False)
#ax1.spines["right"].set_visible(False)
#ax1.get_xaxis().tick_bottom()
#ax1.get_yaxis().tick_left()
#ax1.tick_params(axis='x', which='major', labelsize=8)
#plt.savefig('oil.png', dpi=1000)
#plt.show()
27 |
28 |
29 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # BloombergGPT-based Stock Market Trader POC
2 | A stock analysis/prediction model that uses machine learning to model the impact of out-of-market factors (weather, etc.) on stock prices.
3 |
4 | ---
5 |
6 | 
7 |
8 | ## Models used
9 | Three ML models are being implemented:
10 | * A simple feedforward neural network
11 | * A recurrent neural network with LSTM (long short term memory)
12 | * BloombergGPT for processing raw text
13 |
14 | ## Accuracy measurements
15 | The pipeline uses [backtrader](https://www.backtrader.com) to backtest each individual strategy. In the future, it would be worthwhile to try a genetic algorithm to better assess the accuracy of the model.
16 |
17 | ## Usage
18 | There are three main usages for this project:
19 |
20 | run `python driver.py -t model_name` to train
21 |
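22 | run `python driver.py -b model_name` to backtest the model
23 |
24 | run `python driver.py -g model_name` to graph the model's predictions against the stock prices
25 |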
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Lam Nguyen
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/scripts/data_process.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from googlefinance.client import get_price_data, get_prices_data, get_prices_time_data
3 | import pickle
4 |
5 |
def create_data():
    """Load oil and stock price series and split them into train/test sets.

    The oil series is the 'Last' column of ``data/CME_CL1.csv``. The stock
    series is the 'Close' column returned by ``get_price_data``; on success it
    is cached in ``data/stock_close.p``, and if retrieval fails for any reason
    the previously cached series is loaded instead. Both series are then
    aligned on their common index and the first three quarters are used for
    training.

    Returns:
        A tuple ``(oil_train, stock_train, oil_test, stock_test, oil_price,
        stock_price)`` where the last two items are the full aligned series.
    """
    url = "data/CME_CL1.csv"
    crude_oil = pd.read_csv(url, index_col=0, parse_dates=True)
    crude_oil.sort_index(inplace=True)
    crude_oil_last = crude_oil['Last']

    # NOTE(review): presumably daily bars (86400 s) for XOM on NYSE over
    # 40 years -- confirm against the googlefinance client documentation.
    param = {
        'q': 'XOM',
        'i': 86400,
        'x': "NYSE",
        'p': '40Y'
    }

    try:
        print("Retrieving data")
        df = get_price_data(param)
        df.set_index(df.index.normalize(), inplace=True)
        stock_close = df['Close']
        with open("data/stock_close.p", "wb") as cache_file:
            pickle.dump(stock_close, cache_file)
    except Exception:
        # Deliberate best-effort fallback; ``Exception`` (rather than a bare
        # except) lets KeyboardInterrupt/SystemExit propagate.
        print("Error in retrieving data... Loading previous saved stock data.")
        # NOTE: pickle must only be used on files this project wrote itself.
        with open("data/stock_close.p", "rb") as cache_file:
            stock_close = pickle.load(cache_file)

    # Keep only the dates present in both series.
    oil_price, stock_price = crude_oil_last.align(stock_close, join='inner')

    split_index = int(3*len(oil_price)/4)
    oil_train = oil_price.iloc[:split_index]
    # The stock targets must come from the stock series (the original sliced
    # oil_price here, so the "stock" sets were copies of the oil sets).
    stock_train = stock_price.iloc[:split_index]

    oil_test = oil_price.iloc[split_index:]
    stock_test = stock_price.iloc[split_index:]

    return oil_train, stock_train, oil_test, stock_test, oil_price, stock_price
39 |
40 |
def add_lag(dataset_1, dataset_2, lag):
    """Offset two sequences against each other by ``lag`` positions.

    With a non-zero ``lag`` the last ``lag`` items are cut from ``dataset_1``
    and the first ``lag`` items are cut from ``dataset_2``; with ``lag == 0``
    both are returned untouched.

    Returns:
        The pair ``(dataset_1, dataset_2)`` after slicing.
    """
    if lag == 0:
        return dataset_1, dataset_2
    return dataset_1[:-lag], dataset_2[lag:]
47 |
48 |
49 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
--------------------------------------------------------------------------------
/misc/graph.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import matplotlib
3 | import tensorflow as tf
4 | from scripts import data_process as dp
5 |
6 |
# Window layout used by graph() when feeding the recurrent model.
chunk_size = 1  # values per chunk
n_chunks = 2  # chunks per input window
total_chunk_size = chunk_size * n_chunks  # values consumed per window

# NOTE(review): not referenced in the visible part of this module;
# presumably they mirror the training script's settings -- confirm.
n_classes = 2
rnn_size = 512
12 |
13 |
def graph(models):
    """Plot each pre-trained model's predictions against the stock prices.

    For every name in ``models`` the checkpoint under ``data/model/<name>`` is
    restored, the oil-price series from ``dp.create_data()`` is run through
    the network, and the predictions and stock prices are drawn on a date
    axis in a separate figure.

    Args:
        models: iterable of model names. Only 'feedforward' and 'recurrent'
            have known input/output tensor names; any other name is reported
            and skipped.
    """
    tensor_names = {
        'feedforward': ('input:0', 'output:0'),
        'recurrent': ('input_recurrent:0', 'output_recurrent:0'),
    }

    for model in models:
        if model not in tensor_names:
            # Without tensor names there is nothing to evaluate; skipping
            # avoids running on undefined (or the previous model's) tensors.
            print("Graphing is not supported for model: " + str(model))
            continue
        input_name, output_name = tensor_names[model]
        model_dir = 'data/model/' + str(model)

        print("Loading pre-trained model...")
        # The context manager closes the session once predictions are done.
        with tf.Session() as sess:
            saver = tf.train.import_meta_graph(model_dir + '/' + str(model) + '.ckpt.meta')
            saver.restore(sess, tf.train.latest_checkpoint(model_dir))
            print("Model loaded...")

            default_graph = tf.get_default_graph()
            x = default_graph.get_tensor_by_name(input_name)
            prediction = default_graph.get_tensor_by_name(output_name)
            _, _, _, _, oil_price, stock_price = dp.create_data()

            if model == 'feedforward':
                # One prediction per oil price.
                predictions = [
                    sess.run(prediction, feed_dict={x: [[price]]})[0][0]
                    for price in oil_price
                ]
            else:
                # The recurrent model consumes whole windows of
                # total_chunk_size values; a trailing partial window is dropped.
                predictions = []
                for index in range(len(oil_price.values) // total_chunk_size):
                    start = index * total_chunk_size
                    x_in = oil_price.values[start:start + total_chunk_size].reshape(
                        (1, n_chunks, chunk_size))
                    predictions += sess.run(prediction, feed_dict={x: x_in})[0].reshape(total_chunk_size).tolist()

        # Date labels are needed for every model, not only the feedforward one.
        date_labels = matplotlib.dates.date2num(oil_price.index.to_pydatetime())
        # Trim both axes to the number of predictions actually produced so the
        # x and y arrays always have matching lengths.
        plotted = len(predictions)
        plt.plot_date(date_labels[:plotted], predictions, 'b-',
                      label=str(model).capitalize() + " Predictions")
        plt.plot_date(date_labels[:plotted], stock_price.values[:plotted], 'r-',
                      label='Stock Prices')
        plt.legend()
        plt.ylabel('Price')
        plt.xlabel('Year')
        plt.show()
50 |
--------------------------------------------------------------------------------
/misc/arg_handler.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from pipeline import backtest
3 | from pipeline.backtest import TestStrategy
4 | from pipeline.strategies.ff_strat import FeedforwardStrategy
5 | from scripts import feedforward_nn, recurrent_lstm, ConvNet
6 | from scripts import data_process as dp
7 | from misc.graph import graph
8 |
9 |
def arg_parser():
    """Parse the process command line and return the resulting namespace.

    Options:
        -b/--btest: one model name to backtest ('test', 'feedforward',
            'recurrent' or 'cnn'); ``None`` when absent.
        -t/--train: one model name to train ('feedforward', 'recurrent' or
            'cnn'); ``None`` when absent.
        -g/--graph: zero or more model names to graph; ``None`` when absent.
    """
    trainable = ['feedforward', 'recurrent', 'cnn']
    selectable = ['test'] + trainable

    cli = argparse.ArgumentParser(
        description="Stock prediction model",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    cli.add_argument('-b', '--btest', choices=selectable, default=None,
                     help='Run backtest with the model')
    cli.add_argument('-t', '--train', choices=trainable, default=None,
                     help='Train a model')
    cli.add_argument('-g', '--graph', choices=selectable, nargs='*',
                     help='Graph')
    return cli.parse_args()
20 |
21 |
class InputHandler:
    """Dispatch parsed command-line options to training, backtesting and graphing.

    All work happens in the constructor: each option that was supplied
    triggers its action, in the order train, backtest, graph.
    """

    def __init__(self, inputs):
        """Run the actions selected in ``inputs``.

        Args:
            inputs: namespace with ``train``, ``btest`` and ``graph``
                attributes, as returned by ``arg_parser``.
        """
        self.inputs = inputs
        if self.inputs.train:
            self.train(self.inputs.train)
        if self.inputs.btest:
            if self.inputs.btest == "test":
                self.run(TestStrategy)
            elif self.inputs.btest == "feedforward":
                self.run(FeedforwardStrategy)
            # NOTE: 'recurrent' and 'cnn' are accepted by the parser but have
            # no strategy here, so they are silently ignored.
        # argparse gives None when -g is absent and [] when -g is passed
        # without model names. Testing truthiness treated both the same, which
        # made the "graph every model" default unreachable.
        if self.inputs.graph is not None:
            graph(self.inputs.graph or ['feedforward', 'recurrent', 'cnn'])

    @staticmethod
    def run(strategy):
        """Backtest ``strategy`` on the 'XOM' symbol and plot the result."""
        backtest_obj = backtest.Backtest(stock_symbol='XOM', strategy=strategy)
        backtest_obj.run(plot=True)

    @staticmethod
    def train(model):
        """Train the model named ``model`` on the data from ``dp.create_data()``.

        Unknown names are ignored.
        """
        inputs = dp.create_data()
        if model == "feedforward":
            feedforward_nn.feedforward_neural_network(inputs)
        elif model == "recurrent":
            recurrent_lstm.recurrent_neural_network(inputs)
        elif model == 'cnn':
            ConvNet.conv_neural_network(inputs)
52 |
--------------------------------------------------------------------------------
/examples/ex2.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.examples.tutorials.mnist import input_data
3 | from tensorflow.contrib import rnn
4 |
# Load the MNIST data set (stored under /tmp/data/) with one-hot labels.
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Training hyper-parameters.
n_classes = 10
batch_size = 128
epochs_num = 10
learning_rate = 0.001

# Every example is reshaped to chunk_num chunks of chunk_size values before
# it is fed to the LSTM, whose cell has rnn_size units.
chunk_size = 28
chunk_num = 28
rnn_size = 128

# Graph inputs: batches of chunked examples and their labels.
x = tf.placeholder('float', [None, chunk_num, chunk_size])
y = tf.placeholder('float')
19 |
def recurrent_neural_network(x):
    """Build the LSTM classifier on top of ``x`` and return its logits tensor.

    ``x`` is re-arranged into a list of ``chunk_num`` tensors (one per step),
    run through a single ``BasicLSTMCell``, and the output of the last step is
    projected to ``n_classes`` values by a dense layer.
    """
    out_weights = tf.Variable(tf.random_normal([rnn_size, n_classes]))
    out_biases = tf.Variable(tf.random_normal([n_classes]))

    # static_rnn wants a Python list with one 2-D tensor per step.
    steps = tf.transpose(x, [1, 0, 2])
    steps = tf.reshape(steps, [-1, chunk_size])
    steps = tf.split(steps, chunk_num, 0)

    cell = rnn.BasicLSTMCell(rnn_size)
    outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    return tf.matmul(outputs[-1], out_weights) + out_biases
34 |
def train_neural_network(x):
    """Train the recurrent network on MNIST and print its test accuracy.

    Runs ``epochs_num`` epochs of Adam optimisation on the softmax
    cross-entropy loss, printing the summed loss after each epoch, then
    evaluates the fraction of correctly classified test images.
    """
    prediction = recurrent_neural_network(x)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction))
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    batches_per_epoch = int(mnist.train.num_examples / batch_size)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(epochs_num):
            epoch_loss = 0
            for _ in range(batches_per_epoch):
                batch_x, batch_y = mnist.train.next_batch(batch_size)
                batch_x = batch_x.reshape((batch_size, chunk_num, chunk_size))
                _, batch_cost = sess.run([optimizer, cost],
                                         feed_dict={x: batch_x, y: batch_y})
                epoch_loss += batch_cost
            print ('Epoch', epoch, 'completed out of', epochs_num, 'loss:', epoch_loss)

        # A prediction is correct when its arg-max matches the label's.
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        test_feed = {
            x: mnist.test.images.reshape((-1, chunk_num, chunk_size)),
            y: mnist.test.labels,
        }
        print('Accuracy:', accuracy.eval(test_feed))


train_neural_network(x)
57 |
--------------------------------------------------------------------------------
/examples/international-airline-passengers.csv:
--------------------------------------------------------------------------------
1 | "Month","International airline passengers: monthly totals in thousands. Jan 49 ? Dec 60"
2 | "1949-01",112
3 | "1949-02",118
4 | "1949-03",132
5 | "1949-04",129
6 | "1949-05",121
7 | "1949-06",135
8 | "1949-07",148
9 | "1949-08",148
10 | "1949-09",136
11 | "1949-10",119
12 | "1949-11",104
13 | "1949-12",118
14 | "1950-01",115
15 | "1950-02",126
16 | "1950-03",141
17 | "1950-04",135
18 | "1950-05",125
19 | "1950-06",149
20 | "1950-07",170
21 | "1950-08",170
22 | "1950-09",158
23 | "1950-10",133
24 | "1950-11",114
25 | "1950-12",140
26 | "1951-01",145
27 | "1951-02",150
28 | "1951-03",178
29 | "1951-04",163
30 | "1951-05",172
31 | "1951-06",178
32 | "1951-07",199
33 | "1951-08",199
34 | "1951-09",184
35 | "1951-10",162
36 | "1951-11",146
37 | "1951-12",166
38 | "1952-01",171
39 | "1952-02",180
40 | "1952-03",193
41 | "1952-04",181
42 | "1952-05",183
43 | "1952-06",218
44 | "1952-07",230
45 | "1952-08",242
46 | "1952-09",209
47 | "1952-10",191
48 | "1952-11",172
49 | "1952-12",194
50 | "1953-01",196
51 | "1953-02",196
52 | "1953-03",236
53 | "1953-04",235
54 | "1953-05",229
55 | "1953-06",243
56 | "1953-07",264
57 | "1953-08",272
58 | "1953-09",237
59 | "1953-10",211
60 | "1953-11",180
61 | "1953-12",201
62 | "1954-01",204
63 | "1954-02",188
64 | "1954-03",235
65 | "1954-04",227
66 | "1954-05",234
67 | "1954-06",264
68 | "1954-07",302
69 | "1954-08",293
70 | "1954-09",259
71 | "1954-10",229
72 | "1954-11",203
73 | "1954-12",229
74 | "1955-01",242
75 | "1955-02",233
76 | "1955-03",267
77 | "1955-04",269
78 | "1955-05",270
79 | "1955-06",315
80 | "1955-07",364
81 | "1955-08",347
82 | "1955-09",312
83 | "1955-10",274
84 | "1955-11",237
85 | "1955-12",278
86 | "1956-01",284
87 | "1956-02",277
88 | "1956-03",317
89 | "1956-04",313
90 | "1956-05",318
91 | "1956-06",374
92 | "1956-07",413
93 | "1956-08",405
94 | "1956-09",355
95 | "1956-10",306
96 | "1956-11",271
97 | "1956-12",306
98 | "1957-01",315
99 | "1957-02",301
100 | "1957-03",356
101 | "1957-04",348
102 | "1957-05",355
103 | "1957-06",422
104 | "1957-07",465
105 | "1957-08",467
106 | "1957-09",404
107 | "1957-10",347
108 | "1957-11",305
109 | "1957-12",336
110 | "1958-01",340
111 | "1958-02",318
112 | "1958-03",362
113 | "1958-04",348
114 | "1958-05",363
115 | "1958-06",435
116 | "1958-07",491
117 | "1958-08",505
118 | "1958-09",404
119 | "1958-10",359
120 | "1958-11",310
121 | "1958-12",337
122 | "1959-01",360
123 | "1959-02",342
124 | "1959-03",406
125 | "1959-04",396
126 | "1959-05",420
127 | "1959-06",472
128 | "1959-07",548
129 | "1959-08",559
130 | "1959-09",463
131 | "1959-10",407
132 | "1959-11",362
133 | "1959-12",405
134 | "1960-01",417
135 | "1960-02",391
136 | "1960-03",419
137 | "1960-04",461
138 | "1960-05",472
139 | "1960-06",535
140 | "1960-07",622
141 | "1960-08",606
142 | "1960-09",508
143 | "1960-10",461
144 | "1960-11",390
145 | "1960-12",432
146 |
147 | International airline passengers: monthly totals in thousands. Jan 49 ? Dec 60
148 |
149 |
--------------------------------------------------------------------------------
/examples/passenger.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import matplotlib.pyplot as plt
3 | from pandas import read_csv
4 | import math
5 | from keras.models import Sequential
6 | from keras.layers import Dense
7 | from keras.layers import LSTM
8 | from sklearn.preprocessing import MinMaxScaler
9 | from sklearn.metrics import mean_squared_error
# turn a 2-D array of values into (look-back window, next value) samples
def create_dataset(dataset, look_back=1):
    """Build supervised samples from column 0 of ``dataset``.

    Sample ``i`` pairs the window ``dataset[i:i + look_back, 0]`` with the
    value that follows it, ``dataset[i + look_back, 0]``.

    Note: ``len(dataset) - look_back - 1`` samples are produced, so the last
    possible window is not emitted.

    Returns:
        ``(windows, targets)`` as NumPy arrays.
    """
    sample_count = len(dataset) - look_back - 1
    windows = [dataset[start:start + look_back, 0] for start in range(sample_count)]
    targets = [dataset[start + look_back, 0] for start in range(sample_count)]
    return numpy.array(windows), numpy.array(targets)
# seed NumPy's random generator so repeated runs are reproducible
numpy.random.seed(7)

# read only the second CSV column; skipfooter drops the trailing non-data lines
dataframe = read_csv('international-airline-passengers.csv', usecols=[1], engine='python', skipfooter=3)
dataset = dataframe.values.astype('float32')

# rescale the series into the [0, 1] range
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)

# chronological split: the first 67% for training, the remainder for testing
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train = dataset[:train_size, :]
test = dataset[train_size:, :]

# build samples where X is a 3-step window and Y is the value that follows it
look_back = 3
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

# the LSTM layer expects input shaped [samples, time steps, features]
trainX = trainX.reshape((trainX.shape[0], trainX.shape[1], 1))
testX = testX.reshape((testX.shape[0], testX.shape[1], 1))

# define and train a 4-unit LSTM followed by a single-output dense layer
model = Sequential()
model.add(LSTM(4, input_shape=(look_back, 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2)

# predict on both sets and map predictions and targets back to the original scale
trainPredict = scaler.inverse_transform(model.predict(trainX))
testPredict = scaler.inverse_transform(model.predict(testX))
trainY = scaler.inverse_transform([trainY])
testY = scaler.inverse_transform([testY])

# report the root mean squared error of each set
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
print('Test Score: %.2f RMSE' % (testScore))

# place the train predictions at their positions in a NaN-filled copy of the series
trainPredictPlot = numpy.empty_like(dataset)
trainPredictPlot[:, :] = numpy.nan
train_start = look_back
train_stop = len(trainPredict) + look_back
trainPredictPlot[train_start:train_stop, :] = trainPredict

# same for the test predictions, offset past the train section
testPredictPlot = numpy.empty_like(dataset)
testPredictPlot[:, :] = numpy.nan
test_start = len(trainPredict) + (look_back * 2) + 1
test_stop = len(dataset) - 1
testPredictPlot[test_start:test_stop, :] = testPredict

# draw the original series with both prediction curves on top
plt.plot(scaler.inverse_transform(dataset))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()
70 |
--------------------------------------------------------------------------------
/pipeline/backtest.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | import backtrader as bt
3 |
4 |
class SmaCross(bt.SignalStrategy):
    """Signal strategy that registers a long signal from a fast/slow SMA crossover."""

    # Periods of the fast and slow simple moving averages.
    params = (('pfast', 10), ('pslow', 30),)

    def __init__(self):
        fast_sma = bt.ind.SMA(period=self.p.pfast)
        slow_sma = bt.ind.SMA(period=self.p.pslow)
        crossover = bt.ind.CrossOver(fast_sma, slow_sma)
        self.signal_add(bt.SIGNAL_LONG, crossover)
10 |
11 |
# Create a Strategy
class TestStrategy(bt.Strategy):
    """Sample strategy: buy after two falling closes, sell five bars later.

    While flat, a buy is issued when the close has fallen on two consecutive
    bars. While in the market, a sell is issued once at least five bars have
    passed since the last completed order.
    """

    def log(self, txt, dt=None):
        """Print ``txt`` prefixed with ``dt`` (default: the current bar's date)."""
        stamp = dt or self.datas[0].datetime.date(0)
        print('%s, %s' % (stamp.isoformat(), txt))

    def __init__(self):
        # Shortcut to the "close" line of the first data feed.
        self.dataclose = self.datas[0].close

        # Pending order plus the price/commission of the last executed buy.
        self.order = None
        self.buyprice = None
        self.buycomm = None

    def notify_order(self, order):
        """Log order completion or failure and clear the pending-order marker."""
        if order.status in [order.Submitted, order.Accepted]:
            # Still travelling to / sitting at the broker: nothing to do yet.
            return

        # Note: the broker may reject an order when there is not enough cash.
        if order.status in [order.Completed]:
            executed = order.executed
            fill = (executed.price, executed.value, executed.comm)
            if order.isbuy():
                self.log('BUY EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' % fill)
                self.buyprice = executed.price
                self.buycomm = executed.comm
            else:
                self.log('SELL EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' % fill)

            # Remember the bar on which the order was executed.
            self.bar_executed = len(self)

        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.log('Order Canceled/Margin/Rejected')

        # Whatever happened, the order is no longer pending.
        self.order = None

    def notify_trade(self, trade):
        """Log gross and net profit once a trade is closed."""
        if trade.isclosed:
            self.log('OPERATION PROFIT, GROSS %.2f, NET %.2f' %
                     (trade.pnl, trade.pnlcomm))

    def next(self):
        """Log the close and decide whether to buy or sell on this bar."""
        self.log('Close, %.2f' % self.dataclose[0])

        # Never send a second order while one is still pending.
        if self.order:
            return

        if self.position:
            # In the market: sell once five bars have passed since execution.
            if len(self) >= (self.bar_executed + 5):
                self.log('SELL CREATE, %.2f' % self.dataclose[0])
                # Track the order so no second one is created.
                self.order = self.sell()
            return

        # Flat: buy when the close fell on this bar and on the one before it.
        if self.dataclose[0] < self.dataclose[-1] and self.dataclose[-1] < self.dataclose[-2]:
            self.log('BUY CREATE, %.2f' % self.dataclose[0])
            # Track the order so no second one is created.
            self.order = self.buy()
98 |
99 |
class Backtest:
    """Thin wrapper that configures and runs a backtrader ``Cerebro`` engine."""

    def __init__(self, stock_symbol, start=datetime(2010, 1, 1),
                 end=datetime(2017, 1, 1), strategy=TestStrategy):
        """Set up the broker, the Yahoo Finance data feed and the strategy.

        Args:
            stock_symbol: symbol passed to the data feed as ``dataname``.
            start: first date of the feed.
            end: last date of the feed.
            strategy: strategy class to add to the engine.
        """
        self.cerebro = bt.Cerebro()
        broker = self.cerebro.broker
        broker.setcommission(commission=0.001)
        broker.setcash(100000)

        feed = bt.feeds.YahooFinanceData(dataname=stock_symbol, fromdate=start,
                                         todate=end)
        self.cerebro.adddata(feed)
        print("Value before transactions:", self.cerebro.broker.getvalue())
        self.cerebro.addstrategy(strategy)

    def run(self, plot=False):
        """Run the backtest, print the final value and optionally plot it."""
        self.cerebro.run()
        print("Value after transactions:", self.cerebro.broker.getvalue())
        if not plot:
            return
        self.cerebro.plot()
117 |
--------------------------------------------------------------------------------
/pipeline/strategies/ff_strat.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import backtrader as bt
3 | import tensorflow as tf
4 | from scripts import data_process as dp
5 | import pickle
6 |
7 |
8 | class FeedforwardStrategy(bt.Strategy):
9 | def log(self, txt, dt=None):
10 | ''' Logging function fot this strategy'''
11 | dt = dt or self.datas[0].datetime.date(0)
12 | print('%s, %s' % (dt.isoformat(), txt))
13 |
14 | def __init__(self):
15 | # Keep a reference to the "close" line in the data[0] dataseries
16 | self.dataclose = self.datas[0].close
17 |
18 | # To keep track of pending orders and buy price/commission
19 | self.order = None
20 | self.buyprice = None
21 | self.buycomm = None
22 |
23 | print("Loading pre-trained model...")
24 | self.sess = tf.Session()
25 | self.saver = tf.train.import_meta_graph("data/model/feedforward/feedforward.ckpt.meta")
26 | self.saver.restore(self.sess, tf.train.latest_checkpoint('data/model/feedforward'))
27 | print("Model loaded...")
28 |
29 | self.graph = tf.get_default_graph()
30 | self.x = self.graph.get_tensor_by_name('input:0')
31 | self.prediction = self.graph.get_tensor_by_name('output:0')
32 | _, _, _, _, self.oil_price, self.stock_price = dp.create_data()
33 |
34 | self.prediction_graph()
35 |
36 | def notify_order(self, order):
37 | if order.status in [order.Submitted, order.Accepted]:
38 | # Buy/Sell order submitted/accepted to/by broker - Nothing to do
39 | return
40 | # Check if an order has been completed
41 | # Attention: broker could reject order if not enougth cash
42 | if order.status in [order.Completed]:
43 | if order.isbuy():
44 | self.log(
45 | 'BUY EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' %
46 | (order.executed.price,
47 | order.executed.value,
48 | order.executed.comm))
49 | self.buyprice = order.executed.price
50 | self.buycomm = order.executed.comm
51 | else: # Sell
52 | self.log('SELL EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' %
53 | (order.executed.price,
54 | order.executed.value,
55 | order.executed.comm))
56 | self.bar_executed = len(self)
57 | elif order.status in [order.Canceled, order.Margin, order.Rejected]:
58 | self.log('Order Canceled/Margin/Rejected')
59 | self.order = None
60 |
def notify_trade(self, trade):
    """Log the gross and net profit of ``trade`` once it has been closed."""
    if trade.isclosed:
        gross, net = trade.pnl, trade.pnlcomm
        self.log('OPERATION PROFIT, GROSS %.2f, NET %.2f' % (gross, net))
66 |
def prediction_graph(self):
    """Plot oil prices, stock prices and the model's predictions together.

    The restored model is evaluated once per entry of ``self.oil_price``;
    the three series are then drawn against their position index and the
    figure is shown.
    """
    # One session run per oil price; [0][0] extracts the single predicted value.
    predicted = [
        self.sess.run(self.prediction, feed_dict={self.x: [[oil]]})[0][0]
        for oil in self.oil_price
    ]

    series = (
        (self.oil_price.values, 'Oil Prices'),
        (self.stock_price.values, 'Stock Prices'),
        (predicted, "Predictions"),
    )
    for values, caption in series:
        plt.plot(values, label=caption)

    plt.legend()
    plt.ylabel('Price')
    plt.xlabel('Date')
    plt.show()
78 |
def next(self):
    """Decide on each bar whether to open or close a position.

    Nothing happens while an order is pending.  When flat, a buy is issued
    if the current bar's date is present in ``self.oil_price`` and the
    model's prediction for that oil price is above the current close.  When
    in the market, a sell is issued once at least two bars have passed since
    ``self.bar_executed``.
    """
    # A pending order blocks any new order.
    if self.order:
        return

    if self.position:
        # In the market: leave after holding for two bars.
        if len(self) >= (self.bar_executed + 2):
            self.log('SELL CREATE, %.2f' % self.dataclose[0])
            # Keep the order so no second one is sent while it is pending.
            self.order = self.sell()
        return

    # Flat: a prediction is only possible for dates that have an oil price.
    today = self.datas[0].datetime.date(0)
    if today not in self.oil_price:
        return

    forecast = self.sess.run(self.prediction,
                             feed_dict={self.x: [[self.oil_price[today]]]})
    # Buy when the predicted stock price is above the current close.
    if forecast > self.dataclose[0]:
        self.log('BUY CREATE, %.2f' % self.dataclose[0])
        # Keep the order so no second one is sent while it is pending.
        self.order = self.buy()
--------------------------------------------------------------------------------
/scripts/feedforward_nn.py:
--------------------------------------------------------------------------------
1 | from scripts import data_process as dp
2 | import matplotlib
3 | from scripts.constants import *
4 | import tensorflow as tf
5 | import matplotlib.pyplot as plt
6 | import pickle
7 |
# Number of units in each of the three hidden layers.
n_nodes_hl1 = n_nodes_hl2 = n_nodes_hl3 = 500

# The network produces one value per sample.
n_classes = 1
# NOTE(review): batch_size is not referenced anywhere else in this file.
batch_size = 100

# Module-level graph inputs: x carries the feature, y the training target.
x = tf.placeholder('float')
y = tf.placeholder('float')
17 |
18 |
def neural_network_model(data):
    """Build the feedforward network on top of ``data``.

    Three hidden layers (ReLU) are followed by an affine output layer that is
    created with ``name="output"``.  Every weight matrix and bias vector is a
    ``tf.Variable`` initialised from ``tf.random_normal``; the first weight
    matrix has a single input row.

    :param data: tensor multiplied with the first weight matrix.
    :return: the tensor produced by the output layer.
    """
    layer_shapes = [
        (1, n_nodes_hl1),
        (n_nodes_hl1, n_nodes_hl2),
        (n_nodes_hl2, n_nodes_hl3),
        (n_nodes_hl3, n_classes),
    ]

    # Create weights, then biases, for each layer in layer order.
    parameters = []
    for n_inputs, n_outputs in layer_shapes:
        weights = tf.Variable(tf.random_normal([n_inputs, n_outputs]))
        biases = tf.Variable(tf.random_normal([n_outputs]))
        parameters.append((weights, biases))

    # Hidden layers: affine transform followed by ReLU.
    activation = data
    for weights, biases in parameters[:-1]:
        pre_activation = tf.add(tf.matmul(activation, weights), biases)
        activation = tf.nn.relu(pre_activation)

    # Output layer: affine only.
    out_weights, out_biases = parameters[-1]
    return tf.add(tf.matmul(activation, out_weights), out_biases, name="output")
45 |
46 |
def refine_input_with_lag(oil_train, stock_train, oil_test, stock_test):
    """Choose the lag with the lowest training loss and apply it to the data.

    A separate copy of the network is built on the module-level ``x``/``y``
    placeholders.  For every candidate lag in ``range(lag_range)`` the
    variables are re-initialised, the lagged training pairs from
    ``dp.add_lag`` are fitted for ``lag_epoch_num`` epochs, and the summed
    loss of the last epoch is recorded.  The lag with the smallest recorded
    loss is written to ``data/save.p`` and applied to both data splits.

    :param oil_train: training oil series (passed to ``dp.add_lag``).
    :param stock_train: training stock series (passed to ``dp.add_lag``).
    :param oil_test: test oil series (passed to ``dp.add_lag``).
    :param stock_test: test stock series (passed to ``dp.add_lag``).
    :return: ``(oil_train, stock_train, oil_test, stock_test)`` after the
        selected lag has been applied through ``dp.add_lag``.
    :raises ValueError: if ``lag_range`` is 0 (no losses to take a minimum of).
    """
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(tf.square(tf.transpose(prediction) - y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    # Try each candidate lag with freshly initialised variables.
    all_lag_losses = []
    for i in range(lag_range):
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            oil_lag, stock_lag = dp.add_lag(oil_train, stock_train, i)
            for epoch in range(lag_epoch_num):
                lag_loss = 0
                # One optimisation step per (oil, stock) pair.
                for (X, Y) in zip(oil_lag.values, stock_lag.values):
                    _, c = sess.run([optimizer, cost], feed_dict={x: [[X]], y: [[Y]]})
                    lag_loss += c
                print('Lag', i, 'epoch', epoch, 'loss:', lag_loss)
            # One entry per lag, so the list index equals the lag value.
            all_lag_losses.append(lag_loss)

    lag = all_lag_losses.index(min(all_lag_losses))
    oil_train, stock_train = dp.add_lag(oil_train, stock_train, lag)
    oil_test, stock_test = dp.add_lag(oil_test, stock_test, lag)
    print("The best lag is:", lag)

    # Persist the chosen lag; the context manager closes the file handle.
    with open("data/save.p", "wb") as lag_file:
        pickle.dump(lag, lag_file)
    return oil_train, stock_train, oil_test, stock_test
70 |
71 |
def feedforward_neural_network(inputs):
    """Train the feedforward model, report test accuracy, save it and plot it.

    :param inputs: six-item sequence ``(oil_train, stock_train, oil_test,
        stock_test, oil_price, stock_price)``.  The series are read through
        ``.values``; ``oil_price.index`` is converted with ``to_pydatetime()``.
    :return: None.  The checkpoint is written to
        ``data/model/feedforward/feedforward.ckpt`` and a plot is shown.
    """
    import os  # local import: only needed to prepare the checkpoint directory

    # This placeholder shadows the module-level ``x`` and is named 'input'.
    # NOTE(review): the network is built here, before refine_input_with_lag
    # builds its own copy, so this one should receive the un-suffixed
    # "output" name - confirm if the build order is ever changed.
    x = tf.placeholder('float', name='input')
    oil_train, stock_train, oil_test, stock_test, oil_price, stock_price = inputs
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(tf.square(tf.transpose(prediction) - y))
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    oil_train, stock_train, oil_test, stock_test = refine_input_with_lag(
        oil_train, stock_train, oil_test, stock_test)

    # Make sure the checkpoint directory exists before training starts, so
    # the save after training cannot fail on a missing directory.
    checkpoint_path = "data/model/feedforward/feedforward.ckpt"
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Training: one optimisation step per (oil, stock) pair.
        for epoch in range(hm_epoch):
            epoch_loss = 0
            for (X, Y) in zip(oil_train.values, stock_train.values):
                _, c = sess.run([optimizer, cost], feed_dict={x: [[X]], y: [[Y]]})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epoch, 'loss:', epoch_loss)

        # A test sample counts as correct when |prediction - target| < 5.
        correct = tf.subtract(prediction, y)
        total = 0
        cor = 0
        for (X, Y) in zip(oil_test.values, stock_test.values):
            total += 1
            if abs(correct.eval({x: [[X]], y: [[Y]]})) < 5:
                cor += 1
        # Avoid ZeroDivisionError when the test split is empty.
        accuracy = cor / total if total else float('nan')
        print('Accuracy:', accuracy)

        save_path = saver.save(sess, checkpoint_path)
        print("Model saved in file: %s" % save_path)

        date_labels = oil_price.index
        date_labels = matplotlib.dates.date2num(date_labels.to_pydatetime())

        # One session run per oil price; [0][0] extracts the predicted value.
        predictions = [
            sess.run(prediction, feed_dict={x: [[oil]]})[0][0]
            for oil in oil_price
        ]
        plt.plot_date(date_labels, predictions, 'b-', label="Feedforward Predictions")
        plt.plot_date(date_labels, stock_price.values, 'r-', label='Stock Prices')
        plt.legend()
        plt.ylabel('Price')
        plt.xlabel('Year')
        plt.show()
114 |
115 |
if __name__ == "__main__":
    # feedforward_neural_network unpacks six values from its argument; the
    # bare placeholder ``x`` cannot be unpacked that way.
    # NOTE(review): assumes dp.create_data() returns (oil_train, stock_train,
    # oil_test, stock_test, oil_price, stock_price) - confirm in data_process.
    feedforward_neural_network(dp.create_data())
--------------------------------------------------------------------------------
/scripts/recurrent_lstm.py:
--------------------------------------------------------------------------------
1 | from scripts import data_process as dp
2 | from scripts.constants import *
3 | import tensorflow as tf
4 | from tensorflow.contrib import rnn
5 | import matplotlib.pyplot as plt
6 | import matplotlib
7 | import pickle
8 |
# Width of the output layer.
n_classes = 5
# Number of units in the LSTM cell.
rnn_size = 512

# Each input sequence is n_chunks steps of chunk_size values ...
chunk_size = 1
n_chunks = 5
# ... so this many raw values are consumed per sequence.
total_chunk_size = n_chunks * chunk_size

# Module-level graph inputs: x carries the sequence, y the training target.
x = tf.placeholder('float', name='input_recurrent')
y = tf.placeholder('float', name='train_output_recurrent')
19 |
20 |
def rnn_model(data):
    """Build the LSTM network on top of ``data`` and return its output tensor.

    ``data`` has its first two axes swapped, is flattened to ``chunk_size``
    columns and split into ``n_chunks`` pieces, which are fed to a
    ``BasicLSTMCell`` through ``static_rnn``.  The output of the last step is
    passed through an affine layer created with ``name='output_recurrent'``.

    :param data: rank-3 input tensor (the transpose uses three axes).
    :return: the tensor produced by the output layer.
    """
    out_weights = tf.Variable(tf.random_normal([rnn_size, n_classes]))
    out_biases = tf.Variable(tf.random_normal([n_classes]))

    # Turn the input into the list of per-step tensors static_rnn is given.
    swapped = tf.transpose(data, [1, 0, 2])
    flattened = tf.reshape(swapped, [-1, chunk_size])
    steps = tf.split(flattened, n_chunks, 0)

    cell = rnn.BasicLSTMCell(rnn_size, state_is_tuple=True)
    step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    # Only the last step's output reaches the output layer.
    last_output = step_outputs[-1]
    return tf.add(tf.matmul(last_output, out_weights), out_biases, name='output_recurrent')
35 |
36 |
# Build the network once at import time; the functions below share this tensor.
prediction = rnn_model(x)
38 |
39 |
def refine_input_with_lag(oil_train, stock_train, oil_test, stock_test):
    """Choose the lag with the lowest training loss and apply it to the data.

    For every candidate lag in ``range(lag_range)`` the variables are
    re-initialised, the lagged training data from ``dp.add_lag`` is fitted in
    consecutive chunks of ``total_chunk_size`` values for ``lag_epoch_num``
    epochs, and the summed loss of the last epoch is recorded.  The lag with
    the smallest recorded loss is written to ``data/lag.p`` and applied to
    both data splits.

    :param oil_train: training oil series (passed to ``dp.add_lag``).
    :param stock_train: training stock series (passed to ``dp.add_lag``).
    :param oil_test: test oil series (passed to ``dp.add_lag``).
    :param stock_test: test stock series (passed to ``dp.add_lag``).
    :return: ``(oil_train, stock_train, oil_test, stock_test)`` after the
        selected lag has been applied through ``dp.add_lag``.
    :raises ValueError: if ``lag_range`` is 0 (no losses to take a minimum of).
    """
    cost = tf.reduce_mean(tf.square(prediction - y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    x_shape = (1, n_chunks, chunk_size)
    # The network output is (1, n_classes).  Feeding the target as
    # (1, n_chunks, chunk_size) made ``prediction - y`` broadcast to a
    # (1, 5, 5) matrix of all pairwise differences, so the target is fed as a
    # row vector to compare element by element.
    y_shape = (1, total_chunk_size)

    # Try each candidate lag with freshly initialised variables.
    all_lag_losses = []
    for i in range(lag_range):
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            oil_lag, stock_lag = dp.add_lag(oil_train, stock_train, i)
            for epoch in range(lag_epoch_num):
                lag_loss = 0
                # Only complete chunks are used; a trailing remainder is dropped.
                for index in range(int(len(oil_lag.values) / total_chunk_size)):
                    start = index * total_chunk_size
                    stop = start + total_chunk_size
                    x_in = oil_lag.values[start:stop].reshape(x_shape)
                    y_in = stock_lag.values[start:stop].reshape(y_shape)
                    _, c = sess.run([optimizer, cost], feed_dict={x: x_in, y: y_in})
                    lag_loss += c
                print('Lag', i, 'epoch', epoch, 'loss:', lag_loss)
            # One entry per lag, so the list index equals the lag value.
            all_lag_losses.append(lag_loss)

    lag = all_lag_losses.index(min(all_lag_losses))
    oil_train, stock_train = dp.add_lag(oil_train, stock_train, lag)
    oil_test, stock_test = dp.add_lag(oil_test, stock_test, lag)
    print("The best lag is:", lag)

    # Persist the chosen lag; the context manager closes the file handle.
    with open("data/lag.p", "wb") as lag_file:
        pickle.dump(lag, lag_file)
    return oil_train, stock_train, oil_test, stock_test
64 |
65 |
def recurrent_neural_network(inputs):
    """Train the LSTM model, report test accuracy, save it and plot it.

    :param inputs: six-item sequence ``(oil_train, stock_train, oil_test,
        stock_test, oil_price, stock_price)``.  The series are read through
        ``.values``; ``oil_price.index`` is converted with ``to_pydatetime()``.
    :return: None.  The checkpoint is written to
        ``data/model/recurrent/recurrent.ckpt`` and a plot is shown.
    """
    import os  # local import: only needed to prepare the checkpoint directory

    oil_train, stock_train, oil_test, stock_test, oil_price, stock_price = inputs
    cost = tf.reduce_mean(tf.square(prediction - y))
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    oil_train, stock_train, oil_test, stock_test = refine_input_with_lag(
        oil_train, stock_train, oil_test, stock_test)

    # Make sure the checkpoint directory exists before training starts, so
    # the save after training cannot fail on a missing directory.
    checkpoint_path = "data/model/recurrent/recurrent.ckpt"
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    x_shape = (1, n_chunks, chunk_size)
    # The network output is (1, n_classes).  Feeding the target as
    # (1, n_chunks, chunk_size) made ``prediction - y`` broadcast to a
    # (1, 5, 5) matrix of all pairwise differences, so the target is fed as a
    # row vector to compare element by element.
    y_shape = (1, total_chunk_size)

    def chunk(values, index, shape):
        """Return the ``index``-th block of ``total_chunk_size`` values, reshaped."""
        start = index * total_chunk_size
        return values[start:start + total_chunk_size].reshape(shape)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Training over complete chunks; a trailing remainder is dropped.
        for epoch in range(hm_epoch):
            epoch_loss = 0
            for index in range(int(len(oil_train.values) / total_chunk_size)):
                x_in = chunk(oil_train.values, index, x_shape)
                y_in = chunk(stock_train.values, index, y_shape)
                _, c = sess.run([optimizer, cost], feed_dict={x: x_in, y: y_in})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epoch, 'loss:', epoch_loss)

        # A test chunk counts as correct when its mean squared error is < 5.
        correct = tf.reduce_mean(tf.square(tf.subtract(prediction, y)))
        total = 0
        cor = 0
        for index in range(int(len(oil_test.values) / total_chunk_size)):
            x_in = chunk(oil_test.values, index, x_shape)
            y_in = chunk(stock_test.values, index, y_shape)
            total += total_chunk_size
            if abs(correct.eval(feed_dict={x: x_in, y: y_in})) < 5:
                cor += total_chunk_size

        saver = tf.train.Saver()
        # Avoid ZeroDivisionError when the test split has no complete chunk.
        accuracy = cor / total if total else float('nan')
        print('Accuracy:', accuracy)
        save_path = saver.save(sess, checkpoint_path)
        print("Model saved in file: %s" % save_path)

        predictions = []
        for index in range(int(len(oil_price.values) / total_chunk_size)):
            x_in = chunk(oil_price.values, index, x_shape)
            predictions += sess.run(prediction, feed_dict={x: x_in})[0].reshape(total_chunk_size).tolist()

        # Predictions only cover complete chunks, so trim the other series to
        # the same length instead of assuming a fixed remainder of 4.
        covered = len(predictions)
        date_labels = oil_price.index
        date_labels = matplotlib.dates.date2num(date_labels.to_pydatetime())[:covered]

        print(len(predictions), len(date_labels))
        plt.plot_date(date_labels, predictions, 'b-', label="RNN Predictions")
        plt.plot_date(date_labels, stock_price.values[:covered], 'r-', label='Stock Prices')
        plt.legend()
        plt.ylabel('Price')
        plt.xlabel('Year')
        plt.show()
113 |
114 |
if __name__ == "__main__":
    # recurrent_neural_network unpacks six values from its argument; the bare
    # placeholder ``x`` cannot be unpacked that way.
    # NOTE(review): assumes dp.create_data() returns (oil_train, stock_train,
    # oil_test, stock_test, oil_price, stock_price) - confirm in data_process.
    recurrent_neural_network(dp.create_data())
117 |
--------------------------------------------------------------------------------
/scripts/ConvNet.py:
--------------------------------------------------------------------------------
1 | from scripts import data_process as dp
2 | from scripts.constants import *
3 | import tensorflow as tf
4 | import matplotlib.pyplot as plt
5 | import matplotlib
6 | import numpy as np
7 | import pickle
8 |
# Width of the output layer.
n_classes = 20
# NOTE(review): batch_size is not referenced anywhere else in this file.
batch_size = 128
# (window length, number of one-hot rows) used when encoding oil prices.
input_size = (20, 160)
# Keep probability handed to tf.nn.dropout in cnn_model.
keep_rate = 0.8
# NOTE(review): keep_prob is defined but not fed or read in this file.
keep_prob = tf.placeholder(tf.float32)
14 |
15 |
def conv2d(x, W):
    """Convolve ``x`` with filter ``W`` using unit strides and SAME padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
18 |
19 |
def maxpool2d(x):
    """Max-pool ``x`` with a 2x2 window moved in steps of 2, SAME padding."""
    window = [1, 2, 2, 1]  # size of the pooling window
    step = [1, 2, 2, 1]    # movement of the window
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
23 |
24 |
# Module-level graph inputs: x carries the encoded window, y the training target.
x = tf.placeholder('float', name='input_cnn')
y = tf.placeholder('float', name='train_output_cnn')
27 |
28 |
def cnn_model(data):
    """Build the convolutional network on top of ``data``.

    ``data`` is reshaped to ``[-1, 20, 160, 1]`` and passed through two
    5x5 convolution + ReLU + 2x2 max-pool stages (32 then 64 filters), a
    1024-unit fully connected ReLU layer with dropout, and an affine output
    layer created with ``name='cnn_output'``.

    :param data: tensor that can be reshaped to ``[-1, 20, 160, 1]``.
    :return: the tensor produced by the output layer.
    """
    # Weights first, then biases, all drawn from tf.random_normal.
    conv1_filters = tf.Variable(tf.random_normal([5, 5, 1, 32]))
    conv2_filters = tf.Variable(tf.random_normal([5, 5, 32, 64]))
    fc_weights = tf.Variable(tf.random_normal([5 * 40 * 64, 1024]))
    out_weights = tf.Variable(tf.random_normal([1024, n_classes]))

    conv1_bias = tf.Variable(tf.random_normal([32]))
    conv2_bias = tf.Variable(tf.random_normal([64]))
    fc_bias = tf.Variable(tf.random_normal([1024]))
    out_bias = tf.Variable(tf.random_normal([n_classes]))

    images = tf.reshape(data, shape=[-1, 20, 160, 1])

    stage1 = maxpool2d(tf.nn.relu(conv2d(images, conv1_filters) + conv1_bias))
    stage2 = maxpool2d(tf.nn.relu(conv2d(stage1, conv2_filters) + conv2_bias))

    # Two 2x2 poolings reduce 20x160 to 5x40, with 64 channels.
    flat = tf.reshape(stage2, [-1, 5 * 40 * 64])
    dense = tf.nn.relu(tf.matmul(flat, fc_weights) + fc_bias)
    # NOTE(review): dropout uses the constant keep_rate, so it is applied
    # whenever this tensor is evaluated, including evaluation and plotting.
    dense = tf.nn.dropout(dense, keep_rate)

    return tf.add(tf.matmul(dense, out_weights), out_bias, name='cnn_output')
55 |
56 |
# Build the network once at import time; the functions below share this tensor.
prediction = cnn_model(x)
58 |
59 |
def refine_input_with_lag(oil_train, stock_train, oil_test, stock_test):
    """Choose the lag with the lowest training loss and apply it to the data.

    For every candidate lag in ``range(lag_range)`` the variables are
    re-initialised, the lagged training data from ``dp.add_lag`` is fitted in
    consecutive windows of ``input_size[0]`` values for ``lag_epoch_num``
    epochs, and the summed loss of the last epoch is recorded.  The lag with
    the smallest recorded loss is written to ``data/lag.p`` and applied to
    both data splits.

    :param oil_train: training oil series (passed to ``dp.add_lag``).
    :param stock_train: training stock series (passed to ``dp.add_lag``).
    :param oil_test: test oil series (passed to ``dp.add_lag``).
    :param stock_test: test stock series (passed to ``dp.add_lag``).
    :return: ``(oil_train, stock_train, oil_test, stock_test)`` after the
        selected lag has been applied through ``dp.add_lag``.
    :raises ValueError: if ``lag_range`` is 0 (no losses to take a minimum of).
    """
    cost = tf.reduce_mean(tf.square(prediction - y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    window = input_size[0]

    # Try each candidate lag with freshly initialised variables.
    all_lag_losses = []
    for i in range(lag_range):
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            oil_lag, stock_lag = dp.add_lag(oil_train, stock_train, i)
            for epoch in range(lag_epoch_num):
                lag_loss = 0
                # Only complete windows are used; a trailing remainder is dropped.
                for index in range(int(len(oil_lag.values) / window)):
                    start = index * window
                    stop = start + window
                    # One-hot encode: the price, truncated to an int, selects
                    # the row that is set to 1 for its position in the window.
                    x_in = np.zeros((input_size[1], window, 1, 1))
                    for index_in, value in enumerate(oil_lag.values[start:stop]):
                        x_in[int(value), index_in, 0, 0] = 1
                    y_in = stock_lag.values[start:stop]
                    _, c = sess.run([optimizer, cost], feed_dict={x: x_in, y: y_in})
                    lag_loss += c
                print('Lag', i, 'epoch', epoch, 'loss:', lag_loss)
            # One entry per lag, so the list index equals the lag value.
            all_lag_losses.append(lag_loss)

    lag = all_lag_losses.index(min(all_lag_losses))
    oil_train, stock_train = dp.add_lag(oil_train, stock_train, lag)
    oil_test, stock_test = dp.add_lag(oil_test, stock_test, lag)
    print("The best lag is:", lag)

    # Persist the chosen lag; the context manager closes the file handle.
    with open("data/lag.p", "wb") as lag_file:
        pickle.dump(lag, lag_file)
    return oil_train, stock_train, oil_test, stock_test
86 |
87 |
def _one_hot_oil_window(window_values):
    """Encode one window of oil prices as the 4-D array fed to ``x``.

    A zero array of shape ``(input_size[1], input_size[0], 1, 1)`` is created
    and, for the value at position ``p``, element ``[int(value), p, 0, 0]``
    is set to 1.
    """
    encoded = np.zeros((input_size[1], input_size[0], 1, 1))
    for position, value in enumerate(window_values):
        encoded[int(value), position, 0, 0] = 1
    return encoded


def conv_neural_network(inputs):
    """Train the convolutional model, report test accuracy, save and plot it.

    :param inputs: six-item sequence ``(oil_train, stock_train, oil_test,
        stock_test, oil_price, stock_price)``.  The series are read through
        ``.values``; ``oil_price.index`` is converted with ``to_pydatetime()``.
    :return: None.  The checkpoint is written to ``data/model/cnn/cnn.ckpt``
        and a plot is shown.
    """
    import os  # local import: only needed to prepare the checkpoint directory

    oil_train, stock_train, oil_test, stock_test, oil_price, stock_price = inputs
    cost = tf.reduce_mean(tf.square(prediction - y))
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    oil_train, stock_train, oil_test, stock_test = refine_input_with_lag(
        oil_train, stock_train, oil_test, stock_test)

    # The original saved to the recurrent model's checkpoint path and would
    # overwrite it; this model gets its own directory, created up front so
    # the save after training cannot fail on a missing directory.
    checkpoint_path = "data/model/cnn/cnn.ckpt"
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    window = input_size[0]

    def window_of(values, index):
        """Return the ``index``-th block of ``window`` consecutive values."""
        start = index * window
        return values[start:start + window]

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Training over complete windows; a trailing remainder is dropped.
        for epoch in range(hm_epoch):
            epoch_loss = 0
            for index in range(int(len(oil_train.values) / window)):
                x_in = _one_hot_oil_window(window_of(oil_train.values, index))
                y_in = window_of(stock_train.values, index)
                _, c = sess.run([optimizer, cost], feed_dict={x: x_in, y: y_in})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epoch, 'loss:', epoch_loss)

        # A test window counts as correct when its mean squared error is < 5.
        correct = tf.reduce_mean(tf.square(tf.subtract(prediction, y)))
        total = 0
        cor = 0
        for index in range(int(len(oil_test.values) / window)):
            x_in = _one_hot_oil_window(window_of(oil_test.values, index))
            y_in = window_of(stock_test.values, index)
            total += window
            if abs(correct.eval(feed_dict={x: x_in, y: y_in})) < 5:
                cor += window

        saver = tf.train.Saver()
        # Avoid ZeroDivisionError when the test split has no complete window.
        accuracy = cor / total if total else float('nan')
        print('Accuracy:', accuracy)
        save_path = saver.save(sess, checkpoint_path)
        print("Model saved in file: %s" % save_path)

        predictions = []
        for index in range(int(len(oil_price.values) / window)):
            x_in = _one_hot_oil_window(window_of(oil_price.values, index))
            predictions += sess.run(prediction, feed_dict={x: x_in})[0].tolist()

        # Predictions only cover complete windows, so trim the other series
        # to the same length instead of assuming a fixed remainder of 14.
        covered = len(predictions)
        date_labels = oil_price.index
        date_labels = matplotlib.dates.date2num(date_labels.to_pydatetime())[:covered]

        plt.plot_date(date_labels, predictions, 'b-', label="CNN Predictions")
        plt.plot_date(date_labels, stock_price.values[:covered], 'r-', label='Stock Prices')
        plt.legend()
        plt.ylabel('Price')
        plt.xlabel('Year')
        plt.show()
144 |
145 |
if __name__ == "__main__":
    # conv_neural_network unpacks six values from its argument; the bare
    # placeholder ``x`` cannot be unpacked that way.
    # NOTE(review): assumes dp.create_data() returns (oil_train, stock_train,
    # oil_test, stock_test, oil_price, stock_price) - confirm in data_process.
    conv_neural_network(dp.create_data())
148 |
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 | input
155 | save
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 | true
183 | DEFINITION_ORDER
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 | project
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 |
372 |
373 |
374 |
375 |
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 |
394 |
395 |
396 |
397 |
398 |
399 |
400 |
401 |
402 |
403 |
404 |
405 |
406 |
407 |
408 |
409 |
410 |
411 |
412 |
413 |
414 |
415 |
416 |
417 |
418 |
419 |
420 |
421 |
422 |
423 |
424 |
425 |
426 |
427 |
428 |
429 |
430 |
431 |
432 |
433 |
434 |
435 |
436 |
437 |
438 |
439 |
440 |
441 |
442 |
443 |
444 |
445 |
446 |
447 |
448 |
449 |
450 |
451 |
452 |
453 |
454 |
455 |
456 |
457 |
458 |
459 |
460 |
461 |
462 |
463 | 1502755907923
464 |
465 |
466 | 1502755907923
467 |
468 |
469 |
470 |
471 |
472 |
473 |
474 |
475 |
476 |
477 |
478 |
479 |
480 |
481 |
482 |
483 |
484 |
485 |
486 |
487 |
488 |
489 |
490 |
491 |
492 |
493 |
494 |
495 |
496 |
497 |
498 |
499 |
500 |
501 |
502 |
503 |
504 |
505 |
506 |
507 |
508 |
509 |
510 |
511 |
512 |
513 |
514 |
515 |
516 |
517 |
518 |
519 |
520 |
521 |
522 |
523 |
524 |
525 |
526 |
527 |
528 |
529 |
530 |
531 |
532 |
533 |
534 |
535 |
536 |
537 |
538 |
539 |
540 |
541 |
542 |
543 |
544 |
545 |
546 |
547 |
548 |
549 |
550 |
551 |
552 |
553 |
554 |
555 |
556 |
557 |
558 |
559 |
560 |
561 |
562 |
563 |
564 |
565 |
566 |
567 |
568 |
569 |
570 |
571 |
572 |
573 |
574 |
575 |
576 |
577 |
578 |
579 |
580 |
581 |
582 |
583 |
584 |
585 |
586 |
587 |
588 |
589 |
590 |
591 |
592 |
593 |
594 |
595 |
596 |
597 |
598 |
599 |
600 |
601 |
602 |
603 |
604 |
605 |
606 |
607 |
608 |
609 |
610 |
611 |
612 |
613 |
614 |
615 |
616 |
617 |
618 |
619 |
620 |
621 |
622 |
623 |
624 |
625 |
626 |
627 |
628 |
629 |
630 |
631 |
632 |
633 |
634 |
635 |
636 |
637 |
638 |
639 |
640 |
641 |
642 |
643 |
644 |
645 |
646 |
647 |
648 |
649 |
650 |
651 |
652 |
653 |
654 |
655 |
656 |
657 |
658 |
659 |
660 |
661 |
662 |
663 |
664 |
665 |
666 |
667 |
668 |
669 |
670 |
671 |
672 |
673 |
674 |
675 |
676 |
677 |
678 |
679 |
680 |
681 |
682 |
683 |
684 |
685 |
686 |
687 |
688 |
689 |
690 |
691 |
692 |
693 |
694 |
695 |
696 |
697 |
698 |
699 |
700 |
701 |
702 |
703 |
704 |
705 |
706 |
707 |
708 |
709 |
710 |
711 |
712 |
713 |
714 |
715 |
716 |
717 |
718 |
719 |
720 |
721 |
722 |
723 |
724 |
725 |
726 |
727 |
728 |
729 |
730 |
731 |
732 |
733 |
734 |
735 |
736 |
737 |
738 |
739 |
740 |
741 |
742 |
743 |
744 |
745 |
746 |
747 |
748 |
749 |
750 |
751 |
752 |
753 |
754 |
755 |
756 |
757 |
758 |
759 |
760 |
761 |
762 |
763 |
764 |
765 |
766 |
767 |
768 |
769 |
770 |
771 |
772 |
773 |
774 |
775 |
776 |
777 |
778 |
779 |
780 |
781 |
782 |
783 |
784 |
785 |
786 |
787 |
788 |
789 |
790 |
791 |
792 |
793 |
794 |
795 |
796 |
797 |
798 |
799 |
800 |
801 |
802 |
803 |
804 |
805 |
806 |
807 |
808 |
809 |
810 |
811 |
812 |
813 |
814 |
815 |
816 |
817 |
818 |
819 |
820 |
821 |
822 |
823 |
824 |
825 |
826 |
827 |
828 |
829 |
830 |
831 |
832 |
833 |
834 |
835 |
836 |
837 |
838 |
839 |
840 |
841 |
842 |
843 |
844 |
845 |
846 |
847 |
848 |
849 |
850 |
851 |
852 |
853 |
854 |
855 |
856 |
857 |
858 |
859 |
860 |
861 |
862 |
863 |
864 |
865 |
866 |
867 |
868 |
869 |
870 |
871 |
872 |
873 |
874 |
875 |
876 |
877 |
878 |
879 |
880 |
881 |
882 |
883 |
884 |
885 |
886 |
887 |
888 |
889 |
890 |
891 |
892 |
893 |
894 |
895 |
896 |
897 |
898 |
899 |
900 |
901 |
902 |
903 |
904 |
905 |
906 |
907 |
908 |
909 |
910 |
911 |
912 |
913 |
914 |
915 |
916 |
917 |
918 |
919 |
920 |
921 |
922 |
923 |
924 |
925 |
926 |
927 |
928 |
929 |
930 |
931 |
932 |
933 |
934 |
935 |
936 |
937 |
938 |
939 |
940 |
941 |
942 |
943 |
944 |
945 |
946 |
947 |
948 |
949 |
950 |
951 |
952 |
953 |
954 |
955 |
956 |
957 |
958 |
959 |
960 |
961 |
962 |
963 |
964 |
--------------------------------------------------------------------------------