├── requirements.txt
├── Compare.png
├── Compare_zoom.png
├── Predict the stock for tomorrow.png
├── stock price.txt
├── project.py
└── mystock.ipynb


/requirements.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Compare.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apollo000104/Stock_Price_Predction_LSTM/HEAD/Compare.png


--------------------------------------------------------------------------------
/Compare_zoom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apollo000104/Stock_Price_Predction_LSTM/HEAD/Compare_zoom.png


--------------------------------------------------------------------------------
/Predict the stock for tomorrow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apollo000104/Stock_Price_Predction_LSTM/HEAD/Predict the stock for tomorrow.png


--------------------------------------------------------------------------------
/stock price.txt:
--------------------------------------------------------------------------------
 1 | yfinance is an open-source Python library developed by Ran Aroussi for accessing Yahoo Finance’s financial data.
 2 | 
 3 | !pip install yfinance --quiet
 4 | !pip install pmdarima --quiet
 5 | 
 6 | The second line, “!pip install pmdarima --quiet”, installs the “pmdarima” library, which provides the AutoRegressive Integrated Moving Average (ARIMA) model for time series analysis and forecasting.
 7 | 
 8 | !pip install statsmodels==0.11.0rc1 --quiet
 9 | !pip install -Iv pulp==1.6.8 --quiet
10 | 
11 | The first line, “!pip install statsmodels==0.11.0rc1 --quiet”, installs version 0.11.0rc1 of the “statsmodels” library, which is used for statistical modeling and econometrics in Python.
12 | 
13 | The second line, “!pip install -Iv pulp==1.6.8 --quiet”, installs version 1.6.8 of the “pulp” library, which is used in Python for linear programming optimization. In “-Iv”, the “-I” (--ignore-installed) makes pip install the requested version even if another version of the library is already installed, and the “v” requests verbose output. The “==1.6.8” notation pins this exact version. The “--quiet” flag reduces the output messages printed during the installation process.
14 | 
15 | Ex:
16 | 	import yfinance as yf
17 | 
18 | 	# getting data from Yahoo Finance
19 | 	stock_name = 'AMD'  # you can change the stock ticker here; as an example we use the AMD ticker
20 | 	data = yf.download(stock_name, start="2020-03-26", end="2021-03-29")
21 | 
22 | # import plotly package for graphs
23 | import plotly
24 | import plotly.graph_objs as go
25 | import plotly.express as px
26 | from plotly.subplots import make_subplots
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/project.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | import torch
  4 | import torch.nn as nn
  5 | import torch.nn.functional as F
  6 | import torch.optim as optim
  7 | from torch.utils.data import Dataset
  8 | from torch.utils.data import DataLoader
  9 | 
 10 | import matplotlib.pyplot as plt
 11 | from matplotlib.pyplot import figure
 12 | 
 13 | from alpha_vantage.timeseries import TimeSeries 
 14 | 
print("All libraries loaded")

# Central settings dictionary read by the rest of the script, grouped by concern.
config = {
    # Data source: which symbol to request and which response field holds the price.
    "alpha_vantage": {
        "key": "demo", # you can use the demo API key for this project, but please make sure to get your own API key at https://www.alphavantage.co/support/#api-key
        "symbol": "IBM",
        "outputsize": "full",
        "key_adjusted_close": "5. adjusted close",
    },
    # Windowing and train/validation split.
    "data": {
        "window_size": 20, # number of consecutive prices in one input window
        "train_split_size": 0.80, # fraction of the samples assigned to the training set
    }, 
    # Plot appearance.
    "plots": {
        "xticks_interval": 90, # show a date every 90 days
        "color_actual": "#001f3f",
        "color_train": "#3D9970",
        "color_val": "#0074D9",
        "color_pred_train": "#3D9970",
        "color_pred_val": "#0074D9",
        "color_pred_test": "#FF4136",
    },
    # Arguments passed to the LSTMModel constructor.
    "model": {
        "input_size": 1, # since we are only using 1 feature, close price
        "num_lstm_layers": 2,
        "lstm_size": 32,
        "dropout": 0.2,
    },
    # Optimisation settings used by the data loaders, optimizer and scheduler.
    "training": {
        "device": "cpu", # "cuda" or "cpu"
        "batch_size": 64,
        "num_epoch": 100,
        "learning_rate": 0.01,
        "scheduler_step_size": 40, # the learning rate is multiplied by the scheduler's gamma every this many epochs
    }
}
 51 | def download_data(config):
 52 |     ts = TimeSeries(key='demo') #you can use the demo API key for this project, but please make sure to eventually get your own API key at https://www.alphavantage.co/support/#api-key. 
 53 |     data, meta_data = ts.get_daily_adjusted(config["alpha_vantage"]["symbol"], outputsize=config["alpha_vantage"]["outputsize"])
 54 | 
 55 |     data_date = [date for date in data.keys()]
 56 |     data_date.reverse()
 57 | 
 58 |     data_close_price = [float(data[date][config["alpha_vantage"]["key_adjusted_close"]]) for date in data.keys()]
 59 |     data_close_price.reverse()
 60 |     data_close_price = np.array(data_close_price)
 61 | 
 62 |     num_data_points = len(data_date)
 63 |     display_date_range = "from " + data_date[0] + " to " + data_date[num_data_points-1]
 64 |     print("Number data points", num_data_points, display_date_range)
 65 | 
 66 |     return data_date, data_close_price, num_data_points, display_date_range
 67 | 
# Fetch the price history once; the returned values are reused by the rest of the script.
data_date, data_close_price, num_data_points, display_date_range = download_data(config)

# plot

fig = figure(figsize=(25, 5), dpi=80)
fig.patch.set_facecolor((1.0, 1.0, 1.0))
plt.plot(data_date, data_close_price, color=config["plots"]["color_actual"])
# Keep a label on every `xticks_interval`-th point that is more than one interval away from the end,
# plus the very last point; every other position gets None as its label.
xticks = [data_date[i] if ((i%config["plots"]["xticks_interval"]==0 and (num_data_points-i) > config["plots"]["xticks_interval"]) or i==num_data_points-1) else None for i in range(num_data_points)] # make x ticks nice
x = np.arange(0,len(xticks))
plt.xticks(x, xticks, rotation='vertical')
plt.title("Daily close price for " + config["alpha_vantage"]["symbol"] + ", " + display_date_range)
plt.grid(visible=None, which='major', axis='y', linestyle='--')
plt.show()
 81 | 
 82 | class Normalizer():
 83 |     def __init__(self):
 84 |         self.mu = None
 85 |         self.sd = None
 86 | 
 87 |     def fit_transform(self, x):
 88 |         self.mu = np.mean(x, axis=(0), keepdims=True)
 89 |         self.sd = np.std(x, axis=(0), keepdims=True)
 90 |         normalized_x = (x - self.mu)/self.sd
 91 |         return normalized_x
 92 | 
 93 |     def inverse_transform(self, x):
 94 |         return (x*self.sd) + self.mu
 95 | 
# normalize
# NOTE(review): the mean and std are computed over the whole price series, i.e. before the
# train/validation split further down, so the validation portion contributes to the statistics.
scaler = Normalizer()
normalized_data_close_price = scaler.fit_transform(data_close_price)
 99 | def prepare_data_x(x, window_size):
100 |     # perform windowing
101 |     n_row = x.shape[0] - window_size + 1
102 |     output = np.lib.stride_tricks.as_strided(x, shape=(n_row, window_size), strides=(x.strides[0], x.strides[0]))
103 |     return output[:-1], output[-1]
104 | 
105 | 
def prepare_data_y(x, window_size):
    """Return the label for each window: the value that directly follows it.

    The first ``window_size`` entries of ``x`` are dropped, so element ``i`` of
    the result is ``x[i + window_size]``.
    """
    # use the next day as label
    return x[window_size:]
113 | 
# Build the input windows (plus the final window that has no label yet) and the matching labels.
data_x, data_x_unseen = prepare_data_x(normalized_data_close_price, window_size=config["data"]["window_size"])
data_y = prepare_data_y(normalized_data_close_price, window_size=config["data"]["window_size"])

# split dataset

# The first `train_split_size` fraction of the samples is used for training, the remainder for validation.
split_index = int(data_y.shape[0]*config["data"]["train_split_size"])
data_x_train = data_x[:split_index]
data_x_val = data_x[split_index:]
data_y_train = data_y[:split_index]
data_y_val = data_y[split_index:]

# prepare data for plotting

to_plot_data_y_train = np.zeros(num_data_points)
to_plot_data_y_val = np.zeros(num_data_points)

# Labels start `window_size` points into the series, so both slices are shifted by that offset.
to_plot_data_y_train[config["data"]["window_size"]:split_index+config["data"]["window_size"]] = scaler.inverse_transform(data_y_train)
to_plot_data_y_val[split_index+config["data"]["window_size"]:] = scaler.inverse_transform(data_y_val)

# Positions still equal to 0 were never filled; they are replaced by None.
to_plot_data_y_train = np.where(to_plot_data_y_train == 0, None, to_plot_data_y_train)
to_plot_data_y_val = np.where(to_plot_data_y_val == 0, None, to_plot_data_y_val)

## plots

fig = figure(figsize=(25, 5), dpi=80)
fig.patch.set_facecolor((1.0, 1.0, 1.0))
plt.plot(data_date, to_plot_data_y_train, label="Prices (train)", color=config["plots"]["color_train"])
plt.plot(data_date, to_plot_data_y_val, label="Prices (validation)", color=config["plots"]["color_val"])
xticks = [data_date[i] if ((i%config["plots"]["xticks_interval"]==0 and (num_data_points-i) > config["plots"]["xticks_interval"]) or i==num_data_points-1) else None for i in range(num_data_points)] # make x ticks nice
x = np.arange(0,len(xticks))
plt.xticks(x, xticks, rotation='vertical')
plt.title("Daily close prices for " + config["alpha_vantage"]["symbol"] + " - showing training and validation data")
# Use the `visible` keyword (as the first plot in this script does) rather than the old `b` keyword.
plt.grid(visible=None, which='major', axis='y', linestyle='--')
plt.legend()
plt.show()
149 | 
class TimeSeriesDataset(Dataset):
    """Dataset that pairs each input window with its label, both stored as float32."""

    def __init__(self, x, y):
        # in our case, we have only 1 feature, so a trailing axis is added to turn
        # `x` into [batch, sequence, features] for LSTM
        with_feature_axis = np.expand_dims(x, 2)
        self.x = with_feature_axis.astype(np.float32)
        self.y = y.astype(np.float32)

    def __len__(self):
        """Number of samples."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``(window, label)`` pair at position ``idx``."""
        window = self.x[idx]
        label = self.y[idx]
        return (window, label)
161 | 
# Wrap the train/validation arrays so they can be served in batches.
dataset_train = TimeSeriesDataset(data_x_train, data_y_train)
dataset_val = TimeSeriesDataset(data_x_val, data_y_val)

print("Train data shape", dataset_train.x.shape, dataset_train.y.shape)
print("Validation data shape", dataset_val.x.shape, dataset_val.y.shape)

# NOTE(review): the same two loaders are created again right before the model is built further down.
train_dataloader = DataLoader(dataset_train, batch_size=config["training"]["batch_size"], shuffle=True)
val_dataloader = DataLoader(dataset_val, batch_size=config["training"]["batch_size"], shuffle=True)
class LSTMModel(nn.Module):
    """Regressor built as linear -> ReLU -> stacked LSTM -> dropout -> linear.

    The final hidden state of every LSTM layer is concatenated and fed to the
    output layer; `forward` returns the last output column for each sample.
    """

    def __init__(self, input_size=1, hidden_layer_size=32, num_layers=2, output_size=1, dropout=0.2):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size

        self.linear_1 = nn.Linear(input_size, hidden_layer_size)
        self.relu = nn.ReLU()
        self.lstm = nn.LSTM(
            hidden_layer_size,
            hidden_size=self.hidden_layer_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.dropout = nn.Dropout(dropout)
        # One hidden state per LSTM layer is concatenated, hence num_layers * hidden_layer_size inputs.
        self.linear_2 = nn.Linear(num_layers*hidden_layer_size, output_size)

        self.init_weights()

    def init_weights(self):
        """Initialise the LSTM parameters: zero biases, Kaiming input weights, orthogonal recurrent weights."""
        for param_name, tensor in self.lstm.named_parameters():
            if 'bias' in param_name:
                nn.init.constant_(tensor, 0.0)
                continue
            if 'weight_ih' in param_name:
                nn.init.kaiming_normal_(tensor)
                continue
            if 'weight_hh' in param_name:
                nn.init.orthogonal_(tensor)

    def forward(self, x):
        """Run the network on a batch-first input and return one value per sample."""
        n_samples = x.shape[0]

        # layer 1
        projected = self.relu(self.linear_1(x))

        # LSTM layer
        _, (h_n, _) = self.lstm(projected)

        # reshape output from hidden cell into [batch, features] for `linear_2`
        flat_hidden = h_n.permute(1, 0, 2).reshape(n_samples, -1)

        # layer 2
        predictions = self.linear_2(self.dropout(flat_hidden))
        return predictions[:, -1]
def run_epoch(dataloader, is_training=False):
    """Run one pass over ``dataloader`` with the module-level model.

    Uses the module-level ``model``, ``criterion``, ``optimizer``,
    ``scheduler`` and ``config``.

    Args:
        dataloader: Iterable yielding ``(x, y)`` batches.
        is_training: When True the model is put in train mode and the
            optimizer is stepped after every batch; otherwise the model is
            put in eval mode and no parameters are updated.

    Returns:
        A tuple ``(epoch_loss, lr)``: the accumulated loss figure for the
        pass and the scheduler's last learning rate.
    """
    epoch_loss = 0

    if is_training:
        model.train()
    else:
        model.eval()

    device = config["training"]["device"]

    # Gradients are only needed when the optimizer is stepped; disabling them
    # for evaluation passes avoids building the autograd graph for nothing.
    with torch.set_grad_enabled(is_training):
        for x, y in dataloader:
            if is_training:
                optimizer.zero_grad()

            batchsize = x.shape[0]

            x = x.to(device)
            y = y.to(device)

            out = model(x)
            loss = criterion(out.contiguous(), y.contiguous())

            if is_training:
                loss.backward()
                optimizer.step()

            # NOTE(review): each batch loss is divided by the batch size before being
            # summed; if `criterion` already averages over the batch this is not a
            # per-sample mean -- confirm this is the intended reporting figure.
            epoch_loss += (loss.detach().item() / batchsize)

    lr = scheduler.get_last_lr()[0]

    return epoch_loss, lr
239 | 
# Shuffling loaders used while training.
train_dataloader = DataLoader(dataset_train, batch_size=config["training"]["batch_size"], shuffle=True)
val_dataloader = DataLoader(dataset_val, batch_size=config["training"]["batch_size"], shuffle=True)

# Build the network from the "model" section of the config and move it to the configured device.
model = LSTMModel(input_size=config["model"]["input_size"], hidden_layer_size=config["model"]["lstm_size"], num_layers=config["model"]["num_lstm_layers"], output_size=1, dropout=config["model"]["dropout"])
model = model.to(config["training"]["device"])

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=config["training"]["learning_rate"], betas=(0.9, 0.98), eps=1e-9)
# Multiply the learning rate by 0.1 every `scheduler_step_size` epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=config["training"]["scheduler_step_size"], gamma=0.1)

# One training pass followed by one validation pass per epoch; the scheduler advances once per epoch.
for epoch in range(config["training"]["num_epoch"]):
    loss_train, lr_train = run_epoch(train_dataloader, is_training=True)
    loss_val, lr_val = run_epoch(val_dataloader)
    scheduler.step()
    
    # The figure printed as "test" is the loss on the validation loader.
    print('Epoch[{}/{}] | loss train:{:.6f}, test:{:.6f} | lr:{:.6f}'
              .format(epoch+1, config["training"]["num_epoch"], loss_train, loss_val, lr_train))
    pass
# here we re-initialize the dataloaders without shuffling, so the predictions stay in date order for plotting

train_dataloader = DataLoader(dataset_train, batch_size=config["training"]["batch_size"], shuffle=False)
val_dataloader = DataLoader(dataset_val, batch_size=config["training"]["batch_size"], shuffle=False)

model.eval()

# predict on the training data, to see how well the model managed to learn and memorize

predicted_train = np.array([])

# Inference only: no gradients are needed, so autograd is switched off.
with torch.no_grad():
    for idx, (x, y) in enumerate(train_dataloader):
        x = x.to(config["training"]["device"])
        out = model(x)
        out = out.cpu().detach().numpy()
        predicted_train = np.concatenate((predicted_train, out))

# predict on the validation data, to see how the model does

predicted_val = np.array([])

with torch.no_grad():
    for idx, (x, y) in enumerate(val_dataloader):
        x = x.to(config["training"]["device"])
        out = model(x)
        out = out.cpu().detach().numpy()
        predicted_val = np.concatenate((predicted_val, out))

# prepare data for plotting

to_plot_data_y_train_pred = np.zeros(num_data_points)
to_plot_data_y_val_pred = np.zeros(num_data_points)

# Predictions start `window_size` points into the series, so both slices are shifted by that offset.
to_plot_data_y_train_pred[config["data"]["window_size"]:split_index+config["data"]["window_size"]] = scaler.inverse_transform(predicted_train)
to_plot_data_y_val_pred[split_index+config["data"]["window_size"]:] = scaler.inverse_transform(predicted_val)

# Positions still equal to 0 were never filled; they are replaced by None.
to_plot_data_y_train_pred = np.where(to_plot_data_y_train_pred == 0, None, to_plot_data_y_train_pred)
to_plot_data_y_val_pred = np.where(to_plot_data_y_val_pred == 0, None, to_plot_data_y_val_pred)

# plots

fig = figure(figsize=(25, 5), dpi=80)
fig.patch.set_facecolor((1.0, 1.0, 1.0))
plt.plot(data_date, data_close_price, label="Actual prices", color=config["plots"]["color_actual"])
plt.plot(data_date, to_plot_data_y_train_pred, label="Predicted prices (train)", color=config["plots"]["color_pred_train"])
plt.plot(data_date, to_plot_data_y_val_pred, label="Predicted prices (validation)", color=config["plots"]["color_pred_val"])
plt.title("Compare predicted prices to actual prices")
xticks = [data_date[i] if ((i%config["plots"]["xticks_interval"]==0 and (num_data_points-i) > config["plots"]["xticks_interval"]) or i==num_data_points-1) else None for i in range(num_data_points)] # make x ticks nice
x = np.arange(0,len(xticks))
plt.xticks(x, xticks, rotation='vertical')
# Use the `visible` keyword (as the first plot in this script does) rather than the old `b` keyword.
plt.grid(visible=None, which='major', axis='y', linestyle='--')
plt.legend()
plt.show()
310 | 
311 | 
312 | 
# prepare data for plotting the zoomed in view of the predicted prices (on validation set) vs. actual prices

to_plot_data_y_val_subset = scaler.inverse_transform(data_y_val)
to_plot_predicted_val = scaler.inverse_transform(predicted_val)
# Validation labels start at index split_index + window_size of the full date list.
to_plot_data_date = data_date[split_index+config["data"]["window_size"]:]

# plots

fig = figure(figsize=(25, 5), dpi=80)
fig.patch.set_facecolor((1.0, 1.0, 1.0))
plt.plot(to_plot_data_date, to_plot_data_y_val_subset, label="Actual prices", color=config["plots"]["color_actual"])
plt.plot(to_plot_data_date, to_plot_predicted_val, label="Predicted prices (validation)", color=config["plots"]["color_pred_val"])
plt.title("Zoom in to examine predicted price on validation data portion")
# Denser labels than in the full-range plots: one every xticks_interval/5 points, plus the last point.
xticks = [to_plot_data_date[i] if ((i%int(config["plots"]["xticks_interval"]/5)==0 and (len(to_plot_data_date)-i) > config["plots"]["xticks_interval"]/6) or i==len(to_plot_data_date)-1) else None for i in range(len(to_plot_data_date))] # make x ticks nice
xs = np.arange(0,len(xticks))
plt.xticks(xs, xticks, rotation='vertical')
# Use the `visible` keyword (as the first plot in this script does) rather than the old `b` keyword.
plt.grid(visible=None, which='major', axis='y', linestyle='--')
plt.legend()
plt.show()
332 | 
333 | 
# predict the closing price of the next trading day

model.eval()

x = torch.tensor(data_x_unseen).float().to(config["training"]["device"]).unsqueeze(0).unsqueeze(2) # this is the data type and shape required, [batch, sequence, feature]
# Inference only: no gradients are needed, so autograd is switched off.
with torch.no_grad():
    prediction = model(x)
prediction = prediction.cpu().detach().numpy()

# prepare plots

plot_range = 10
to_plot_data_y_val = np.zeros(plot_range)
to_plot_data_y_val_pred = np.zeros(plot_range)
to_plot_data_y_test_pred = np.zeros(plot_range)

# The last plot_range-1 known points fill the leading slots; the final slot is reserved for the new prediction.
to_plot_data_y_val[:plot_range-1] = scaler.inverse_transform(data_y_val)[-plot_range+1:]
to_plot_data_y_val_pred[:plot_range-1] = scaler.inverse_transform(predicted_val)[-plot_range+1:]

# `prediction` holds a single value; take that element explicitly instead of
# assigning a one-element array into a scalar slot.
to_plot_data_y_test_pred[plot_range-1] = scaler.inverse_transform(prediction)[0]

# Positions still equal to 0 were never filled; they are replaced by None.
to_plot_data_y_val = np.where(to_plot_data_y_val == 0, None, to_plot_data_y_val)
to_plot_data_y_val_pred = np.where(to_plot_data_y_val_pred == 0, None, to_plot_data_y_val_pred)
to_plot_data_y_test_pred = np.where(to_plot_data_y_test_pred == 0, None, to_plot_data_y_test_pred)

# plot

plot_date_test = data_date[-plot_range+1:]
plot_date_test.append("tomorrow")

fig = figure(figsize=(25, 5), dpi=80)
fig.patch.set_facecolor((1.0, 1.0, 1.0))
plt.plot(plot_date_test, to_plot_data_y_val, label="Actual prices", marker=".", markersize=10, color=config["plots"]["color_actual"])
plt.plot(plot_date_test, to_plot_data_y_val_pred, label="Past predicted prices", marker=".", markersize=10, color=config["plots"]["color_pred_val"])
plt.plot(plot_date_test, to_plot_data_y_test_pred, label="Predicted price for next day", marker=".", markersize=20, color=config["plots"]["color_pred_test"])
plt.title("Predicted close price of the next trading day")
# Use the `visible` keyword (as the first plot in this script does) rather than the old `b` keyword.
plt.grid(visible=None, which='major', axis='y', linestyle='--')
plt.legend()
plt.show()

print("Predicted close price of the next trading day:", round(to_plot_data_y_test_pred[plot_range-1], 2))
374 |     


--------------------------------------------------------------------------------
/mystock.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "\n",
 11 |     "import torch\n",
 12 |     "import torch.nn as nn\n",
 13 |     "import torch.nn.functional as F\n",
 14 |     "import torch.optim as optim\n",
 15 |     "from torch.utils.data import Dataset\n",
 16 |     "from torch.utils.data import DataLoader\n",
 17 |     "\n",
 18 |     "import matplotlib.pyplot as plt\n",
 19 |     "from matplotlib.pyplot import figure\n",
 20 |     "\n",
 21 |     "from alpha_vantage.timeseries import TimeSeries \n",
 22 |     "\n",
 23 |     "print(\"All libraries loaded\")\n",
 24 |     "\n",
 25 |     "config = {\n",
 26 |     "    \"alpha_vantage\": {\n",
 27 |     "        \"key\": \"demo\", # you can use the demo API key for this project, but please make sure to get your own API key at https://www.alphavantage.co/support/#api-key\n",
 28 |     "        \"symbol\": \"IBM\",\n",
 29 |     "        \"outputsize\": \"full\",\n",
 30 |     "        \"key_adjusted_close\": \"5. adjusted close\",\n",
 31 |     "    },\n",
 32 |     "    \"data\": {\n",
 33 |     "        \"window_size\": 20,\n",
 34 |     "        \"train_split_size\": 0.80,\n",
 35 |     "    }, \n",
 36 |     "    \"plots\": {\n",
 37 |     "        \"xticks_interval\": 90, # show a date every 90 days\n",
 38 |     "        \"color_actual\": \"#001f3f\",\n",
 39 |     "        \"color_train\": \"#3D9970\",\n",
 40 |     "        \"color_val\": \"#0074D9\",\n",
 41 |     "        \"color_pred_train\": \"#3D9970\",\n",
 42 |     "        \"color_pred_val\": \"#0074D9\",\n",
 43 |     "        \"color_pred_test\": \"#FF4136\",\n",
 44 |     "    },\n",
 45 |     "    \"model\": {\n",
 46 |     "        \"input_size\": 1, # since we are only using 1 feature, close price\n",
 47 |     "        \"num_lstm_layers\": 2,\n",
 48 |     "        \"lstm_size\": 32,\n",
 49 |     "        \"dropout\": 0.2,\n",
 50 |     "    },\n",
 51 |     "    \"training\": {\n",
 52 |     "        \"device\": \"cpu\", # \"cuda\" or \"cpu\"\n",
 53 |     "        \"batch_size\": 64,\n",
 54 |     "        \"num_epoch\": 100,\n",
 55 |     "        \"learning_rate\": 0.01,\n",
 56 |     "        \"scheduler_step_size\": 40,\n",
 57 |     "    }\n",
 58 |     "}\n",
 59 |     "def download_data(config):\n",
 60 |     "    ts = TimeSeries(key='demo') #you can use the demo API key for this project, but please make sure to eventually get your own API key at https://www.alphavantage.co/support/#api-key. \n",
 61 |     "    data, meta_data = ts.get_daily_adjusted(config[\"alpha_vantage\"][\"symbol\"], outputsize=config[\"alpha_vantage\"][\"outputsize\"])\n",
 62 |     "\n",
 63 |     "    data_date = [date for date in data.keys()]\n",
 64 |     "    data_date.reverse()\n",
 65 |     "\n",
 66 |     "    data_close_price = [float(data[date][config[\"alpha_vantage\"][\"key_adjusted_close\"]]) for date in data.keys()]\n",
 67 |     "    data_close_price.reverse()\n",
 68 |     "    data_close_price = np.array(data_close_price)\n",
 69 |     "\n",
 70 |     "    num_data_points = len(data_date)\n",
 71 |     "    display_date_range = \"from \" + data_date[0] + \" to \" + data_date[num_data_points-1]\n",
 72 |     "    print(\"Number data points\", num_data_points, display_date_range)\n",
 73 |     "\n",
 74 |     "    return data_date, data_close_price, num_data_points, display_date_range\n",
 75 |     "\n",
 76 |     "data_date, data_close_price, num_data_points, display_date_range = download_data(config)\n",
 77 |     "\n",
 78 |     "# plot\n",
 79 |     "\n",
 80 |     "fig = figure(figsize=(25, 5), dpi=80)\n",
 81 |     "fig.patch.set_facecolor((1.0, 1.0, 1.0))\n",
 82 |     "plt.plot(data_date, data_close_price, color=config[\"plots\"][\"color_actual\"])\n",
 83 |     "xticks = [data_date[i] if ((i%config[\"plots\"][\"xticks_interval\"]==0 and (num_data_points-i) > config[\"plots\"][\"xticks_interval\"]) or i==num_data_points-1) else None for i in range(num_data_points)] # make x ticks nice\n",
 84 |     "x = np.arange(0,len(xticks))\n",
 85 |     "plt.xticks(x, xticks, rotation='vertical')\n",
 86 |     "plt.title(\"Daily close price for \" + config[\"alpha_vantage\"][\"symbol\"] + \", \" + display_date_range)\n",
 87 |     "plt.grid(visible=None, which='major', axis='y', linestyle='--')\n",
 88 |     "plt.show()\n",
 89 |     "\n",
 90 |     "class Normalizer():\n",
 91 |     "    def __init__(self):\n",
 92 |     "        self.mu = None\n",
 93 |     "        self.sd = None\n",
 94 |     "\n",
 95 |     "    def fit_transform(self, x):\n",
 96 |     "        self.mu = np.mean(x, axis=(0), keepdims=True)\n",
 97 |     "        self.sd = np.std(x, axis=(0), keepdims=True)\n",
 98 |     "        normalized_x = (x - self.mu)/self.sd\n",
 99 |     "        return normalized_x\n",
100 |     "\n",
101 |     "    def inverse_transform(self, x):\n",
102 |     "        return (x*self.sd) + self.mu\n",
103 |     "\n",
104 |     "# normalize\n",
105 |     "scaler = Normalizer()\n",
106 |     "normalized_data_close_price = scaler.fit_transform(data_close_price)\n",
107 |     "def prepare_data_x(x, window_size):\n",
108 |     "    # perform windowing\n",
109 |     "    n_row = x.shape[0] - window_size + 1\n",
110 |     "    output = np.lib.stride_tricks.as_strided(x, shape=(n_row, window_size), strides=(x.strides[0], x.strides[0]))\n",
111 |     "    return output[:-1], output[-1]\n",
112 |     "\n",
113 |     "\n",
114 |     "def prepare_data_y(x, window_size):\n",
115 |     "    # # perform simple moving average\n",
116 |     "    # output = np.convolve(x, np.ones(window_size), 'valid') / window_size\n",
117 |     "\n",
118 |     "    # use the next day as label\n",
119 |     "    output = x[window_size:]\n",
120 |     "    return output\n",
121 |     "\n",
122 |     "data_x, data_x_unseen = prepare_data_x(normalized_data_close_price, window_size=config[\"data\"][\"window_size\"])\n",
123 |     "data_y = prepare_data_y(normalized_data_close_price, window_size=config[\"data\"][\"window_size\"])\n",
124 |     "\n",
125 |     "# split dataset\n",
126 |     "\n",
127 |     "split_index = int(data_y.shape[0]*config[\"data\"][\"train_split_size\"])\n",
128 |     "data_x_train = data_x[:split_index]\n",
129 |     "data_x_val = data_x[split_index:]\n",
130 |     "data_y_train = data_y[:split_index]\n",
131 |     "data_y_val = data_y[split_index:]\n",
132 |     "\n",
133 |     "# prepare data for plotting\n",
134 |     "\n",
135 |     "to_plot_data_y_train = np.zeros(num_data_points)\n",
136 |     "to_plot_data_y_val = np.zeros(num_data_points)\n",
137 |     "\n",
138 |     "to_plot_data_y_train[config[\"data\"][\"window_size\"]:split_index+config[\"data\"][\"window_size\"]] = scaler.inverse_transform(data_y_train)\n",
139 |     "to_plot_data_y_val[split_index+config[\"data\"][\"window_size\"]:] = scaler.inverse_transform(data_y_val)\n",
140 |     "\n",
141 |     "to_plot_data_y_train = np.where(to_plot_data_y_train == 0, None, to_plot_data_y_train)\n",
142 |     "to_plot_data_y_val = np.where(to_plot_data_y_val == 0, None, to_plot_data_y_val)\n",
143 |     "\n",
144 |     "## plots\n",
145 |     "\n",
146 |     "fig = figure(figsize=(25, 5), dpi=80)\n",
147 |     "fig.patch.set_facecolor((1.0, 1.0, 1.0))\n",
148 |     "plt.plot(data_date, to_plot_data_y_train, label=\"Prices (train)\", color=config[\"plots\"][\"color_train\"])\n",
149 |     "plt.plot(data_date, to_plot_data_y_val, label=\"Prices (validation)\", color=config[\"plots\"][\"color_val\"])\n",
150 |     "xticks = [data_date[i] if ((i%config[\"plots\"][\"xticks_interval\"]==0 and (num_data_points-i) > config[\"plots\"][\"xticks_interval\"]) or i==num_data_points-1) else None for i in range(num_data_points)] # make x ticks nice\n",
151 |     "x = np.arange(0,len(xticks))\n",
152 |     "plt.xticks(x, xticks, rotation='vertical')\n",
153 |     "plt.title(\"Daily close prices for \" + config[\"alpha_vantage\"][\"symbol\"] + \" - showing training and validation data\")\n",
154 |     "plt.grid(visible=None, which='major', axis='y', linestyle='--')\n",
155 |     "plt.legend()\n",
156 |     "plt.show()\n",
157 |     "\n",
158 |     "class TimeSeriesDataset(Dataset):\n",
159 |     "    def __init__(self, x, y):\n",
160 |     "        x = np.expand_dims(x, 2) # in our case, we have only 1 feature, so we need to convert `x` into [batch, sequence, features] for LSTM\n",
161 |     "        self.x = x.astype(np.float32)\n",
162 |     "        self.y = y.astype(np.float32)\n",
163 |     "        \n",
164 |     "    def __len__(self):\n",
165 |     "        return len(self.x)\n",
166 |     "\n",
167 |     "    def __getitem__(self, idx):\n",
168 |     "        return (self.x[idx], self.y[idx])\n",
169 |     "\n",
170 |     "dataset_train = TimeSeriesDataset(data_x_train, data_y_train)\n",
171 |     "dataset_val = TimeSeriesDataset(data_x_val, data_y_val)\n",
172 |     "\n",
173 |     "print(\"Train data shape\", dataset_train.x.shape, dataset_train.y.shape)\n",
174 |     "print(\"Validation data shape\", dataset_val.x.shape, dataset_val.y.shape)\n",
175 |     "\n",
176 |     "train_dataloader = DataLoader(dataset_train, batch_size=config[\"training\"][\"batch_size\"], shuffle=True)\n",
177 |     "val_dataloader = DataLoader(dataset_val, batch_size=config[\"training\"][\"batch_size\"], shuffle=True)\n",
178 |     "class LSTMModel(nn.Module):\n",
179 |     "    def __init__(self, input_size=1, hidden_layer_size=32, num_layers=2, output_size=1, dropout=0.2):\n",
180 |     "        super().__init__()\n",
181 |     "        self.hidden_layer_size = hidden_layer_size\n",
182 |     "\n",
183 |     "        self.linear_1 = nn.Linear(input_size, hidden_layer_size)\n",
184 |     "        self.relu = nn.ReLU()\n",
185 |     "        self.lstm = nn.LSTM(hidden_layer_size, hidden_size=self.hidden_layer_size, num_layers=num_layers, batch_first=True)\n",
186 |     "        self.dropout = nn.Dropout(dropout)\n",
187 |     "        self.linear_2 = nn.Linear(num_layers*hidden_layer_size, output_size)\n",
188 |     "        \n",
189 |     "        self.init_weights()\n",
190 |     "\n",
191 |     "    def init_weights(self):\n",
192 |     "        for name, param in self.lstm.named_parameters():\n",
193 |     "            if 'bias' in name:\n",
194 |     "                 nn.init.constant_(param, 0.0)\n",
195 |     "            elif 'weight_ih' in name:\n",
196 |     "                 nn.init.kaiming_normal_(param)\n",
197 |     "            elif 'weight_hh' in name:\n",
198 |     "                 nn.init.orthogonal_(param)\n",
199 |     "\n",
200 |     "    def forward(self, x):\n",
201 |     "        batchsize = x.shape[0]\n",
202 |     "\n",
203 |     "        # layer 1\n",
204 |     "        x = self.linear_1(x)\n",
205 |     "        x = self.relu(x)\n",
206 |     "        \n",
207 |     "        # LSTM layer\n",
208 |     "        lstm_out, (h_n, c_n) = self.lstm(x)\n",
209 |     "\n",
210 |     "        # reshape output from hidden cell into [batch, features] for `linear_2`\n",
211 |     "        x = h_n.permute(1, 0, 2).reshape(batchsize, -1) \n",
212 |     "        \n",
213 |     "        # layer 2\n",
214 |     "        x = self.dropout(x)\n",
215 |     "        predictions = self.linear_2(x)\n",
216 |     "        return predictions[:,-1]\n",
217 |     "    pass\n",
218 |     "def run_epoch(dataloader, is_training=False):\n",
219 |     "    epoch_loss = 0\n",
220 |     "\n",
221 |     "    if is_training:\n",
222 |     "        model.train()\n",
223 |     "    else:\n",
224 |     "        model.eval()\n",
225 |     "\n",
226 |     "    for idx, (x, y) in enumerate(dataloader):\n",
227 |     "        if is_training:\n",
228 |     "            optimizer.zero_grad()\n",
229 |     "\n",
230 |     "        batchsize = x.shape[0]\n",
231 |     "\n",
232 |     "        x = x.to(config[\"training\"][\"device\"])\n",
233 |     "        y = y.to(config[\"training\"][\"device\"])\n",
234 |     "\n",
235 |     "        out = model(x)\n",
236 |     "        loss = criterion(out.contiguous(), y.contiguous())\n",
237 |     "\n",
238 |     "        if is_training:\n",
239 |     "            loss.backward()\n",
240 |     "            optimizer.step()\n",
241 |     "\n",
242 |     "        epoch_loss += (loss.detach().item() / batchsize)\n",
243 |     "\n",
244 |     "    lr = scheduler.get_last_lr()[0]\n",
245 |     "\n",
246 |     "    return epoch_loss, lr\n",
247 |     "\n",
248 |     "train_dataloader = DataLoader(dataset_train, batch_size=config[\"training\"][\"batch_size\"], shuffle=True)\n",
249 |     "val_dataloader = DataLoader(dataset_val, batch_size=config[\"training\"][\"batch_size\"], shuffle=True)\n",
250 |     "\n",
251 |     "model = LSTMModel(input_size=config[\"model\"][\"input_size\"], hidden_layer_size=config[\"model\"][\"lstm_size\"], num_layers=config[\"model\"][\"num_lstm_layers\"], output_size=1, dropout=config[\"model\"][\"dropout\"])\n",
252 |     "model = model.to(config[\"training\"][\"device\"])\n",
253 |     "\n",
254 |     "criterion = nn.MSELoss()\n",
255 |     "optimizer = optim.Adam(model.parameters(), lr=config[\"training\"][\"learning_rate\"], betas=(0.9, 0.98), eps=1e-9)\n",
256 |     "scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=config[\"training\"][\"scheduler_step_size\"], gamma=0.1)\n",
257 |     "\n",
258 |     "for epoch in range(config[\"training\"][\"num_epoch\"]):\n",
259 |     "    loss_train, lr_train = run_epoch(train_dataloader, is_training=True)\n",
260 |     "    loss_val, lr_val = run_epoch(val_dataloader)\n",
261 |     "    scheduler.step()\n",
262 |     "    \n",
263 |     "    print('Epoch[{}/{}] | loss train:{:.6f}, test:{:.6f} | lr:{:.6f}'\n",
264 |     "              .format(epoch+1, config[\"training\"][\"num_epoch\"], loss_train, loss_val, lr_train))\n",
265 |     "    pass\n",
266 |     "# here we re-initialize the dataloaders so the data isn't shuffled and we can plot the values by date\n",
267 |     "\n",
268 |     "train_dataloader = DataLoader(dataset_train, batch_size=config[\"training\"][\"batch_size\"], shuffle=False)\n",
269 |     "val_dataloader = DataLoader(dataset_val, batch_size=config[\"training\"][\"batch_size\"], shuffle=False)\n",
270 |     "\n",
271 |     "model.eval()\n",
272 |     "\n",
273 |     "# predict on the training data, to see how well the model managed to learn and memorize\n",
274 |     "\n",
275 |     "predicted_train = np.array([])\n",
276 |     "\n",
277 |     "for idx, (x, y) in enumerate(train_dataloader):\n",
278 |     "    x = x.to(config[\"training\"][\"device\"])\n",
279 |     "    out = model(x)\n",
280 |     "    out = out.cpu().detach().numpy()\n",
281 |     "    predicted_train = np.concatenate((predicted_train, out))\n",
282 |     "\n",
283 |     "# predict on the validation data, to see how the model does\n",
284 |     "\n",
285 |     "predicted_val = np.array([])\n",
286 |     "\n",
287 |     "for idx, (x, y) in enumerate(val_dataloader):\n",
288 |     "    x = x.to(config[\"training\"][\"device\"])\n",
289 |     "    out = model(x)\n",
290 |     "    out = out.cpu().detach().numpy()\n",
291 |     "    predicted_val = np.concatenate((predicted_val, out))\n",
292 |     "\n",
293 |     "# prepare data for plotting\n",
294 |     "\n",
295 |     "to_plot_data_y_train_pred = np.zeros(num_data_points)\n",
296 |     "to_plot_data_y_val_pred = np.zeros(num_data_points)\n",
297 |     "\n",
298 |     "to_plot_data_y_train_pred[config[\"data\"][\"window_size\"]:split_index+config[\"data\"][\"window_size\"]] = scaler.inverse_transform(predicted_train)\n",
299 |     "to_plot_data_y_val_pred[split_index+config[\"data\"][\"window_size\"]:] = scaler.inverse_transform(predicted_val)\n",
300 |     "\n",
301 |     "to_plot_data_y_train_pred = np.where(to_plot_data_y_train_pred == 0, None, to_plot_data_y_train_pred)\n",
302 |     "to_plot_data_y_val_pred = np.where(to_plot_data_y_val_pred == 0, None, to_plot_data_y_val_pred)\n",
303 |     "\n",
304 |     "# plots\n",
305 |     "\n",
306 |     "fig = figure(figsize=(25, 5), dpi=80)\n",
307 |     "fig.patch.set_facecolor((1.0, 1.0, 1.0))\n",
308 |     "plt.plot(data_date, data_close_price, label=\"Actual prices\", color=config[\"plots\"][\"color_actual\"])\n",
309 |     "plt.plot(data_date, to_plot_data_y_train_pred, label=\"Predicted prices (train)\", color=config[\"plots\"][\"color_pred_train\"])\n",
310 |     "plt.plot(data_date, to_plot_data_y_val_pred, label=\"Predicted prices (validation)\", color=config[\"plots\"][\"color_pred_val\"])\n",
311 |     "plt.title(\"Compare predicted prices to actual prices\")\n",
312 |     "xticks = [data_date[i] if ((i%config[\"plots\"][\"xticks_interval\"]==0 and (num_data_points-i) > config[\"plots\"][\"xticks_interval\"]) or i==num_data_points-1) else None for i in range(num_data_points)] # make x ticks nice\n",
313 |     "x = np.arange(0,len(xticks))\n",
314 |     "plt.xticks(x, xticks, rotation='vertical')\n",
315 |     "plt.grid(visible=None, which='major', axis='y', linestyle='--')\n",
316 |     "plt.legend()\n",
317 |     "plt.show()\n",
318 |     "\n",
319 |     "\n",
320 |     "\n",
321 |     "# prepare data for plotting the zoomed in view of the predicted prices (on validation set) vs. actual prices\n",
322 |     "\n",
323 |     "to_plot_data_y_val_subset = scaler.inverse_transform(data_y_val)\n",
324 |     "to_plot_predicted_val = scaler.inverse_transform(predicted_val)\n",
325 |     "to_plot_data_date = data_date[split_index+config[\"data\"][\"window_size\"]:]\n",
326 |     "\n",
327 |     "# plots\n",
328 |     "\n",
329 |     "fig = figure(figsize=(25, 5), dpi=80)\n",
330 |     "fig.patch.set_facecolor((1.0, 1.0, 1.0))\n",
331 |     "plt.plot(to_plot_data_date, to_plot_data_y_val_subset, label=\"Actual prices\", color=config[\"plots\"][\"color_actual\"])\n",
332 |     "plt.plot(to_plot_data_date, to_plot_predicted_val, label=\"Predicted prices (validation)\", color=config[\"plots\"][\"color_pred_val\"])\n",
333 |     "plt.title(\"Zoom in to examine predicted price on validation data portion\")\n",
334 |     "xticks = [to_plot_data_date[i] if ((i%int(config[\"plots\"][\"xticks_interval\"]/5)==0 and (len(to_plot_data_date)-i) > config[\"plots\"][\"xticks_interval\"]/6) or i==len(to_plot_data_date)-1) else None for i in range(len(to_plot_data_date))] # make x ticks nice\n",
335 |     "xs = np.arange(0,len(xticks))\n",
336 |     "plt.xticks(xs, xticks, rotation='vertical')\n",
337 |     "plt.grid(visible=None, which='major', axis='y', linestyle='--')\n",
338 |     "plt.legend()\n",
339 |     "plt.show()\n",
340 |     "\n",
341 |     "\n",
342 |     "# predict the closing price of the next trading day\n",
343 |     "\n",
344 |     "model.eval()\n",
345 |     "\n",
346 |     "x = torch.tensor(data_x_unseen).float().to(config[\"training\"][\"device\"]).unsqueeze(0).unsqueeze(2) # this is the data type and shape required, [batch, sequence, feature]\n",
347 |     "prediction = model(x)\n",
348 |     "prediction = prediction.cpu().detach().numpy()\n",
349 |     "\n",
350 |     "# prepare plots\n",
351 |     "\n",
352 |     "plot_range = 10\n",
353 |     "to_plot_data_y_val = np.zeros(plot_range)\n",
354 |     "to_plot_data_y_val_pred = np.zeros(plot_range)\n",
355 |     "to_plot_data_y_test_pred = np.zeros(plot_range)\n",
356 |     "\n",
357 |     "to_plot_data_y_val[:plot_range-1] = scaler.inverse_transform(data_y_val)[-plot_range+1:]\n",
358 |     "to_plot_data_y_val_pred[:plot_range-1] = scaler.inverse_transform(predicted_val)[-plot_range+1:]\n",
359 |     "\n",
360 |     "to_plot_data_y_test_pred[plot_range-1] = scaler.inverse_transform(prediction)\n",
361 |     "\n",
362 |     "to_plot_data_y_val = np.where(to_plot_data_y_val == 0, None, to_plot_data_y_val)\n",
363 |     "to_plot_data_y_val_pred = np.where(to_plot_data_y_val_pred == 0, None, to_plot_data_y_val_pred)\n",
364 |     "to_plot_data_y_test_pred = np.where(to_plot_data_y_test_pred == 0, None, to_plot_data_y_test_pred)\n",
365 |     "\n",
366 |     "# plot\n",
367 |     "\n",
368 |     "plot_date_test = data_date[-plot_range+1:]\n",
369 |     "plot_date_test.append(\"tomorrow\")\n",
370 |     "\n",
371 |     "fig = figure(figsize=(25, 5), dpi=80)\n",
372 |     "fig.patch.set_facecolor((1.0, 1.0, 1.0))\n",
373 |     "plt.plot(plot_date_test, to_plot_data_y_val, label=\"Actual prices\", marker=\".\", markersize=10, color=config[\"plots\"][\"color_actual\"])\n",
374 |     "plt.plot(plot_date_test, to_plot_data_y_val_pred, label=\"Past predicted prices\", marker=\".\", markersize=10, color=config[\"plots\"][\"color_pred_val\"])\n",
375 |     "plt.plot(plot_date_test, to_plot_data_y_test_pred, label=\"Predicted price for next day\", marker=\".\", markersize=20, color=config[\"plots\"][\"color_pred_test\"])\n",
376 |     "plt.title(\"Predicted close price of the next trading day\")\n",
377 |     "plt.grid(visible=None, which='major', axis='y', linestyle='--')\n",
378 |     "plt.legend()\n",
379 |     "plt.show()\n",
380 |     "\n",
381 |     "print(\"Predicted close price of the next trading day:\", round(to_plot_data_y_test_pred[plot_range-1], 2))\n",
382 |     "    "
383 |    ]
384 |   }
385 |  ],
386 |  "metadata": {
387 |   "kernelspec": {
388 |    "display_name": "myenv",
389 |    "language": "python",
390 |    "name": "python3"
391 |   },
392 |   "language_info": {
393 |    "codemirror_mode": {
394 |     "name": "ipython",
395 |     "version": 3
396 |    },
397 |    "file_extension": ".py",
398 |    "mimetype": "text/x-python",
399 |    "name": "python",
400 |    "nbconvert_exporter": "python",
401 |    "pygments_lexer": "ipython3",
402 |    "version": "3.11.4"
403 |   },
404 |   "orig_nbformat": 4
405 |  },
406 |  "nbformat": 4,
407 |  "nbformat_minor": 2
408 | }
409 | 


--------------------------------------------------------------------------------