├── requirements.txt ├── Compare.png ├── Compare_zoom.png ├── Predict the stock for tomorrow.png ├── stock price.txt ├── project.py └── mystock.ipynb /requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apollo000104/Stock_Price_Predction_LSTM/HEAD/Compare.png -------------------------------------------------------------------------------- /Compare_zoom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apollo000104/Stock_Price_Predction_LSTM/HEAD/Compare_zoom.png -------------------------------------------------------------------------------- /Predict the stock for tomorrow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apollo000104/Stock_Price_Predction_LSTM/HEAD/Predict the stock for tomorrow.png -------------------------------------------------------------------------------- /stock price.txt: -------------------------------------------------------------------------------- 1 | Yfinance is an open source library developed by Ran Aroussi for accessing Yahoo Finance’s financial data 2 | 3 | !pip install yfinance --quiet 4 | !pip install pmdarima --quiet 5 | 6 | With the second line “!pip install pmdarima — quiet”, the “pmdarima” library is installed. The AutoRegressive Integrated Moving Average (ARIMA) model is used in this library for time series analysis and forecasting 7 | 8 | !pip install statsmodels==0.11.0rc1 --quiet 9 | !pip install -Iv pulp==1.6.8 --quiet 10 | 11 | “!pip install statsmodels==0.11.0rc1 — quiet” installs the “statsmodels” library version 0.11.0rc1. Statistical modeling and econometrics can be performed using this library in Python. 12 | 13 | It installs version 1.6.8 of the “pulp” library with the second line “!pip install -Iv pulp==1.6.8 — quiet”. In Python, this library is used for linear programming optimization. If a newer version of the library is already installed, the “-Iv” option forces the installation of the specified version. Installing this specific version is ensured by the “==1.6.8” notation. During the installation process, “ — quiet” suppresses any output messages generated. 14 | 15 | Ex: 16 | import yfinance as yf 17 | 18 | # getting data from Yahoo Finance 19 | stock_name = 'AMD' # here you can change the name of stock ticker, for example we will take AMD ticker 20 | data = yf.download(stock_name, start="2020-03-26", end="2021-03-29") 21 | 22 | # import plotly package for graphs 23 | import plotly 24 | import plotly.graph_objs as go 25 | import plotly.express as px 26 | from plotly.subplots import make_subplots 27 | 28 | 29 | -------------------------------------------------------------------------------- /project.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torch.optim as optim 7 | from torch.utils.data import Dataset 8 | from torch.utils.data import DataLoader 9 | 10 | import matplotlib.pyplot as plt 11 | from matplotlib.pyplot import figure 12 | 13 | from alpha_vantage.timeseries import TimeSeries 14 | 15 | print("All libraries loaded") 16 | 17 | config = { 18 | "alpha_vantage": { 19 | "key": "demo", # you can use the demo API key for this project, but please make sure to get your own API key at https://www.alphavantage.co/support/#api-key 20 | "symbol": "IBM", 21 | "outputsize": "full", 22 | "key_adjusted_close": "5. adjusted close", 23 | }, 24 | "data": { 25 | "window_size": 20, 26 | "train_split_size": 0.80, 27 | }, 28 | "plots": { 29 | "xticks_interval": 90, # show a date every 90 days 30 | "color_actual": "#001f3f", 31 | "color_train": "#3D9970", 32 | "color_val": "#0074D9", 33 | "color_pred_train": "#3D9970", 34 | "color_pred_val": "#0074D9", 35 | "color_pred_test": "#FF4136", 36 | }, 37 | "model": { 38 | "input_size": 1, # since we are only using 1 feature, close price 39 | "num_lstm_layers": 2, 40 | "lstm_size": 32, 41 | "dropout": 0.2, 42 | }, 43 | "training": { 44 | "device": "cpu", # "cuda" or "cpu" 45 | "batch_size": 64, 46 | "num_epoch": 100, 47 | "learning_rate": 0.01, 48 | "scheduler_step_size": 40, 49 | } 50 | } 51 | def download_data(config): 52 | ts = TimeSeries(key='demo') #you can use the demo API key for this project, but please make sure to eventually get your own API key at https://www.alphavantage.co/support/#api-key. 53 | data, meta_data = ts.get_daily_adjusted(config["alpha_vantage"]["symbol"], outputsize=config["alpha_vantage"]["outputsize"]) 54 | 55 | data_date = [date for date in data.keys()] 56 | data_date.reverse() 57 | 58 | data_close_price = [float(data[date][config["alpha_vantage"]["key_adjusted_close"]]) for date in data.keys()] 59 | data_close_price.reverse() 60 | data_close_price = np.array(data_close_price) 61 | 62 | num_data_points = len(data_date) 63 | display_date_range = "from " + data_date[0] + " to " + data_date[num_data_points-1] 64 | print("Number data points", num_data_points, display_date_range) 65 | 66 | return data_date, data_close_price, num_data_points, display_date_range 67 | 68 | data_date, data_close_price, num_data_points, display_date_range = download_data(config) 69 | 70 | # plot 71 | 72 | fig = figure(figsize=(25, 5), dpi=80) 73 | fig.patch.set_facecolor((1.0, 1.0, 1.0)) 74 | plt.plot(data_date, data_close_price, color=config["plots"]["color_actual"]) 75 | xticks = [data_date[i] if ((i%config["plots"]["xticks_interval"]==0 and (num_data_points-i) > config["plots"]["xticks_interval"]) or i==num_data_points-1) else None for i in range(num_data_points)] # make x ticks nice 76 | x = np.arange(0,len(xticks)) 77 | plt.xticks(x, xticks, rotation='vertical') 78 | plt.title("Daily close price for " + config["alpha_vantage"]["symbol"] + ", " + display_date_range) 79 | plt.grid(visible=None, which='major', axis='y', linestyle='--') 80 | plt.show() 81 | 82 | class Normalizer(): 83 | def __init__(self): 84 | self.mu = None 85 | self.sd = None 86 | 87 | def fit_transform(self, x): 88 | self.mu = np.mean(x, axis=(0), keepdims=True) 89 | self.sd = np.std(x, axis=(0), keepdims=True) 90 | normalized_x = (x - self.mu)/self.sd 91 | return normalized_x 92 | 93 | def inverse_transform(self, x): 94 | return (x*self.sd) + self.mu 95 | 96 | # normalize 97 | scaler = Normalizer() 98 | normalized_data_close_price = scaler.fit_transform(data_close_price) 99 | def prepare_data_x(x, window_size): 100 | # perform windowing 101 | n_row = x.shape[0] - window_size + 1 102 | output = np.lib.stride_tricks.as_strided(x, shape=(n_row, window_size), strides=(x.strides[0], x.strides[0])) 103 | return output[:-1], output[-1] 104 | 105 | 106 | def prepare_data_y(x, window_size): 107 | # # perform simple moving average 108 | # output = np.convolve(x, np.ones(window_size), 'valid') / window_size 109 | 110 | # use the next day as label 111 | output = x[window_size:] 112 | return output 113 | 114 | data_x, data_x_unseen = prepare_data_x(normalized_data_close_price, window_size=config["data"]["window_size"]) 115 | data_y = prepare_data_y(normalized_data_close_price, window_size=config["data"]["window_size"]) 116 | 117 | # split dataset 118 | 119 | split_index = int(data_y.shape[0]*config["data"]["train_split_size"]) 120 | data_x_train = data_x[:split_index] 121 | data_x_val = data_x[split_index:] 122 | data_y_train = data_y[:split_index] 123 | data_y_val = data_y[split_index:] 124 | 125 | # prepare data for plotting 126 | 127 | to_plot_data_y_train = np.zeros(num_data_points) 128 | to_plot_data_y_val = np.zeros(num_data_points) 129 | 130 | to_plot_data_y_train[config["data"]["window_size"]:split_index+config["data"]["window_size"]] = scaler.inverse_transform(data_y_train) 131 | to_plot_data_y_val[split_index+config["data"]["window_size"]:] = scaler.inverse_transform(data_y_val) 132 | 133 | to_plot_data_y_train = np.where(to_plot_data_y_train == 0, None, to_plot_data_y_train) 134 | to_plot_data_y_val = np.where(to_plot_data_y_val == 0, None, to_plot_data_y_val) 135 | 136 | ## plots 137 | 138 | fig = figure(figsize=(25, 5), dpi=80) 139 | fig.patch.set_facecolor((1.0, 1.0, 1.0)) 140 | plt.plot(data_date, to_plot_data_y_train, label="Prices (train)", color=config["plots"]["color_train"]) 141 | plt.plot(data_date, to_plot_data_y_val, label="Prices (validation)", color=config["plots"]["color_val"]) 142 | xticks = [data_date[i] if ((i%config["plots"]["xticks_interval"]==0 and (num_data_points-i) > config["plots"]["xticks_interval"]) or i==num_data_points-1) else None for i in range(num_data_points)] # make x ticks nice 143 | x = np.arange(0,len(xticks)) 144 | plt.xticks(x, xticks, rotation='vertical') 145 | plt.title("Daily close prices for " + config["alpha_vantage"]["symbol"] + " - showing training and validation data") 146 | plt.grid(b=None, which='major', axis='y', linestyle='--') 147 | plt.legend() 148 | plt.show() 149 | 150 | class TimeSeriesDataset(Dataset): 151 | def __init__(self, x, y): 152 | x = np.expand_dims(x, 2) # in our case, we have only 1 feature, so we need to convert `x` into [batch, sequence, features] for LSTM 153 | self.x = x.astype(np.float32) 154 | self.y = y.astype(np.float32) 155 | 156 | def __len__(self): 157 | return len(self.x) 158 | 159 | def __getitem__(self, idx): 160 | return (self.x[idx], self.y[idx]) 161 | 162 | dataset_train = TimeSeriesDataset(data_x_train, data_y_train) 163 | dataset_val = TimeSeriesDataset(data_x_val, data_y_val) 164 | 165 | print("Train data shape", dataset_train.x.shape, dataset_train.y.shape) 166 | print("Validation data shape", dataset_val.x.shape, dataset_val.y.shape) 167 | 168 | train_dataloader = DataLoader(dataset_train, batch_size=config["training"]["batch_size"], shuffle=True) 169 | val_dataloader = DataLoader(dataset_val, batch_size=config["training"]["batch_size"], shuffle=True) 170 | class LSTMModel(nn.Module): 171 | def __init__(self, input_size=1, hidden_layer_size=32, num_layers=2, output_size=1, dropout=0.2): 172 | super().__init__() 173 | self.hidden_layer_size = hidden_layer_size 174 | 175 | self.linear_1 = nn.Linear(input_size, hidden_layer_size) 176 | self.relu = nn.ReLU() 177 | self.lstm = nn.LSTM(hidden_layer_size, hidden_size=self.hidden_layer_size, num_layers=num_layers, batch_first=True) 178 | self.dropout = nn.Dropout(dropout) 179 | self.linear_2 = nn.Linear(num_layers*hidden_layer_size, output_size) 180 | 181 | self.init_weights() 182 | 183 | def init_weights(self): 184 | for name, param in self.lstm.named_parameters(): 185 | if 'bias' in name: 186 | nn.init.constant_(param, 0.0) 187 | elif 'weight_ih' in name: 188 | nn.init.kaiming_normal_(param) 189 | elif 'weight_hh' in name: 190 | nn.init.orthogonal_(param) 191 | 192 | def forward(self, x): 193 | batchsize = x.shape[0] 194 | 195 | # layer 1 196 | x = self.linear_1(x) 197 | x = self.relu(x) 198 | 199 | # LSTM layer 200 | lstm_out, (h_n, c_n) = self.lstm(x) 201 | 202 | # reshape output from hidden cell into [batch, features] for `linear_2` 203 | x = h_n.permute(1, 0, 2).reshape(batchsize, -1) 204 | 205 | # layer 2 206 | x = self.dropout(x) 207 | predictions = self.linear_2(x) 208 | return predictions[:,-1] 209 | pass 210 | def run_epoch(dataloader, is_training=False): 211 | epoch_loss = 0 212 | 213 | if is_training: 214 | model.train() 215 | else: 216 | model.eval() 217 | 218 | for idx, (x, y) in enumerate(dataloader): 219 | if is_training: 220 | optimizer.zero_grad() 221 | 222 | batchsize = x.shape[0] 223 | 224 | x = x.to(config["training"]["device"]) 225 | y = y.to(config["training"]["device"]) 226 | 227 | out = model(x) 228 | loss = criterion(out.contiguous(), y.contiguous()) 229 | 230 | if is_training: 231 | loss.backward() 232 | optimizer.step() 233 | 234 | epoch_loss += (loss.detach().item() / batchsize) 235 | 236 | lr = scheduler.get_last_lr()[0] 237 | 238 | return epoch_loss, lr 239 | 240 | train_dataloader = DataLoader(dataset_train, batch_size=config["training"]["batch_size"], shuffle=True) 241 | val_dataloader = DataLoader(dataset_val, batch_size=config["training"]["batch_size"], shuffle=True) 242 | 243 | model = LSTMModel(input_size=config["model"]["input_size"], hidden_layer_size=config["model"]["lstm_size"], num_layers=config["model"]["num_lstm_layers"], output_size=1, dropout=config["model"]["dropout"]) 244 | model = model.to(config["training"]["device"]) 245 | 246 | criterion = nn.MSELoss() 247 | optimizer = optim.Adam(model.parameters(), lr=config["training"]["learning_rate"], betas=(0.9, 0.98), eps=1e-9) 248 | scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=config["training"]["scheduler_step_size"], gamma=0.1) 249 | 250 | for epoch in range(config["training"]["num_epoch"]): 251 | loss_train, lr_train = run_epoch(train_dataloader, is_training=True) 252 | loss_val, lr_val = run_epoch(val_dataloader) 253 | scheduler.step() 254 | 255 | print('Epoch[{}/{}] | loss train:{:.6f}, test:{:.6f} | lr:{:.6f}' 256 | .format(epoch+1, config["training"]["num_epoch"], loss_train, loss_val, lr_train)) 257 | pass 258 | # here we re-initialize dataloader so the data doesn't shuffled, so we can plot the values by date 259 | 260 | train_dataloader = DataLoader(dataset_train, batch_size=config["training"]["batch_size"], shuffle=False) 261 | val_dataloader = DataLoader(dataset_val, batch_size=config["training"]["batch_size"], shuffle=False) 262 | 263 | model.eval() 264 | 265 | # predict on the training data, to see how well the model managed to learn and memorize 266 | 267 | predicted_train = np.array([]) 268 | 269 | for idx, (x, y) in enumerate(train_dataloader): 270 | x = x.to(config["training"]["device"]) 271 | out = model(x) 272 | out = out.cpu().detach().numpy() 273 | predicted_train = np.concatenate((predicted_train, out)) 274 | 275 | # predict on the validation data, to see how the model does 276 | 277 | predicted_val = np.array([]) 278 | 279 | for idx, (x, y) in enumerate(val_dataloader): 280 | x = x.to(config["training"]["device"]) 281 | out = model(x) 282 | out = out.cpu().detach().numpy() 283 | predicted_val = np.concatenate((predicted_val, out)) 284 | 285 | # prepare data for plotting 286 | 287 | to_plot_data_y_train_pred = np.zeros(num_data_points) 288 | to_plot_data_y_val_pred = np.zeros(num_data_points) 289 | 290 | to_plot_data_y_train_pred[config["data"]["window_size"]:split_index+config["data"]["window_size"]] = scaler.inverse_transform(predicted_train) 291 | to_plot_data_y_val_pred[split_index+config["data"]["window_size"]:] = scaler.inverse_transform(predicted_val) 292 | 293 | to_plot_data_y_train_pred = np.where(to_plot_data_y_train_pred == 0, None, to_plot_data_y_train_pred) 294 | to_plot_data_y_val_pred = np.where(to_plot_data_y_val_pred == 0, None, to_plot_data_y_val_pred) 295 | 296 | # plots 297 | 298 | fig = figure(figsize=(25, 5), dpi=80) 299 | fig.patch.set_facecolor((1.0, 1.0, 1.0)) 300 | plt.plot(data_date, data_close_price, label="Actual prices", color=config["plots"]["color_actual"]) 301 | plt.plot(data_date, to_plot_data_y_train_pred, label="Predicted prices (train)", color=config["plots"]["color_pred_train"]) 302 | plt.plot(data_date, to_plot_data_y_val_pred, label="Predicted prices (validation)", color=config["plots"]["color_pred_val"]) 303 | plt.title("Compare predicted prices to actual prices") 304 | xticks = [data_date[i] if ((i%config["plots"]["xticks_interval"]==0 and (num_data_points-i) > config["plots"]["xticks_interval"]) or i==num_data_points-1) else None for i in range(num_data_points)] # make x ticks nice 305 | x = np.arange(0,len(xticks)) 306 | plt.xticks(x, xticks, rotation='vertical') 307 | plt.grid(b=None, which='major', axis='y', linestyle='--') 308 | plt.legend() 309 | plt.show() 310 | 311 | 312 | 313 | # prepare data for plotting the zoomed in view of the predicted prices (on validation set) vs. actual prices 314 | 315 | to_plot_data_y_val_subset = scaler.inverse_transform(data_y_val) 316 | to_plot_predicted_val = scaler.inverse_transform(predicted_val) 317 | to_plot_data_date = data_date[split_index+config["data"]["window_size"]:] 318 | 319 | # plots 320 | 321 | fig = figure(figsize=(25, 5), dpi=80) 322 | fig.patch.set_facecolor((1.0, 1.0, 1.0)) 323 | plt.plot(to_plot_data_date, to_plot_data_y_val_subset, label="Actual prices", color=config["plots"]["color_actual"]) 324 | plt.plot(to_plot_data_date, to_plot_predicted_val, label="Predicted prices (validation)", color=config["plots"]["color_pred_val"]) 325 | plt.title("Zoom in to examine predicted price on validation data portion") 326 | xticks = [to_plot_data_date[i] if ((i%int(config["plots"]["xticks_interval"]/5)==0 and (len(to_plot_data_date)-i) > config["plots"]["xticks_interval"]/6) or i==len(to_plot_data_date)-1) else None for i in range(len(to_plot_data_date))] # make x ticks nice 327 | xs = np.arange(0,len(xticks)) 328 | plt.xticks(xs, xticks, rotation='vertical') 329 | plt.grid(b=None, which='major', axis='y', linestyle='--') 330 | plt.legend() 331 | plt.show() 332 | 333 | 334 | # predict the closing price of the next trading day 335 | 336 | model.eval() 337 | 338 | x = torch.tensor(data_x_unseen).float().to(config["training"]["device"]).unsqueeze(0).unsqueeze(2) # this is the data type and shape required, [batch, sequence, feature] 339 | prediction = model(x) 340 | prediction = prediction.cpu().detach().numpy() 341 | 342 | # prepare plots 343 | 344 | plot_range = 10 345 | to_plot_data_y_val = np.zeros(plot_range) 346 | to_plot_data_y_val_pred = np.zeros(plot_range) 347 | to_plot_data_y_test_pred = np.zeros(plot_range) 348 | 349 | to_plot_data_y_val[:plot_range-1] = scaler.inverse_transform(data_y_val)[-plot_range+1:] 350 | to_plot_data_y_val_pred[:plot_range-1] = scaler.inverse_transform(predicted_val)[-plot_range+1:] 351 | 352 | to_plot_data_y_test_pred[plot_range-1] = scaler.inverse_transform(prediction) 353 | 354 | to_plot_data_y_val = np.where(to_plot_data_y_val == 0, None, to_plot_data_y_val) 355 | to_plot_data_y_val_pred = np.where(to_plot_data_y_val_pred == 0, None, to_plot_data_y_val_pred) 356 | to_plot_data_y_test_pred = np.where(to_plot_data_y_test_pred == 0, None, to_plot_data_y_test_pred) 357 | 358 | # plot 359 | 360 | plot_date_test = data_date[-plot_range+1:] 361 | plot_date_test.append("tomorrow") 362 | 363 | fig = figure(figsize=(25, 5), dpi=80) 364 | fig.patch.set_facecolor((1.0, 1.0, 1.0)) 365 | plt.plot(plot_date_test, to_plot_data_y_val, label="Actual prices", marker=".", markersize=10, color=config["plots"]["color_actual"]) 366 | plt.plot(plot_date_test, to_plot_data_y_val_pred, label="Past predicted prices", marker=".", markersize=10, color=config["plots"]["color_pred_val"]) 367 | plt.plot(plot_date_test, to_plot_data_y_test_pred, label="Predicted price for next day", marker=".", markersize=20, color=config["plots"]["color_pred_test"]) 368 | plt.title("Predicted close price of the next trading day") 369 | plt.grid(b=None, which='major', axis='y', linestyle='--') 370 | plt.legend() 371 | plt.show() 372 | 373 | print("Predicted close price of the next trading day:", round(to_plot_data_y_test_pred[plot_range-1], 2)) 374 | -------------------------------------------------------------------------------- /mystock.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "\n", 11 | "import torch\n", 12 | "import torch.nn as nn\n", 13 | "import torch.nn.functional as F\n", 14 | "import torch.optim as optim\n", 15 | "from torch.utils.data import Dataset\n", 16 | "from torch.utils.data import DataLoader\n", 17 | "\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "from matplotlib.pyplot import figure\n", 20 | "\n", 21 | "from alpha_vantage.timeseries import TimeSeries \n", 22 | "\n", 23 | "print(\"All libraries loaded\")\n", 24 | "\n", 25 | "config = {\n", 26 | " \"alpha_vantage\": {\n", 27 | " \"key\": \"demo\", # you can use the demo API key for this project, but please make sure to get your own API key at https://www.alphavantage.co/support/#api-key\n", 28 | " \"symbol\": \"IBM\",\n", 29 | " \"outputsize\": \"full\",\n", 30 | " \"key_adjusted_close\": \"5. adjusted close\",\n", 31 | " },\n", 32 | " \"data\": {\n", 33 | " \"window_size\": 20,\n", 34 | " \"train_split_size\": 0.80,\n", 35 | " }, \n", 36 | " \"plots\": {\n", 37 | " \"xticks_interval\": 90, # show a date every 90 days\n", 38 | " \"color_actual\": \"#001f3f\",\n", 39 | " \"color_train\": \"#3D9970\",\n", 40 | " \"color_val\": \"#0074D9\",\n", 41 | " \"color_pred_train\": \"#3D9970\",\n", 42 | " \"color_pred_val\": \"#0074D9\",\n", 43 | " \"color_pred_test\": \"#FF4136\",\n", 44 | " },\n", 45 | " \"model\": {\n", 46 | " \"input_size\": 1, # since we are only using 1 feature, close price\n", 47 | " \"num_lstm_layers\": 2,\n", 48 | " \"lstm_size\": 32,\n", 49 | " \"dropout\": 0.2,\n", 50 | " },\n", 51 | " \"training\": {\n", 52 | " \"device\": \"cpu\", # \"cuda\" or \"cpu\"\n", 53 | " \"batch_size\": 64,\n", 54 | " \"num_epoch\": 100,\n", 55 | " \"learning_rate\": 0.01,\n", 56 | " \"scheduler_step_size\": 40,\n", 57 | " }\n", 58 | "}\n", 59 | "def download_data(config):\n", 60 | " ts = TimeSeries(key='demo') #you can use the demo API key for this project, but please make sure to eventually get your own API key at https://www.alphavantage.co/support/#api-key. \n", 61 | " data, meta_data = ts.get_daily_adjusted(config[\"alpha_vantage\"][\"symbol\"], outputsize=config[\"alpha_vantage\"][\"outputsize\"])\n", 62 | "\n", 63 | " data_date = [date for date in data.keys()]\n", 64 | " data_date.reverse()\n", 65 | "\n", 66 | " data_close_price = [float(data[date][config[\"alpha_vantage\"][\"key_adjusted_close\"]]) for date in data.keys()]\n", 67 | " data_close_price.reverse()\n", 68 | " data_close_price = np.array(data_close_price)\n", 69 | "\n", 70 | " num_data_points = len(data_date)\n", 71 | " display_date_range = \"from \" + data_date[0] + \" to \" + data_date[num_data_points-1]\n", 72 | " print(\"Number data points\", num_data_points, display_date_range)\n", 73 | "\n", 74 | " return data_date, data_close_price, num_data_points, display_date_range\n", 75 | "\n", 76 | "data_date, data_close_price, num_data_points, display_date_range = download_data(config)\n", 77 | "\n", 78 | "# plot\n", 79 | "\n", 80 | "fig = figure(figsize=(25, 5), dpi=80)\n", 81 | "fig.patch.set_facecolor((1.0, 1.0, 1.0))\n", 82 | "plt.plot(data_date, data_close_price, color=config[\"plots\"][\"color_actual\"])\n", 83 | "xticks = [data_date[i] if ((i%config[\"plots\"][\"xticks_interval\"]==0 and (num_data_points-i) > config[\"plots\"][\"xticks_interval\"]) or i==num_data_points-1) else None for i in range(num_data_points)] # make x ticks nice\n", 84 | "x = np.arange(0,len(xticks))\n", 85 | "plt.xticks(x, xticks, rotation='vertical')\n", 86 | "plt.title(\"Daily close price for \" + config[\"alpha_vantage\"][\"symbol\"] + \", \" + display_date_range)\n", 87 | "plt.grid(visible=None, which='major', axis='y', linestyle='--')\n", 88 | "plt.show()\n", 89 | "\n", 90 | "class Normalizer():\n", 91 | " def __init__(self):\n", 92 | " self.mu = None\n", 93 | " self.sd = None\n", 94 | "\n", 95 | " def fit_transform(self, x):\n", 96 | " self.mu = np.mean(x, axis=(0), keepdims=True)\n", 97 | " self.sd = np.std(x, axis=(0), keepdims=True)\n", 98 | " normalized_x = (x - self.mu)/self.sd\n", 99 | " return normalized_x\n", 100 | "\n", 101 | " def inverse_transform(self, x):\n", 102 | " return (x*self.sd) + self.mu\n", 103 | "\n", 104 | "# normalize\n", 105 | "scaler = Normalizer()\n", 106 | "normalized_data_close_price = scaler.fit_transform(data_close_price)\n", 107 | "def prepare_data_x(x, window_size):\n", 108 | " # perform windowing\n", 109 | " n_row = x.shape[0] - window_size + 1\n", 110 | " output = np.lib.stride_tricks.as_strided(x, shape=(n_row, window_size), strides=(x.strides[0], x.strides[0]))\n", 111 | " return output[:-1], output[-1]\n", 112 | "\n", 113 | "\n", 114 | "def prepare_data_y(x, window_size):\n", 115 | " # # perform simple moving average\n", 116 | " # output = np.convolve(x, np.ones(window_size), 'valid') / window_size\n", 117 | "\n", 118 | " # use the next day as label\n", 119 | " output = x[window_size:]\n", 120 | " return output\n", 121 | "\n", 122 | "data_x, data_x_unseen = prepare_data_x(normalized_data_close_price, window_size=config[\"data\"][\"window_size\"])\n", 123 | "data_y = prepare_data_y(normalized_data_close_price, window_size=config[\"data\"][\"window_size\"])\n", 124 | "\n", 125 | "# split dataset\n", 126 | "\n", 127 | "split_index = int(data_y.shape[0]*config[\"data\"][\"train_split_size\"])\n", 128 | "data_x_train = data_x[:split_index]\n", 129 | "data_x_val = data_x[split_index:]\n", 130 | "data_y_train = data_y[:split_index]\n", 131 | "data_y_val = data_y[split_index:]\n", 132 | "\n", 133 | "# prepare data for plotting\n", 134 | "\n", 135 | "to_plot_data_y_train = np.zeros(num_data_points)\n", 136 | "to_plot_data_y_val = np.zeros(num_data_points)\n", 137 | "\n", 138 | "to_plot_data_y_train[config[\"data\"][\"window_size\"]:split_index+config[\"data\"][\"window_size\"]] = scaler.inverse_transform(data_y_train)\n", 139 | "to_plot_data_y_val[split_index+config[\"data\"][\"window_size\"]:] = scaler.inverse_transform(data_y_val)\n", 140 | "\n", 141 | "to_plot_data_y_train = np.where(to_plot_data_y_train == 0, None, to_plot_data_y_train)\n", 142 | "to_plot_data_y_val = np.where(to_plot_data_y_val == 0, None, to_plot_data_y_val)\n", 143 | "\n", 144 | "## plots\n", 145 | "\n", 146 | "fig = figure(figsize=(25, 5), dpi=80)\n", 147 | "fig.patch.set_facecolor((1.0, 1.0, 1.0))\n", 148 | "plt.plot(data_date, to_plot_data_y_train, label=\"Prices (train)\", color=config[\"plots\"][\"color_train\"])\n", 149 | "plt.plot(data_date, to_plot_data_y_val, label=\"Prices (validation)\", color=config[\"plots\"][\"color_val\"])\n", 150 | "xticks = [data_date[i] if ((i%config[\"plots\"][\"xticks_interval\"]==0 and (num_data_points-i) > config[\"plots\"][\"xticks_interval\"]) or i==num_data_points-1) else None for i in range(num_data_points)] # make x ticks nice\n", 151 | "x = np.arange(0,len(xticks))\n", 152 | "plt.xticks(x, xticks, rotation='vertical')\n", 153 | "plt.title(\"Daily close prices for \" + config[\"alpha_vantage\"][\"symbol\"] + \" - showing training and validation data\")\n", 154 | "plt.grid(visible=None, which='major', axis='y', linestyle='--')\n", 155 | "plt.legend()\n", 156 | "plt.show()\n", 157 | "\n", 158 | "class TimeSeriesDataset(Dataset):\n", 159 | " def __init__(self, x, y):\n", 160 | " x = np.expand_dims(x, 2) # in our case, we have only 1 feature, so we need to convert `x` into [batch, sequence, features] for LSTM\n", 161 | " self.x = x.astype(np.float32)\n", 162 | " self.y = y.astype(np.float32)\n", 163 | " \n", 164 | " def __len__(self):\n", 165 | " return len(self.x)\n", 166 | "\n", 167 | " def __getitem__(self, idx):\n", 168 | " return (self.x[idx], self.y[idx])\n", 169 | "\n", 170 | "dataset_train = TimeSeriesDataset(data_x_train, data_y_train)\n", 171 | "dataset_val = TimeSeriesDataset(data_x_val, data_y_val)\n", 172 | "\n", 173 | "print(\"Train data shape\", dataset_train.x.shape, dataset_train.y.shape)\n", 174 | "print(\"Validation data shape\", dataset_val.x.shape, dataset_val.y.shape)\n", 175 | "\n", 176 | "train_dataloader = DataLoader(dataset_train, batch_size=config[\"training\"][\"batch_size\"], shuffle=True)\n", 177 | "val_dataloader = DataLoader(dataset_val, batch_size=config[\"training\"][\"batch_size\"], shuffle=True)\n", 178 | "class LSTMModel(nn.Module):\n", 179 | " def __init__(self, input_size=1, hidden_layer_size=32, num_layers=2, output_size=1, dropout=0.2):\n", 180 | " super().__init__()\n", 181 | " self.hidden_layer_size = hidden_layer_size\n", 182 | "\n", 183 | " self.linear_1 = nn.Linear(input_size, hidden_layer_size)\n", 184 | " self.relu = nn.ReLU()\n", 185 | " self.lstm = nn.LSTM(hidden_layer_size, hidden_size=self.hidden_layer_size, num_layers=num_layers, batch_first=True)\n", 186 | " self.dropout = nn.Dropout(dropout)\n", 187 | " self.linear_2 = nn.Linear(num_layers*hidden_layer_size, output_size)\n", 188 | " \n", 189 | " self.init_weights()\n", 190 | "\n", 191 | " def init_weights(self):\n", 192 | " for name, param in self.lstm.named_parameters():\n", 193 | " if 'bias' in name:\n", 194 | " nn.init.constant_(param, 0.0)\n", 195 | " elif 'weight_ih' in name:\n", 196 | " nn.init.kaiming_normal_(param)\n", 197 | " elif 'weight_hh' in name:\n", 198 | " nn.init.orthogonal_(param)\n", 199 | "\n", 200 | " def forward(self, x):\n", 201 | " batchsize = x.shape[0]\n", 202 | "\n", 203 | " # layer 1\n", 204 | " x = self.linear_1(x)\n", 205 | " x = self.relu(x)\n", 206 | " \n", 207 | " # LSTM layer\n", 208 | " lstm_out, (h_n, c_n) = self.lstm(x)\n", 209 | "\n", 210 | " # reshape output from hidden cell into [batch, features] for `linear_2`\n", 211 | " x = h_n.permute(1, 0, 2).reshape(batchsize, -1) \n", 212 | " \n", 213 | " # layer 2\n", 214 | " x = self.dropout(x)\n", 215 | " predictions = self.linear_2(x)\n", 216 | " return predictions[:,-1]\n", 217 | " pass\n", 218 | "def run_epoch(dataloader, is_training=False):\n", 219 | " epoch_loss = 0\n", 220 | "\n", 221 | " if is_training:\n", 222 | " model.train()\n", 223 | " else:\n", 224 | " model.eval()\n", 225 | "\n", 226 | " for idx, (x, y) in enumerate(dataloader):\n", 227 | " if is_training:\n", 228 | " optimizer.zero_grad()\n", 229 | "\n", 230 | " batchsize = x.shape[0]\n", 231 | "\n", 232 | " x = x.to(config[\"training\"][\"device\"])\n", 233 | " y = y.to(config[\"training\"][\"device\"])\n", 234 | "\n", 235 | " out = model(x)\n", 236 | " loss = criterion(out.contiguous(), y.contiguous())\n", 237 | "\n", 238 | " if is_training:\n", 239 | " loss.backward()\n", 240 | " optimizer.step()\n", 241 | "\n", 242 | " epoch_loss += (loss.detach().item() / batchsize)\n", 243 | "\n", 244 | " lr = scheduler.get_last_lr()[0]\n", 245 | "\n", 246 | " return epoch_loss, lr\n", 247 | "\n", 248 | "train_dataloader = DataLoader(dataset_train, batch_size=config[\"training\"][\"batch_size\"], shuffle=True)\n", 249 | "val_dataloader = DataLoader(dataset_val, batch_size=config[\"training\"][\"batch_size\"], shuffle=True)\n", 250 | "\n", 251 | "model = LSTMModel(input_size=config[\"model\"][\"input_size\"], hidden_layer_size=config[\"model\"][\"lstm_size\"], num_layers=config[\"model\"][\"num_lstm_layers\"], output_size=1, dropout=config[\"model\"][\"dropout\"])\n", 252 | "model = model.to(config[\"training\"][\"device\"])\n", 253 | "\n", 254 | "criterion = nn.MSELoss()\n", 255 | "optimizer = optim.Adam(model.parameters(), lr=config[\"training\"][\"learning_rate\"], betas=(0.9, 0.98), eps=1e-9)\n", 256 | "scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=config[\"training\"][\"scheduler_step_size\"], gamma=0.1)\n", 257 | "\n", 258 | "for epoch in range(config[\"training\"][\"num_epoch\"]):\n", 259 | " loss_train, lr_train = run_epoch(train_dataloader, is_training=True)\n", 260 | " loss_val, lr_val = run_epoch(val_dataloader)\n", 261 | " scheduler.step()\n", 262 | " \n", 263 | " print('Epoch[{}/{}] | loss train:{:.6f}, test:{:.6f} | lr:{:.6f}'\n", 264 | " .format(epoch+1, config[\"training\"][\"num_epoch\"], loss_train, loss_val, lr_train))\n", 265 | " pass\n", 266 | "# here we re-initialize dataloader so the data doesn't shuffled, so we can plot the values by date\n", 267 | "\n", 268 | "train_dataloader = DataLoader(dataset_train, batch_size=config[\"training\"][\"batch_size\"], shuffle=False)\n", 269 | "val_dataloader = DataLoader(dataset_val, batch_size=config[\"training\"][\"batch_size\"], shuffle=False)\n", 270 | "\n", 271 | "model.eval()\n", 272 | "\n", 273 | "# predict on the training data, to see how well the model managed to learn and memorize\n", 274 | "\n", 275 | "predicted_train = np.array([])\n", 276 | "\n", 277 | "for idx, (x, y) in enumerate(train_dataloader):\n", 278 | " x = x.to(config[\"training\"][\"device\"])\n", 279 | " out = model(x)\n", 280 | " out = out.cpu().detach().numpy()\n", 281 | " predicted_train = np.concatenate((predicted_train, out))\n", 282 | "\n", 283 | "# predict on the validation data, to see how the model does\n", 284 | "\n", 285 | "predicted_val = np.array([])\n", 286 | "\n", 287 | "for idx, (x, y) in enumerate(val_dataloader):\n", 288 | " x = x.to(config[\"training\"][\"device\"])\n", 289 | " out = model(x)\n", 290 | " out = out.cpu().detach().numpy()\n", 291 | " predicted_val = np.concatenate((predicted_val, out))\n", 292 | "\n", 293 | "# prepare data for plotting\n", 294 | "\n", 295 | "to_plot_data_y_train_pred = np.zeros(num_data_points)\n", 296 | "to_plot_data_y_val_pred = np.zeros(num_data_points)\n", 297 | "\n", 298 | "to_plot_data_y_train_pred[config[\"data\"][\"window_size\"]:split_index+config[\"data\"][\"window_size\"]] = scaler.inverse_transform(predicted_train)\n", 299 | "to_plot_data_y_val_pred[split_index+config[\"data\"][\"window_size\"]:] = scaler.inverse_transform(predicted_val)\n", 300 | "\n", 301 | "to_plot_data_y_train_pred = np.where(to_plot_data_y_train_pred == 0, None, to_plot_data_y_train_pred)\n", 302 | "to_plot_data_y_val_pred = np.where(to_plot_data_y_val_pred == 0, None, to_plot_data_y_val_pred)\n", 303 | "\n", 304 | "# plots\n", 305 | "\n", 306 | "fig = figure(figsize=(25, 5), dpi=80)\n", 307 | "fig.patch.set_facecolor((1.0, 1.0, 1.0))\n", 308 | "plt.plot(data_date, data_close_price, label=\"Actual prices\", color=config[\"plots\"][\"color_actual\"])\n", 309 | "plt.plot(data_date, to_plot_data_y_train_pred, label=\"Predicted prices (train)\", color=config[\"plots\"][\"color_pred_train\"])\n", 310 | "plt.plot(data_date, to_plot_data_y_val_pred, label=\"Predicted prices (validation)\", color=config[\"plots\"][\"color_pred_val\"])\n", 311 | "plt.title(\"Compare predicted prices to actual prices\")\n", 312 | "xticks = [data_date[i] if ((i%config[\"plots\"][\"xticks_interval\"]==0 and (num_data_points-i) > config[\"plots\"][\"xticks_interval\"]) or i==num_data_points-1) else None for i in range(num_data_points)] # make x ticks nice\n", 313 | "x = np.arange(0,len(xticks))\n", 314 | "plt.xticks(x, xticks, rotation='vertical')\n", 315 | "plt.grid(visible=None, which='major', axis='y', linestyle='--')\n", 316 | "plt.legend()\n", 317 | "plt.show()\n", 318 | "\n", 319 | "\n", 320 | "\n", 321 | "# prepare data for plotting the zoomed in view of the predicted prices (on validation set) vs. actual prices\n", 322 | "\n", 323 | "to_plot_data_y_val_subset = scaler.inverse_transform(data_y_val)\n", 324 | "to_plot_predicted_val = scaler.inverse_transform(predicted_val)\n", 325 | "to_plot_data_date = data_date[split_index+config[\"data\"][\"window_size\"]:]\n", 326 | "\n", 327 | "# plots\n", 328 | "\n", 329 | "fig = figure(figsize=(25, 5), dpi=80)\n", 330 | "fig.patch.set_facecolor((1.0, 1.0, 1.0))\n", 331 | "plt.plot(to_plot_data_date, to_plot_data_y_val_subset, label=\"Actual prices\", color=config[\"plots\"][\"color_actual\"])\n", 332 | "plt.plot(to_plot_data_date, to_plot_predicted_val, label=\"Predicted prices (validation)\", color=config[\"plots\"][\"color_pred_val\"])\n", 333 | "plt.title(\"Zoom in to examine predicted price on validation data portion\")\n", 334 | "xticks = [to_plot_data_date[i] if ((i%int(config[\"plots\"][\"xticks_interval\"]/5)==0 and (len(to_plot_data_date)-i) > config[\"plots\"][\"xticks_interval\"]/6) or i==len(to_plot_data_date)-1) else None for i in range(len(to_plot_data_date))] # make x ticks nice\n", 335 | "xs = np.arange(0,len(xticks))\n", 336 | "plt.xticks(xs, xticks, rotation='vertical')\n", 337 | "plt.grid(visible=None, which='major', axis='y', linestyle='--')\n", 338 | "plt.legend()\n", 339 | "plt.show()\n", 340 | "\n", 341 | "\n", 342 | "# predict the closing price of the next trading day\n", 343 | "\n", 344 | "model.eval()\n", 345 | "\n", 346 | "x = torch.tensor(data_x_unseen).float().to(config[\"training\"][\"device\"]).unsqueeze(0).unsqueeze(2) # this is the data type and shape required, [batch, sequence, feature]\n", 347 | "prediction = model(x)\n", 348 | "prediction = prediction.cpu().detach().numpy()\n", 349 | "\n", 350 | "# prepare plots\n", 351 | "\n", 352 | "plot_range = 10\n", 353 | "to_plot_data_y_val = np.zeros(plot_range)\n", 354 | "to_plot_data_y_val_pred = np.zeros(plot_range)\n", 355 | "to_plot_data_y_test_pred = np.zeros(plot_range)\n", 356 | "\n", 357 | "to_plot_data_y_val[:plot_range-1] = scaler.inverse_transform(data_y_val)[-plot_range+1:]\n", 358 | "to_plot_data_y_val_pred[:plot_range-1] = scaler.inverse_transform(predicted_val)[-plot_range+1:]\n", 359 | "\n", 360 | "to_plot_data_y_test_pred[plot_range-1] = scaler.inverse_transform(prediction)\n", 361 | "\n", 362 | "to_plot_data_y_val = np.where(to_plot_data_y_val == 0, None, to_plot_data_y_val)\n", 363 | "to_plot_data_y_val_pred = np.where(to_plot_data_y_val_pred == 0, None, to_plot_data_y_val_pred)\n", 364 | "to_plot_data_y_test_pred = np.where(to_plot_data_y_test_pred == 0, None, to_plot_data_y_test_pred)\n", 365 | "\n", 366 | "# plot\n", 367 | "\n", 368 | "plot_date_test = data_date[-plot_range+1:]\n", 369 | "plot_date_test.append(\"tomorrow\")\n", 370 | "\n", 371 | "fig = figure(figsize=(25, 5), dpi=80)\n", 372 | "fig.patch.set_facecolor((1.0, 1.0, 1.0))\n", 373 | "plt.plot(plot_date_test, to_plot_data_y_val, label=\"Actual prices\", marker=\".\", markersize=10, color=config[\"plots\"][\"color_actual\"])\n", 374 | "plt.plot(plot_date_test, to_plot_data_y_val_pred, label=\"Past predicted prices\", marker=\".\", markersize=10, color=config[\"plots\"][\"color_pred_val\"])\n", 375 | "plt.plot(plot_date_test, to_plot_data_y_test_pred, label=\"Predicted price for next day\", marker=\".\", markersize=20, color=config[\"plots\"][\"color_pred_test\"])\n", 376 | "plt.title(\"Predicted close price of the next trading day\")\n", 377 | "plt.grid(visible=None, which='major', axis='y', linestyle='--')\n", 378 | "plt.legend()\n", 379 | "plt.show()\n", 380 | "\n", 381 | "print(\"Predicted close price of the next trading day:\", round(to_plot_data_y_test_pred[plot_range-1], 2))\n", 382 | " " 383 | ] 384 | } 385 | ], 386 | "metadata": { 387 | "kernelspec": { 388 | "display_name": "myenv", 389 | "language": "python", 390 | "name": "python3" 391 | }, 392 | "language_info": { 393 | "codemirror_mode": { 394 | "name": "ipython", 395 | "version": 3 396 | }, 397 | "file_extension": ".py", 398 | "mimetype": "text/x-python", 399 | "name": "python", 400 | "nbconvert_exporter": "python", 401 | "pygments_lexer": "ipython3", 402 | "version": "3.11.4" 403 | }, 404 | "orig_nbformat": 4 405 | }, 406 | "nbformat": 4, 407 | "nbformat_minor": 2 408 | } 409 | --------------------------------------------------------------------------------