├── README.md
└── StocksImageCNN.ipynb

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# StocksPredict-ImageCNN
Predict the direction of stock prices from visual representations of charts using CNNs.

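In brief: each trading day is bucketed into one of five classes by its percent change over the previous close, candlestick windows are rendered as grayscale images, and a small CNN predicts the class of the day that follows each window. A condensed sketch of the labelling rule, mirroring the notebook code (the `label_signal` helper name is just illustrative):

```python
import pandas as pd

def label_signal(close: pd.Series) -> pd.Series:
    """Bucket each day's percent change into 5 classes.

    0: <= -2.5%   1: <= -1%   2: between -1% and +1%
    3: >= +1%     4: >= +2.5%
    (The notebook drops the first row, whose change is NaN.)
    """
    change = 100 * close.pct_change()
    signal = pd.Series(2, index=close.index)
    signal[change >= 1] = 3
    signal[change >= 2.5] = 4
    signal[change <= -1] = 1
    signal[change <= -2.5] = 0
    return signal
```
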
For a more detailed description, check out my blog post:

[Can an ML model read stock charts and predict prices?](https://towardsdatascience.com/can-an-ml-model-read-stock-charts-and-predict-prices-fb73c551c7a4)

--------------------------------------------------------------------------------
/StocksImageCNN.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import yfinance as yf\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "from pandas.plotting import register_matplotlib_converters\n",
    "register_matplotlib_converters()\n",
    "\n",
    "from keras.models import Sequential\n",
    "from keras import layers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ticker = yf.Ticker(\"RELIANCE.NS\")\n",
    "data = ticker.history(\n",
    "    # period = \"20y\",\n",
    "    start = '2002-01-01',\n",
    "    end = '2019-12-31',\n",
    "    interval = \"1d\")\n",
    "\n",
    "# 'Date' is the DatetimeIndex of the frame returned by yfinance;\n",
    "# sort_values can sort by an index level name.\n",
    "data.sort_values('Date', inplace=True, ascending=True)\n",
    "data = data[data['Volume'] > 0]  # Filter out garbage rows with zero volume\n",
    "data.drop(['Dividends', 'Stock Splits'], axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Feature engineering: day-over-day percent change in Close,\n",
    "# bucketed into 5 classes:\n",
    "# 0: <= -2.5%, 1: <= -1%, 2: between -1% and +1%, 3: >= +1%, 4: >= +2.5%\n",
    "data['Change'] = 100*((data['Close']-data['Close'].shift(1))/data['Close'].shift(1))\n",
    "data['Signal'] = 2\n",
    "data['Signal'] = np.where(data['Change'] >= 1, 3, data['Signal'])\n",
    "data['Signal'] = np.where(data['Change'] >= 2.5, 4, data['Signal'])\n",
    "data['Signal'] = np.where(data['Change'] <= -1, 1, data['Signal'])\n",
    "data['Signal'] = np.where(data['Change'] <= -2.5, 0, data['Signal'])\n",
    "data = data[1:]  # Drop the first row: its Change is NaN\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def createImage(df, height_multiplier, min_val, max_val):\n",
    "    \"\"\"Render a window of OHLC rows as a grayscale candlestick-body image.\n",
    "\n",
    "    Each column is one day; up days (Close >= Open) are drawn grey (128),\n",
    "    down days white (255). Rows run top-down from max_val to min_val.\n",
    "    \"\"\"\n",
    "    image_width = len(df)\n",
    "    image_height = height_multiplier*image_width\n",
    "    image = np.zeros((image_height, image_width))\n",
    "    factor = image_height/(max_val-min_val)  # pixels per unit of price\n",
    "    for i in range(image_width):\n",
    "        if df.Open.iloc[i] <= df.Close.iloc[i]:\n",
    "            body_height = max(int((df.Close.iloc[i] - df.Open.iloc[i])*factor), 1)\n",
    "            start = int((max_val - df.Close.iloc[i])*factor)\n",
    "            image[start:start+body_height, i] = 128\n",
    "        else:\n",
    "            body_height = max(int((df.Open.iloc[i] - df.Close.iloc[i])*factor), 1)\n",
    "            start = int((max_val - df.Open.iloc[i])*factor)\n",
    "            image[start:start+body_height, i] = 255\n",
    "    return image"
   ]
  },
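  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick sanity check, not in the original notebook: render one window with `createImage` and display it. The 50-day window and `height_multiplier=2` are illustrative values matching the hyperparameters used further below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Illustrative only: visualize a single candlestick image.\n",
    "sample = data[:50]\n",
    "img = createImage(sample, 2, sample.Low.min(), sample.High.max())\n",
    "plt.figure(figsize=(4, 8))\n",
    "plt.imshow(img, cmap='gray')\n",
    "plt.title('50-day window (grey = up day, white = down day)')\n",
    "plt.show()"
   ]
  },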
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def dataGenerator(df, timestep, window, height_multiplier=5, batch_size=16):\n",
    "    \"\"\"Yield (images, labels) batches for Keras.\n",
    "\n",
    "    Each image covers days [window+i, window+i+timestep); the price scale\n",
    "    (min/max) also includes the preceding `window` days for context.\n",
    "    The label is the Signal of the day immediately after the window.\n",
    "    \"\"\"\n",
    "    c = 0\n",
    "    while True:\n",
    "        image_width = timestep\n",
    "        image_height = height_multiplier*image_width\n",
    "\n",
    "        img = np.zeros((batch_size, image_height, image_width, 1)).astype('float')\n",
    "        y = np.zeros((batch_size))\n",
    "\n",
    "        for i in range(c, c+batch_size):\n",
    "            # Slice the dataframe and render the image\n",
    "            window_df = df[window+i:window+i+timestep]\n",
    "            max_val = df[i:window+i+timestep].High.max()\n",
    "            min_val = df[i:window+i+timestep].Low.min()\n",
    "            image = createImage(window_df, height_multiplier, min_val, max_val)\n",
    "\n",
    "            # Label: the class of the day following the window\n",
    "            pred = df.Signal.iloc[window+i+timestep]\n",
    "\n",
    "            # Add to the batch arrays\n",
    "            img[i-c] = image.reshape(image.shape[0], image.shape[1], 1)\n",
    "            y[i-c] = pred\n",
    "\n",
    "        c += batch_size\n",
    "        if c + batch_size + window + timestep >= len(df):\n",
    "            c = 0  # wrap around when the next batch would run off the end\n",
    "        yield img, y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hyperparameters (defined before the model, which needs timestep and\n",
    "# height_multiplier for its input shape)\n",
    "batch_size = 64\n",
    "timestep = 50             # days per image (image width in pixels)\n",
    "height_multiplier = 2     # image height = height_multiplier * timestep\n",
    "window = int(timestep*1)  # extra look-back used only for price scaling\n",
    "\n",
    "split_val = int(len(data)*0.85)\n",
    "split_test = int(len(data)*0.95)\n",
    "\n",
    "train_gen = dataGenerator(data[:split_val], timestep, window, height_multiplier, batch_size)\n",
    "val_gen = dataGenerator(data[split_val:split_test], timestep, window, height_multiplier, batch_size)\n",
    "test_gen = dataGenerator(data[split_test:], timestep, window, height_multiplier, batch_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = Sequential([\n",
    "    layers.Conv2D(8, 3, padding='same', activation='relu', input_shape=(timestep*height_multiplier, timestep, 1)),\n",
    "    layers.MaxPooling2D(),\n",
    "    layers.Conv2D(16, 3, padding='same', activation='relu'),\n",
    "    layers.MaxPooling2D(),\n",
    "    layers.Flatten(),\n",
    "    layers.Dropout(0.2),\n",
    "    layers.Dense(64, activation='relu'),\n",
    "    layers.Dense(5, activation='softmax')  # one output per Signal class\n",
    "])\n",
    "\n",
    "model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
    "\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step counts are approximate; the generators wrap around at the end.\n",
    "history = model.fit(x=train_gen,\n",
    "                    validation_data=val_gen,\n",
    "                    epochs=15,\n",
    "                    steps_per_epoch=split_val // batch_size,\n",
    "                    validation_steps=(split_test-split_val) // batch_size,\n",
    "                    shuffle=False,\n",
    "                    verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(history.history['loss'], label='train')\n",
    "plt.plot(history.history['val_loss'], label='val')\n",
    "plt.xlabel('epoch')\n",
    "plt.ylabel('loss')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(model.evaluate(test_gen, steps=(len(data)-split_test)//batch_size))  # [test loss, test accuracy]"
   ]
  },
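  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "An added diagnostic, not part of the original notebook: plot training vs. validation accuracy alongside the loss curves above. The history key is `accuracy` in recent Keras releases and `acc` in older ones, so the cell checks which is present."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Illustrative diagnostic: accuracy curves for the run above.\n",
    "# With 5 classes, ~0.2 is the random baseline only if the classes are\n",
    "# balanced; check data.Signal.value_counts() before reading too much in.\n",
    "acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'\n",
    "plt.plot(history.history[acc_key], label='train')\n",
    "plt.plot(history.history['val_' + acc_key], label='val')\n",
    "plt.xlabel('epoch')\n",
    "plt.ylabel('accuracy')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  }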
"file_extension": ".py", 203 | "mimetype": "text/x-python", 204 | "name": "python", 205 | "nbconvert_exporter": "python", 206 | "pygments_lexer": "ipython3", 207 | "version": "3.7.6" 208 | } 209 | }, 210 | "nbformat": 4, 211 | "nbformat_minor": 4 212 | } 213 | --------------------------------------------------------------------------------