├── .gitignore ├── CNN Deeplearning v2.ipynb ├── CNN Deeplearning.ipynb ├── CNN_Deeplearning_mattijs.ipynb ├── CNN_Deeplearning_walkforward.ipynb ├── CoinBase.ipynb ├── DL_for_LOB_Sirignano.ipynb ├── Data_Filtering.ipynb ├── DepthAnalysis.ipynb ├── LICENSE ├── README.md ├── WalkForwardV2.ipynb ├── Walkthrough.ipynb └── paper └── main.tex /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 
97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # Latex 107 | *.aux 108 | *.fdb_latexmk 109 | *.fls 110 | paper/*.pdf 111 | *synctex* 112 | *.toc 113 | *.dvi 114 | 115 | scraped_data* -------------------------------------------------------------------------------- /CNN Deeplearning v2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import pandas as pd\n", 11 | "import numpy as np\n", 12 | "%matplotlib inline\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "import seaborn as sns\n", 15 | "import gc\n", 16 | "from tqdm import tqdm\n", 17 | "from sklearn.preprocessing import StandardScaler\n", 18 | "from sklearn.metrics import classification_report" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "paths = 'coinbase_btc_usd/coinbase/btc_usd/l2_snapshots/100ms/'" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "register = {}" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 4, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "l2_snapshot = pd.DataFrame()" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 5, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "name": "stdout", 55 | "output_type": "stream", 56 | "text": [ 57 | "(566480, 200)\n", 58 | "(533024, 200)\n", 59 | "(517431, 200)\n", 60 | "(529814, 200)\n", 61 | "(537967, 200)\n", 62 | "(511850, 200)\n", 63 | "(545960, 200)\n", 64 | "(561058, 200)\n", 65 | "(535563, 200)\n" 66 | ] 67 | } 68 | ], 69 | "source": [ 70 | "count = 0\n", 71 | "day = pd.DataFrame()\n", 72 | "i = 0\n", 73 | "for x 
in os.listdir(paths):\n", 74 | " path = paths+x\n", 75 | " temp = pd.read_parquet(path)\n", 76 | " if count%24 == 0:\n", 77 | " day = pd.read_parquet(path)\n", 78 | " else:\n", 79 | " day = pd.concat([day,temp])\n", 80 | " \n", 81 | " i+=1\n", 82 | " count +=1\n", 83 | "\n", 84 | " if count%24 == 0:\n", 85 | " flag = not register\n", 86 | " day = day.dropna()\n", 87 | " print(day.shape)\n", 88 | " result = []\n", 89 | " for cols in day.columns:\n", 90 | " values = day[cols].to_numpy()\n", 91 | " mean = np.mean(values)\n", 92 | " std = np.std(values)\n", 93 | " result.append([cols,[mean,std]])\n", 94 | " register[count/24] = result\n", 95 | " if not flag:\n", 96 | " for l in range(200):\n", 97 | " prev_stat = register[(count/24)-1]\n", 98 | " col_name = prev_stat[l][0]\n", 99 | " col_mean = prev_stat[l][1][0]\n", 100 | " col_std = prev_stat[l][1][1]\n", 101 | " values = day[col_name].to_numpy()\n", 102 | " values = (values - col_mean)/col_std\n", 103 | " day[col_name] = values\n", 104 | " l2_snapshot = pd.concat([day,l2_snapshot])\n", 105 | " del day\n", 106 | " \n", 107 | " if i>240:\n", 108 | " break" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 6, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "l2_snapshot_ = l2_snapshot[['b1','b2','b3','b4','b5','b6','b7','b8', 'b9', 'b10', 'a1','a2','a3','a4','a5','a6','a7','a8', 'a9', 'a10', 'bq1','bq2','bq3','bq4','bq5','bq6','bq7','bq8', 'bq9', 'bq10', 'aq1','aq2','aq3','aq4','aq5','aq6','aq7','aq8', 'aq9', 'aq10']]" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 7, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "def generate_features_labels(df,ask, bid,k=20, alpha = 10e-5):\n", 127 | " df_ = df.copy()\n", 128 | " df_['mid_price'] = (df_[ask].to_numpy()+df_[bid].to_numpy())/2\n", 129 | " df_['target'] = 1\n", 130 | " index = df_.columns.get_loc('mid_price')\n", 131 | " target_index = df_.columns.get_loc('target')\n", 132 | " 
shape = df_.shape[0]\n", 133 | " y = df_['target'].to_numpy()\n", 134 | " for i in tqdm(range(k,shape-k)):\n", 135 | " if i==k:\n", 136 | " m_b = np.mean(df_.iloc[(i-k):i, index].to_numpy())\n", 137 | " m_a = np.mean(df_.iloc[i+1:(i+k+1), index].to_numpy())\n", 138 | " val = df_.iloc[i-k,index]\n", 139 | " valB = df_.iloc[i+1, index]\n", 140 | " else:\n", 141 | " curr_val = df_.iloc[i-1,index]\n", 142 | " curr_valB = df_.iloc[i+k, index]\n", 143 | " m_b = (m_b*k+(curr_val)-(val))/k\n", 144 | " m_a = (m_a*k+(curr_valB - valB))/k\n", 145 | " val = df_.iloc[(i-k),index]\n", 146 | " valB = df_.iloc[(i+1), index]\n", 147 | "\n", 148 | " if (m_b > m_a*(1+alpha)):\n", 149 | " y[i] = 2\n", 150 | " if (m_b < m_a*(1-alpha)):\n", 151 | " y[i] = 0\n", 152 | "\n", 153 | " y = y[k:shape-k]\n", 154 | " X = df.iloc[k:shape-k,:].to_numpy()\n", 155 | " return X,y" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 8, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "name": "stderr", 165 | "output_type": "stream", 166 | "text": [ 167 | "100%|██████████| 4272627/4272627 [02:53<00:00, 24654.56it/s]\n", 168 | "100%|██████████| 4272627/4272627 [02:55<00:00, 24407.77it/s]\n", 169 | "100%|██████████| 4272627/4272627 [03:02<00:00, 23402.77it/s]\n", 170 | "100%|██████████| 4272627/4272627 [03:02<00:00, 23407.06it/s]\n", 171 | "100%|██████████| 4272627/4272627 [02:57<00:00, 24138.81it/s]\n", 172 | "100%|██████████| 4272627/4272627 [02:56<00:00, 24214.54it/s]\n", 173 | "100%|██████████| 4272627/4272627 [02:54<00:00, 24419.84it/s]\n", 174 | "100%|██████████| 4272627/4272627 [02:55<00:00, 24315.06it/s]\n", 175 | "100%|██████████| 4272627/4272627 [03:00<00:00, 23713.46it/s]\n", 176 | "100%|██████████| 4272627/4272627 [02:55<00:00, 24276.69it/s]\n" 177 | ] 178 | } 179 | ], 180 | "source": [ 181 | "# GridSearch\n", 182 | "min_ = float('inf')\n", 183 | "minI = 0\n", 184 | "for i in np.logspace(0,-8,10, endpoint = True):\n", 185 | " X,y = 
generate_features_labels(l2_snapshot, ask='a1', bid = 'b1', alpha=i)\n", 186 | " y_ = pd.Series(y)\n", 187 | " temp = y_.value_counts()\n", 188 | " a,b,c = temp[0], temp[1], temp[2]\n", 189 | " sum_ = a+b+c\n", 190 | " l2 = ((1/3-(a/sum_))*100)**2 + ((1/3-(b/sum_))*100)**2 + ((1/3-(c/sum_))*100)**2\n", 191 | " if l2 < min_:\n", 192 | " min_ = l2\n", 193 | " minI = i\n", 194 | " del X,y,temp" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 9, 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "name": "stdout", 204 | "output_type": "stream", 205 | "text": [ 206 | "7.742636826811277e-08 347.51453285264193\n" 207 | ] 208 | } 209 | ], 210 | "source": [ 211 | "print(minI, min_)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 10, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "name": "stderr", 221 | "output_type": "stream", 222 | "text": [ 223 | "100%|██████████| 4272627/4272627 [02:55<00:00, 24396.68it/s]\n" 224 | ] 225 | } 226 | ], 227 | "source": [ 228 | "X,y = generate_features_labels(l2_snapshot_, ask='a1', bid = 'b1', alpha=minI)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 11, 234 | "metadata": { 235 | "scrolled": true 236 | }, 237 | "outputs": [], 238 | "source": [ 239 | "y = y.astype('int')" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 12, 245 | "metadata": {}, 246 | "outputs": [ 247 | { 248 | "data": { 249 | "text/plain": [ 250 | "" 251 | ] 252 | }, 253 | "execution_count": 12, 254 | "metadata": {}, 255 | "output_type": "execute_result" 256 | }, 257 | { 258 | "data": { 259 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAaEAAAD8CAYAAADezxtfAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAF8pJREFUeJzt3X+s3XWd5/HnyyKuO4oUucsytGwZ7UxS2dkqDTbraliZKYXsWjToQnZsZRqqEWY1O5kVZ5OtQUl0HSWDq0wwdGiNA7KgQzcp22mQ0cxkilyU8FOGK8rSptJOi+AMI07xvX+cz9XT673tbem9H+A+H8k353ve38/n8/2c3MAr3+/59HtSVUiS1MPLek9AkjR3GUKSpG4MIUlSN4aQJKkbQ0iS1I0hJEnqxhCSJHVjCEmSujGEJEndHNN7Ai90J554Yi1atKj3NCTpReXuu+/+u6oaOVQ7Q+gQFi1axOjoaO9pSNKLSpLHptPO23GSpG4MIUlSN4aQJKkbQ0iS1I0hJEnqxhCSJHVjCEmSupmxEEqyMMkdSR5M8kCSD7X6CUm2JXmkvc5v9SS5OslYknuTvGlorDWt/SNJ1gzVz0hyX+tzdZIc6TkkSbNvJq+E9gO/X1VLgOXApUmWAJcDt1fVYuD29h7gXGBx29YB18AgUID1wJuBM4H146HS2lwy1G9lqx/WOSRJfczYExOqahewq+3/OMlDwCnAKuCs1mwj8JfAR1p9U1UVsD3J8UlObm23VdU+gCTbgJVJ/hI4rqq2t/om4HzgtsM9R5vr83bGH2w6GsPoEO7+9OreU5B0lMzKd0JJFgFvBO4EThr6n/4PgZPa/inA40PddrTaweo7JqlzBOeQJHUw4yGU5FXALcCHq+rp4WPtiqRm8vxHco4k65KMJhnds2fPDM1MkjSjIZTk5QwC6MtV9dVWfqLdZqO97m71ncDCoe4LWu1g9QWT1I/kHAeoqmurallVLRsZOeRDYCVJR2gmV8cFuA54qKo+O3RoMzC+wm0NcOtQfXVbwbYceKrdUtsKrEgyvy1IWAFsbceeTrK8nWv1hLEO5xySpA5m8qcc3gK8F7gvyT2t9ofAJ4GbkqwFHgPe045tAc4DxoBngIsBqmpfko8Dd7V2V4wvUgA+CFwPvJLBgoTbWv2wziFJ6mMmV8f9FZApDp89SfsCLp1irA3Ahknqo8Dpk9T3Hu45JEmzzycmSJK6MYQkSd0YQpKkbgwhSVI3hpAkqRtDSJLUjSEkSerGEJIkdWMISZK6MYQkSd0YQpKkbgwhSVI3hpAkqRtDSJLUjSEkSerGEJIkdWMISZK6mbEQSrIhye4k9w/VvpLknrb9YPxnv5MsSvKPQ8f+ZKjPGUnuSzKW5OokafUTkmxL8kh7nd/qae3Gktyb5E1DY61p7R9JsmamPrskaXpm8kroemDlcKGq/lNVLa2qpcAtwFeHDn9v/FhVfWCofg1wCbC4beNjXg7cXlWLgdvbe4Bzh9qua/1JcgKwHngzcCawfjy4JEl9zFgIVdU3gX2THWtXM+8BbjjYGElOBo6rqu1VVcAm4Px2eBWwse1vnFDfVAPbgePbOOcA26pqX1U9CWxjQkhKkmZXr++E3go8UVWPDNVOS/KdJN9I8tZWOwXYMdRmR6sBnFRVu9r+D4GThvo8Pkmfqeq/JMm6JKNJRvfs2XOYH02SNF29QugiDrwK2gWcWlVvBP4r8GdJjpvuYO0qqY7W5Krq2qpaVlXLRkZGjtawkqQJZj2EkhwDvAv4ynitqp6tqr1t/27ge8CvAzuBBUPdF7QawBPtNtv4bbvdrb4TWDhJn6nqkqROelwJ/Rbw3ar6+W22JCNJ5rX9X2OwqODRdrvt6STL2/dIq4FbW7fNwPgKtzUT6qvbKrnlwFNtnK3AiiTz24KEFa0mSerkmJkaOMkNwFnAiUl2AOur6jrgQn55QcLbgCuS/BPwM+ADVTW+qOG
DDFbavRK4rW0AnwRuSrIWeIzBQgeALcB5wBjwDHAxQFXtS/Jx4K7W7oqhc0iSOpixEKqqi6aov2+S2i0MlmxP1n4UOH2S+l7g7EnqBVw6xVgbgA0Hm7ckafb4xARJUjeGkCSpG0NIktSNISRJ6sYQkiR1YwhJkroxhCRJ3RhCkqRuDCFJUjeGkCSpG0NIktSNISRJ6sYQkiR1YwhJkroxhCRJ3RhCkqRuZiyEkmxIsjvJ/UO1jyXZmeSetp03dOyjScaSPJzknKH6ylYbS3L5UP20JHe2+leSHNvqr2jvx9rxRYc6hySpj5m8EroeWDlJ/aqqWtq2LQBJljD42e83tD5fSDIvyTzg88C5wBLgotYW4FNtrNcDTwJrW30t8GSrX9XaTXmOo/yZJUmHYcZCqKq+CeybZvNVwI1V9WxVfR8YA85s21hVPVpVPwVuBFYlCfB24ObWfyNw/tBYG9v+zcDZrf1U55AkddLjO6HLktzbbtfNb7VTgMeH2uxotanqrwV+VFX7J9QPGKsdf6q1n2osSVInsx1C1wCvA5YCu4DPzPL5pyXJuiSjSUb37NnTezqS9JI1qyFUVU9U1XNV9TPgi/zidthOYOFQ0wWtNlV9L3B8kmMm1A8Yqx1/TWs/1ViTzfPaqlpWVctGRkaO5KNKkqZhVkMoyclDb98JjK+c2wxc2Fa2nQYsBr4F3AUsbivhjmWwsGBzVRVwB3BB678GuHVorDVt/wLg6639VOeQJHVyzKGbHJkkNwBnAScm2QGsB85KshQo4AfA+wGq6oEkNwEPAvuBS6vquTbOZcBWYB6woaoeaKf4CHBjkk8A3wGua/XrgC8lGWOwMOLCQ51DktRHBhcJmsqyZctqdHR0Wm3P+INNMzwbAdz96dW9pyDpEJLcXVXLDtXOJyZIkroxhCRJ3RhCkqRuDCFJUjeGkCSpG0NIktSNISRJ6sYQkiR1YwhJkroxhCRJ3czYs+OkF5v/d8W/7j2Fl7xT/8d9vaegFxivhCRJ3RhCkqRuDCFJUjeGkCSpG0NIktSNISRJ6mbGQijJhiS7k9w/VPt0ku8muTfJ15Ic3+qLkvxjknva9idDfc5Icl+SsSRXJ0mrn5BkW5JH2uv8Vk9rN9bO86ahsda09o8kWTNTn12SND0zeSV0PbByQm0bcHpV/Sbwt8BHh459r6qWtu0DQ/VrgEuAxW0bH/Ny4PaqWgzc3t4DnDvUdl3rT5ITgPXAm4EzgfXjwSVJ6mPGQqiqvgnsm1D7i6ra395uBxYcbIwkJwPHVdX2qipgE3B+O7wK2Nj2N06ob6qB7cDxbZxzgG1Vta+qnmQQiBNDUpI0i3p+J/S7wG1D709L8p0k30jy1lY7Bdgx1GZHqwGcVFW72v4PgZOG+jw+SZ+p6pKkTro8tifJfwf2A19upV3AqVW1N8kZwJ8necN0x6uqSlJHcX7rGNzK49RTTz1aw0qSJpj1K6Ek7wP+A/Cf2y02qurZqtrb9u8Gvgf8OrCTA2/ZLWg1gCfabbbx23a7W30nsHCSPlPVf0lVXVtVy6pq2cjIyBF+UknSocxqCCVZCfw34B1V9cxQfSTJvLb/awwWFTzabrc9nWR5WxW3Gri1ddsMjK9wWzOhvrqtklsOPNXG2QqsSDK/LUhY0WqSpE5m7HZckhuAs4ATk+xgsDLto8ArgG1tpfX2thLubcAVSf4J+BnwgaoaX9TwQQYr7V7J4Duk8e+RPgnclGQt8BjwnlbfApwHjAHPABcDVNW+JB8H7mrtrhg6hySpgxkLoaq6aJLydVO0vQW4ZYpjo8Dpk9T3AmdPUi/g0inG2gBsmHrWkqTZ5BMTJEndGEKSpG6mFUJJbp9OTZKkw3HQ74SS/DPgnzNYXDAfSDt0HP5DT0nS83SohQnvBz4M/CpwN78IoaeB/zWD85IkzQEHDaGq+mPgj5P8XlV9bpbmJEmaI6a1RLuqPpfk3wKLhvtU1aYZmpckaQ6YVggl+RLwOuA
e4LlWHn+qtSRJR2S6/1h1GbBk/FlvkiQdDdP9d0L3A/9yJiciSZp7pnsldCLwYJJvAc+OF6vqHTMyK0nSnDDdEPrYTE5CkjQ3TXd13DdmeiKSpLlnuqvjfsxgNRzAscDLgX+oquNmamKSpJe+6V4JvXp8v/243Cpg+UxNSpI0Nxz2U7Rr4M+Bc2ZgPpKkOWS6t+PeNfT2ZQz+3dBPZmRGkqQ5Y7pXQv9xaDsH+DGDW3IHlWRDkt1J7h+qnZBkW5JH2uv8Vk+Sq5OMJbk3yZuG+qxp7R9JsmaofkaS+1qfq9utwiM6hyRp9k0rhKrq4qHtkqq6sqp2T6Pr9cDKCbXLgdurajFwe3sPcC6wuG3rgGtgECjAeuDNwJnA+vFQaW0uGeq38kjOIUnqY7o/arcgydfaVc3uJLckWXCoflX1TWDfhPIqYGPb3wicP1Tf1L5z2g4cn+RkBlde26pqX1U9CWwDVrZjx1XV9vY4oU0Txjqcc0iSOpju7bg/BTYz+F2hXwX+T6sdiZOqalfb/yFwUts/BXh8qN2OVjtYfcck9SM5xwGSrEsymmR0z549h/HRJEmHY7ohNFJVf1pV+9t2PTDyfE/ermBm9KGoR3KOqrq2qpZV1bKRkef9MSVJU5huCO1N8jtJ5rXtd4C9R3jOJ8ZvgbXX8e+WdgILh9otaLWD1RdMUj+Sc0iSOphuCP0u8B4Gt7Z2ARcA7zvCc24Gxle4rQFuHaqvbivYlgNPtVtqW4EVSea3BQkrgK3t2NNJlrdVcasnjHU455AkdTDdB5heAaxpCwPGV6z9EYNwmlKSG4CzgBOT7GCwyu2TwE1J1gKPMQg3gC3AecAY8AxwMUBV7UvyceCu8blU1fhihw8yWIH3SuC2tnG455Ak9THdEPrN8QCCnwfDGw/VqaoumuLQ2ZO0LeDSKcbZAGyYpD4KnD5Jfe/hnkOSNPumezvuZUP/Nmf8Smi6ASZJ0qSmGySfAf4myf9u798NXDkzU5IkzRXTfYr2piSjwNtb6V1V9eDMTUuSNBdM+5ZaCx2DR5J01Pi9jqSXhLd87i29p/CS99e/99dHfczD/j0hSZKOFkNIktSNISRJ6sYQkiR1YwhJkroxhCRJ3RhCkqRuDCFJUjeGkCSpG0NIktSNISRJ6mbWQyjJbyS5Z2h7OsmHk3wsyc6h+nlDfT6aZCzJw0nOGaqvbLWxJJcP1U9LcmerfyXJsa3+ivZ+rB1fNJufXZJ0oFkPoap6uKqWVtVS4AwGP7P9tXb4qvFjVbUFIMkS4ELgDcBK4AtJ5iWZB3weOBdYAlzU2gJ8qo31euBJYG2rrwWebPWrWjtJUie9b8edDXyvqh47SJtVwI1V9WxVfR8YA85s21hVPVpVPwVuBFYlCYPfPbq59d8InD801sa2fzNwdmsvSeqgdwhdCNww9P6yJPcm2TD0c+KnAI8PtdnRalPVXwv8qKr2T6gfMFY7/lRrL0nqoFsIte9p3gGM/2T4NcDrgKXALgY/Kd5FknVJRpOM7tmzp9c0JOklr+eV0LnAt6vqCYCqeqKqnquqnwFfZHC7DWAnsHCo34JWm6q+Fzg+yTET6geM1Y6/prU/QFVdW1XLqmrZyMjI8/6gkqTJ9Qyhixi6FZfk5KFj7wTub/ubgQvbyrbTgMXAt4C7gMVtJdyxDG7tba6qAu4ALmj91wC3Do21pu1fAHy9tZckddDl572T/Arw28D7h8r/M8lSoIAfjB+rqgeS3AQ8COwHLq2q59o4lwFbgXnAhqp6oI31EeDGJJ8AvgNc1+rXAV9KMgbsYxBckqROuoRQVf0DExYEVNV7D9L+SuDKSepbgC2T1B/lF7fzhus/Ad59BFOWJM2A3qvjJElzmCEkSerGEJIkdWMISZK6MYQkSd0YQpKkbgwhSVI3hpAkqRtDSJLUjSEkSerGEJIkdWMISZK6MYQkSd0YQpKkbgwhSVI3hpAkqRtDSJLUTbcQSvKDJPcluSfJaKudkGR
bkkfa6/xWT5Krk4wluTfJm4bGWdPaP5JkzVD9jDb+WOubg51DkjT7el8J/fuqWlpVy9r7y4Hbq2oxcHt7D3AusLht64BrYBAowHrgzQx+znv9UKhcA1wy1G/lIc4hSZplvUNoolXAxra/ETh/qL6pBrYDxyc5GTgH2FZV+6rqSWAbsLIdO66qtldVAZsmjDXZOSRJs6xnCBXwF0nuTrKu1U6qql1t/4fASW3/FODxob47Wu1g9R2T1A92DknSLDum47n/XVXtTPIvgG1Jvjt8sKoqSc3kBKY6RwvFdQCnnnrqTE5Bkua0bldCVbWzve4GvsbgO50n2q002uvu1nwnsHCo+4JWO1h9wSR1DnKO4bldW1XLqmrZyMjI8/mYkqSD6BJCSX4lyavH94EVwP3AZmB8hdsa4Na2vxlY3VbJLQeearfUtgIrksxvCxJWAFvbsaeTLG+r4lZPGGuyc0iSZlmv23EnAV9rq6aPAf6sqv5vkruAm5KsBR4D3tPabwHOA8aAZ4CLAapqX5KPA3e1dldU1b62/0HgeuCVwG1tA/jkFOeQJM2yLiFUVY8C/2aS+l7g7EnqBVw6xVgbgA2T1EeB06d7DknS7HuhLdGWJM0hhpAkqRtDSJLUjSEkSerGEJIkdWMISZK6MYQkSd0YQpKkbgwhSVI3hpAkqRtDSJLUjSEkSerGEJIkdWMISZK6MYQkSd0YQpKkbgwhSVI3sx5CSRYmuSPJg0keSPKhVv9Ykp1J7mnbeUN9PppkLMnDSc4Zqq9stbEklw/VT0tyZ6t/Jcmxrf6K9n6sHV80e59ckjRRjyuh/cDvV9USYDlwaZIl7dhVVbW0bVsA2rELgTcAK4EvJJmXZB7weeBcYAlw0dA4n2pjvR54Eljb6muBJ1v9qtZOktTJrIdQVe2qqm+3/R8DDwGnHKTLKuDGqnq2qr4PjAFntm2sqh6tqp8CNwKrkgR4O3Bz678ROH9orI1t/2bg7NZektRB1++E2u2wNwJ3ttJlSe5NsiHJ/FY7BXh8qNuOVpuq/lrgR1W1f0L9gLHa8ada+4nzWpdkNMnonj17ntdnlCRNrVsIJXkVcAvw4ap6GrgGeB2wFNgFfKbX3Krq2qpaVlXLRkZGek1Dkl7yuoRQkpczCKAvV9VXAarqiap6rqp+BnyRwe02gJ3AwqHuC1ptqvpe4Pgkx0yoHzBWO/6a1l6S1EGP1XEBrgMeqqrPDtVPHmr2TuD+tr8ZuLCtbDsNWAx8C7gLWNxWwh3LYPHC5qoq4A7ggtZ/DXDr0Fhr2v4FwNdbe0lSB8ccuslR9xbgvcB9Se5ptT9ksLptKVDAD4D3A1TVA0luAh5ksLLu0qp6DiDJZcBWYB6woaoeaON9BLgxySeA7zAIPdrrl5KMAfsYBJckqZNZD6Gq+itgshVpWw7S50rgyknqWybrV1WP8ovbecP1nwDvPpz5SpJmjk9MkCR1YwhJkroxhCRJ3RhCkqRuDCFJUjeGkCSpG0NIktSNISRJ6sYQkiR1YwhJkroxhCRJ3RhCkqRuDCFJUjeGkCSpG0NIktSNISRJ6mZOhlCSlUkeTjKW5PLe85GkuWrOhVCSecDngXOBJQx+VnxJ31lJ0tw050KIwc9+j1XVo1X1U+BGYFXnOUnSnDQXQ+gU4PGh9ztaTZI0y47pPYEXoiTrgHXt7d8nebjnfGbYicDf9Z7E4cgfrek9hReSF9ffb316z+CF5MX1twPyXw7r7/evptNoLobQTmDh0PsFrfZzVXUtcO1sTqqXJKNVtaz3PHRk/Pu9ePm3G5iLt+PuAhYnOS3JscCFwObOc5KkOWnOXQlV1f4klwFbgXnAhqp6oPO0JGlOmnMhBFBVW4AtvefxAjEnbju+hPn3e/HybwekqnrPQZI0R83F74QkSS8QhtAc5uOLXrySbEiyO8n9veeiw5NkYZI7kjyY5IEkH+o9p568HTdHtccX/S3w2wz+we5dwEVV9WDXiWlakrwN+Ht
gU1Wd3ns+mr4kJwMnV9W3k7wauBs4f67+t+eV0Nzl44texKrqm8C+3vPQ4auqXVX17bb/Y+Ah5vBTWwyhucvHF0mdJVkEvBG4s+9M+jGEJKmDJK8CbgE+XFVP955PL4bQ3HXIxxdJmhlJXs4ggL5cVV/tPZ+eDKG5y8cXSR0kCXAd8FBVfbb3fHozhOaoqtoPjD++6CHgJh9f9OKR5Abgb4DfSLIjydrec9K0vQV4L/D2JPe07bzek+rFJdqSpG68EpIkdWMISZK6MYQkSd0YQpKkbgwhSVI3hpAkqRtDSJLUjSEkSerm/wOK4jfcO5/QBAAAAABJRU5ErkJggg==\n", 260 | "text/plain": [ 261 | "
" 262 | ] 263 | }, 264 | "metadata": { 265 | "needs_background": "light" 266 | }, 267 | "output_type": "display_data" 268 | } 269 | ], 270 | "source": [ 271 | "sns.countplot(x = y)" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 13, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "T = 100\n", 281 | "D = 40\n", 282 | "N = len(X) - T" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 14, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "trainPart = 3176046" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 15, 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "X_train = np.zeros((trainPart, T, D), dtype = 'float16')\n", 301 | "y_train = np.zeros(trainPart, dtype = 'float16')\n", 302 | "\n", 303 | "#Preparing the time series data using timestep of 100 and no of features = 40\n", 304 | "for t in range(trainPart):\n", 305 | " X_train[t, :, :] = X[t:t+T]\n", 306 | " y_train[t] = y[t+T] \n" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 16, 312 | "metadata": {}, 313 | "outputs": [ 314 | { 315 | "name": "stdout", 316 | "output_type": "stream", 317 | "text": [ 318 | "X_train - Before: 23.663386702537537 GB float16\n" 319 | ] 320 | } 321 | ], 322 | "source": [ 323 | "print('X_train - Before: {} GB'.format(X_train.nbytes/1024**3), X_train.dtype)" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 17, 329 | "metadata": {}, 330 | "outputs": [ 331 | { 332 | "name": "stdout", 333 | "output_type": "stream", 334 | "text": [ 335 | "y_train - Before: 0.005915846675634384 GB float16\n" 336 | ] 337 | } 338 | ], 339 | "source": [ 340 | "print('y_train - Before: {} GB'.format(y_train.nbytes/1024**3), y_train.dtype)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": 18, 346 | "metadata": {}, 347 | "outputs": [ 348 | { 349 | "name": "stdout", 350 | "output_type": 
"stream", 351 | "text": [ 352 | "X_test - Before: 8.169420063495636 GB float16\n", 353 | "y_test - Before: 0.002042355015873909 GB float16\n" 354 | ] 355 | } 356 | ], 357 | "source": [ 358 | "X_test = np.zeros((N - trainPart, T, D), dtype='float16')\n", 359 | "y_test = np.zeros(N - trainPart, dtype='float16')\n", 360 | "\n", 361 | "#Preparing the time series data using timestep of 100 and no of features = 40\n", 362 | "for k in range(N - trainPart):\n", 363 | " t = k + trainPart\n", 364 | " X_test[k, :, :] = X[t:t+T]\n", 365 | " y_test[k] = y[t+T]\n", 366 | "\n", 367 | "print('X_test - Before: {} GB'.format(X_test.nbytes/1024**3), X_test.dtype)\n", 368 | "print('y_test - Before: {} GB'.format(y_test.nbytes/1024**3), y_test.dtype)" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 19, 374 | "metadata": {}, 375 | "outputs": [ 376 | { 377 | "name": "stdout", 378 | "output_type": "stream", 379 | "text": [ 380 | "2.1.0\n" 381 | ] 382 | } 383 | ], 384 | "source": [ 385 | "import tensorflow as tf\n", 386 | "print(tf.__version__)" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 20, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [ 395 | "from tensorflow.keras.layers import Input, Conv1D, Conv2D, Flatten, MaxPooling1D, MaxPooling2D, Dense, LeakyReLU, Bidirectional\n", 396 | "from tensorflow.keras.models import Model\n", 397 | "from tensorflow.keras.optimizers import SGD, Adam\n", 398 | "from tensorflow.compat.v1.keras.layers import CuDNNLSTM" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 21, 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [ 407 | "X_train = np.expand_dims(X_train, -1)\n", 408 | "X_test = np.expand_dims(X_test, -1)" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 22, 414 | "metadata": {}, 415 | "outputs": [], 416 | "source": [ 417 | "i = Input(shape=X_train[0].shape)\n", 418 | "x = Conv2D(16, (4,D), 
activation=LeakyReLU(alpha=0.01))(i)\n", 419 | "x = tf.keras.layers.Reshape(target_shape=(T-3,16))(x)\n", 420 | "x = Conv1D(16, 4, activation=LeakyReLU(alpha=0.01))(x)\n", 421 | "x = tf.keras.layers.BatchNormalization()(x)\n", 422 | "x = MaxPooling1D(2)(x)\n", 423 | "x = Conv1D(32, 3, activation=LeakyReLU(alpha=0.01))(x)\n", 424 | "x = tf.keras.layers.BatchNormalization()(x)\n", 425 | "x = MaxPooling1D(2)(x)\n", 426 | "x = Bidirectional(CuDNNLSTM(64, return_sequences = False))(x)\n", 427 | "x = Dense(32, activation=LeakyReLU(alpha=0.01))(x)\n", 428 | "x = Dense(32, activation=LeakyReLU(alpha=0.01))(x)\n", 429 | "x = Dense(3, activation='softmax')(x)\n", 430 | "\n", 431 | "model = Model(i,x)" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": 23, 437 | "metadata": {}, 438 | "outputs": [ 439 | { 440 | "name": "stdout", 441 | "output_type": "stream", 442 | "text": [ 443 | "Model: \"model\"\n", 444 | "_________________________________________________________________\n", 445 | "Layer (type) Output Shape Param # \n", 446 | "=================================================================\n", 447 | "input_1 (InputLayer) [(None, 100, 40, 1)] 0 \n", 448 | "_________________________________________________________________\n", 449 | "conv2d (Conv2D) (None, 97, 1, 16) 2576 \n", 450 | "_________________________________________________________________\n", 451 | "reshape (Reshape) (None, 97, 16) 0 \n", 452 | "_________________________________________________________________\n", 453 | "conv1d (Conv1D) (None, 94, 16) 1040 \n", 454 | "_________________________________________________________________\n", 455 | "batch_normalization (BatchNo (None, 94, 16) 64 \n", 456 | "_________________________________________________________________\n", 457 | "max_pooling1d (MaxPooling1D) (None, 47, 16) 0 \n", 458 | "_________________________________________________________________\n", 459 | "conv1d_1 (Conv1D) (None, 45, 32) 1568 \n", 460 | 
"_________________________________________________________________\n", 461 | "batch_normalization_1 (Batch (None, 45, 32) 128 \n", 462 | "_________________________________________________________________\n", 463 | "max_pooling1d_1 (MaxPooling1 (None, 22, 32) 0 \n", 464 | "_________________________________________________________________\n", 465 | "bidirectional (Bidirectional (None, 128) 50176 \n", 466 | "_________________________________________________________________\n", 467 | "dense (Dense) (None, 32) 4128 \n", 468 | "_________________________________________________________________\n", 469 | "dense_1 (Dense) (None, 32) 1056 \n", 470 | "_________________________________________________________________\n", 471 | "dense_2 (Dense) (None, 3) 99 \n", 472 | "=================================================================\n", 473 | "Total params: 60,835\n", 474 | "Trainable params: 60,739\n", 475 | "Non-trainable params: 96\n", 476 | "_________________________________________________________________\n" 477 | ] 478 | } 479 | ], 480 | "source": [ 481 | "model.summary()" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 24, 487 | "metadata": {}, 488 | "outputs": [], 489 | "source": [ 490 | "model.compile(optimizer='adam',\n", 491 | " loss='sparse_categorical_crossentropy',\n", 492 | " metrics=['accuracy'])" 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "execution_count": 25, 498 | "metadata": {}, 499 | "outputs": [ 500 | { 501 | "name": "stdout", 502 | "output_type": "stream", 503 | "text": [ 504 | "Train on 3176046 samples, validate on 1096481 samples\n", 505 | "Epoch 1/4\n", 506 | "3176046/3176046 [==============================] - 1623s 511us/sample - loss: 0.4721 - accuracy: 0.8082 - val_loss: 0.4457 - val_accuracy: 0.8195\n", 507 | "Epoch 2/4\n", 508 | "3176046/3176046 [==============================] - 1616s 509us/sample - loss: 0.4309 - accuracy: 0.8272 - val_loss: 0.4443 - val_accuracy: 0.8223\n", 509 | "Epoch 3/4\n", 510 
| "3176046/3176046 [==============================] - 1624s 511us/sample - loss: 0.4118 - accuracy: 0.8345 - val_loss: 0.4485 - val_accuracy: 0.8240\n", 511 | "Epoch 4/4\n", 512 | "3176046/3176046 [==============================] - 1621s 511us/sample - loss: 0.3971 - accuracy: 0.8399 - val_loss: 0.4598 - val_accuracy: 0.8224\n" 513 | ] 514 | } 515 | ], 516 | "source": [ 517 | "EPOCHS = 4\n", 518 | "BATCH_SIZE = 32\n", 519 | "r = model.fit(\n", 520 | " X_train, y_train,\n", 521 | " batch_size=BATCH_SIZE,\n", 522 | " epochs=EPOCHS,\n", 523 | " validation_data=(X_test, y_test),\n", 524 | ")" 525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": 26, 530 | "metadata": { 531 | "scrolled": true 532 | }, 533 | "outputs": [ 534 | { 535 | "data": { 536 | "text/plain": [ 537 | "" 538 | ] 539 | }, 540 | "execution_count": 26, 541 | "metadata": {}, 542 | "output_type": "execute_result" 543 | }, 544 | { 545 | "data": { 546 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEWCAYAAACXGLsWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xd8VGX2+PHPSYEkkARIqEkglCBSBCGAKDawdxfBiuKi2Mt3193F3xbbFnV3XRuuomJDRRR1saIi2CkBkaZCqAlFSJCEJKRMcn5/3BsYYpKZkExmkpz36zWvzG1zz52BOfOU+zyiqhhjjDG1CQt2AMYYY0KfJQtjjDE+WbIwxhjjkyULY4wxPlmyMMYY45MlC2OMMT5ZsjAhQ0RSRURFJMKPfSeJyJeNEZcxxpKFOUwisllESkUkscr6b90v/NTgRHYgjlYicreIrBeRQjfeGcGMy41hv4gUeD0e9/PYhSJyTaBj9EddkrppPixZmPrYBFxauSAig4CY4IVziDeA84DLgHhgMLAMGFt1R3E01v+Fc1W1rdfj5oZ4UfviNoFmycLUx0vAlV7LVwEveu8gIvEi8qKI7BaRLSLyp8ovZhEJF5F/iUiOiGwEzq7m2GdFZIeIbBORv4pIuK+gROQU4FTgfFVdqqoeVc1T1Wmq+qy7z0IR+ZuIfAUUAb1EpJuIzBWRPSKSKSLXer3mCBHJEJF8EflJRB5y10eJyEwRyRWRvSKyVEQ61/WNrKxWc9+Pn0Vkk4ic6W77G3A88Lh3acT9dX+TiKwH1rvrjnVjyHP/Hut1joUi8g8RWeJex/9EpIO77T0RuaVKTCtF5MI6XkdrEXlYRLa7j4dFpLW7LVFE3nXfpz0i8oXXv4U/uJ/xPhH5UUR+kdRNkKmqPexR5wewGTgF+BE4EggHsoEegAKp7n4vAv8DYoFUYB0w2d12PfADkAJ0ABa4x0a4298CngLaAJ2AJcB17rZJwJc1xHY/8JmP+BcCW4EBQAQQCXwOPAFEAU
OA3cAYd/9vgInu87bAMe7z64B3cEpU4cAwIK6296yGbZOAMuBa93VuALYD4hXvNVWOUeBj972Ldv/+DEx0r+lSdznB6zW2AQPd93QOMNPdNgFY7PXag4FcoFU1saZ6f05Vtt0LLHI/r47A18B97rZ/AE+673UkTgIU4AggC+jm9fq9g/1v3B6HPqxkYeqrsnRxKvA9zpcR4JQcgEuAO1V1n6puBv6N82UGzhfUw6qapap7cL5MKo/tDJwF3K6qhaq6C/iP+3q+JAA7/NjveVVdo6oeoAtwHPAHVS1W1RXAMxwsOZUBfUQkUVULVHWR1/oEoI+qlqvqMlXNr+Wcb7u/rCsf13pt26KqT6tqOfAC0BXwVUr5h6ruUdX9OCWz9ar6kjqlqVdxkvG5Xvu/pKqrVbUQ+DMwwf2c5gJ9RSTN3W8i8Jqqlvo4f1WXA/eq6i5V3Q3cw8HPu8y9ph6qWqaqX6iqAuVAa6C/iESq6mZV3VDH85oAs2Rh6uslnHaBSVSpggIScX5BbvFatwVIcp93w/lF6b2tUg/32B2VX6w4pYxOfsSUi/Ol5Iv3ubsBe1R1Xw2xTgb6Aj+41TvnuOtfAuYBs9xqlwdFJLKWc16gqu28Hk97bdtZ+URVi9ynbet4DVuqbPe+hqr7b8F5jxNVtRh4DbjCrRq61L22uqoawxZ3HcA/gUzgIxHZKCJTAVQ1E7gduBvYJSKzRKQbJqRYsjD1oqpbcBq6zwLerLI5B+fXZA+vdd05WPrYgVMF5b2tUhZQgvNFVvnFGqeqA/wI6xNghIgk+wrf6/l2oIOIxFYXq6quV9VLcZLVA8AbItLG/YV8j6r2B44FzuHQdpyGUtPw0FWvoUeV7d7vN/zy/S7D+ZzAKc1cjtMJoEhVvzmMOKvG0N1dh1u6/K2q9sLpfPCbyrYJVX1FVUdzsBrzgcM4twkgSxamIUzGqdsv9F7pVqfMBv4mIrEi0gP4DTDT3WU2cKuIJItIe2Cq17E7gI+Af4tInIiEiUhvETnRVzCq+glOXf5bIjJMRCLc818vIr+u4ZgsnPr1f7iN1ke51zUTQESuEJGOqloB7HUPqxCRk0VkkFuVk4/z5Vvhx3tWVz8BvXzs8z5OVdJl7jVfDPQH3vXa5woR6S8iMTjtC2+4nxNucqjAqSr0p1TR2n2vKh9hwKvAn0Skozjdqv/CwffwHBHpIyIC5OFUP1WIyBEiMsZtCC8G9hOY99DUgyULU2+qukFVM2rYfAtQCGwEvgReAWa4257GqcL5DljOL0smVwKtgLU4DbVv4F/1EsBFOF+er+F8Ma0G0nFKHTW5FKdxdTtO4/pdbuIBOANYIyIFwCPAJW47QRc3rnycNpvPqP2L9h059D6Lt/y8nkeAi9yeUo9Wt4Oq5uKUbH6LUxX3e+AcVc3x2u0l4HmcKq8o4NYqL/MiMIiDCb02BThf7JWPMcBfgQxgJbAK53P9q7t/Gs77X4DTYeAJVV2A015xP04JZydO6e1OP85vGlFlTwtjTDMnIgtxej89U8s+VwJT3CohYw6wkoUxBgC3aupGYHqwYzGhx5KFMQYROR3nvpKfcKoKjTmEVUMZY4zxyUoWxhhjfGo2g48lJiZqampqsMMwxpgmZdmyZTmq2tHXfs0mWaSmppKRUVPvTWOMMdURkap3/VfLqqGMMcb4ZMnCGGOMT5YsjDHG+NRs2iyqU1ZWRnZ2NsXFxcEOpUmIiooiOTmZyMjaBk01xrREzTpZZGdnExsbS2pqKs7YZaYmqkpubi7Z2dn07Nkz2OEYY0JMs66GKi4uJiEhwRKFH0SEhIQEK4UZY6rVrJMFYImiDuy9MsbUpNknC1/KK5Tte/fjqbDh840xpiYtPlkUl5WTW1jK5pwiyisadpys3NxchgwZwpAhQ+jSpQtJSUkHlktL/Zva+Oqrr+bHH3+sdZ9p06bx8ssvN0TIxhhTrWbdwO2PNq0j6N4+mq17iti6p4geCTGENVB1TEJCAi
tWrADg7rvvpm3bttxxxx2H7KOqqCphYdXn7eeee87neW666ab6B2uMMbVo8SULgPiYViS1j2FfcRlZe4oI9Ei8mZmZ9O/fn8svv5wBAwawY8cOpkyZQnp6OgMGDODee+89sO/o0aNZsWIFHo+Hdu3aMXXqVAYPHsyoUaPYtWsXAH/60594+OGHD+w/depURowYwRFHHMHXX38NQGFhIePGjaN///5cdNFFpKenH0hkxhjjS0BLFiJyBs50kOHAM6p6fw37jcOZmnK4qmaIyOXA77x2OQoYqqqH/e12zztrWLs9v9Z9ysorKPVUEBEeRusI33m0f7c47jp3wGHF88MPP/Diiy+Snp4OwP3330+HDh3weDycfPLJXHTRRfTv3/+QY/Ly8jjxxBO5//77+c1vfsOMGTOYOnXqL15bVVmyZAlz587l3nvv5cMPP+Sxxx6jS5cuzJkzh++++46hQ4ceVtzGmJYpYCULdwL7acCZOJPGXyoi/avZLxa4DVhcuU5VX1bVIao6BJgIbKpPovBXZHgYkRFheMorKC0PbIN37969DyQKgFdffZWhQ4cydOhQvv/+e9auXfuLY6KjoznzzDMBGDZsGJs3b672tX/1q1/9Yp8vv/ySSy65BIDBgwczYMDhJTljTMsUyJLFCCBTVTcCiMgs4Hyg6rfgfcADHFqS8HYpMKu+wfhbAlBVduQVk1NQQpe4KDrFRdX31NVq06bNgefr16/nkUceYcmSJbRr144rrrii2vsdWrVqdeB5eHg4Ho+n2tdu3bq1z32MMaYuAtlmkQRkeS1nu+sOEJGhQIqqvlfL61wMvFrdBhGZIiIZIpKxe/fu+sZb+Zp0jY+ifUwrduY7SSPQ8vPziY2NJS4ujh07djBv3rwGP8dxxx3H7NmzAVi1alW1JRdjjKlJ0HpDiUgY8BAwqZZ9RgJFqrq6uu2qOh13cvn09PQGa5UWEZLaRx+4ByM8TGgf08r3gYdp6NCh9O/fn379+tGjRw+OO+64Bj/HLbfcwpVXXkn//v0PPOLj4xv8PMaY5ilgc3CLyCjgblU93V2+E0BV/+EuxwMbgAL3kC7AHuA8Vc1w9/kPsFtV/+7rfOnp6Vp18qPvv/+eI4888rCvoaJC2ZxbSGFJOT0SYoiLbroD7Hk8HjweD1FRUaxfv57TTjuN9evXExFx6O+F+r5nxpimRUSWqWq6r/0CWbJYCqSJSE9gG3AJcFnlRlXNAxIrl0VkIXCHV6IIAyYAxwcwxlqFhQk9EtqwKaeALXuK6JnQhrZRTfPWlIKCAsaOHYvH40FVeeqpp36RKIwxpiYB+7ZQVY+I3AzMw+k6O0NV14jIvUCGqs718RInAFmVDeTBEh4mpCa0YePuQjbnFtKrYxtiWjW9L9l27dqxbNmyYIdhjGmiAvqtp6rvA+9XWfeXGvY9qcryQuCYQMVWFxHhYfRMbMOGnAI25xTSq2NboiLDgx2WMcY0GruD20+REU7CQIRNOYWUesqDHZIxxjQaSxZ10DoinF6JbahQZWNOIWUBvnHPGGNChSWLOoqKDKdnQhs85cqmnEI8ljCMMS2AJYvDENM6gtSEGEo8FWzOrXlo84YYohxgxowZ7Ny588CyP8OWG2NMQ2p63XpCRNuoSLp3iGFrbiFbcgtJTWzzi6HN/Rmi3B8zZsxg6NChdOnSBfBv2HJjjGlIVrKoh/joSJLax1BQ4qnz0OYvvPACI0aMYMiQIdx4441UVFTg8XiYOHEigwYNYuDAgTz66KO89tprrFixgosvvvhAicSfYcvXr1/PyJEjGTRoEH/84x9p165doN4GY0wL0HJKFh9MhZ2rGvY1uwyiw5n3U1GhbM/bT/bP+0luH+1zLuvVq1fz1ltv8fXXXxMREcGUKVOYNWsWvXv3Jicnh1WrnDj37t1Lu3bteOyxx3j88ccZMmTIL16rpmHLb7nlFu644w7Gjx/P448/3rDXbYxpcaxk0QASY1vTOS6Kn4tK2Z
FX7LOE8cknn7B06VLS09MZMmQIn332GRs2bKBPnz78+OOP3HrrrcybN8+vsZtqGrZ88eLFjBs3DoDLLruspsONMcYvLadkcWa18y41mE6xrSmvUHIKSggPEzrXMrS5qvLrX/+a++677xfbVq5cyQcffMC0adOYM2cO06dPr/W8/g5bbowx9WEliwbiPbT5Tz6GNj/llFOYPXs2OTk5gNNrauvWrezevRtVZfz48dx7770sX74cgNjYWPbt21eneEaMGMFbb70FwKxZ9Z4OxBjTwrWckkUjEBGS20dToe7Q5iK0b/PLoc0HDRrEXXfdxSmnnEJFRQWRkZE8+eSThIeHM3nyZFQVEeGBBx4AnK6y11xzDdHR0SxZssSvWB599FEmTpzIPffcw+mnn27DkRtj6iVgQ5Q3tkAMUX64vIc2754QQ3wQhjYvLCwkJiYGEWHmzJm89dZbzJkzx+dxNkS5MS1LKAxR3mIdHNq8kK17iuiZEEPbqMZNGEuXLuX222+noqKC9u3b270Zxph6sWQRIM7Q5jFszClkc25Row9tftJJJx24IdAYY+qr2TdwB7OarXJo84hwZ6Ta4rLQHqm2uVRJGmMaXrNOFlFRUeTm5gb1SzAyPIxe7lAgm3IKKQnRoc1VldzcXKKiau7ya4xpuZp1NVRycjLZ2dns3r072KFQVl5Bzr4Sdm4ROsa2Jjys9ru8gyEqKork5ORgh2GMCUEBTRYicgbwCM60qs+oarV3xonIOOANYLjXHNxHAU8BcUCFu624LuePjIykZ8+e9biChvVd1l4ue3oRSe2jeW3KqGq71RpjTCgKWDWUiIQD04Azgf7ApSLSv5r9YoHbgMVe6yKAmcD1qjoAOAkoC1SsjWVwSjueviqdzblFTHp+KQUldre1MaZpCGSbxQggU1U3qmopMAs4v5r97gMeALxLDacBK1X1OwBVzVXV0Kzsr6Njeycy7bKhrN6Wx5QXM0K+0dsY0wSUB/63dCCTRRKQ5bWc7a47QESGAimq+l6VY/sCKiLzRGS5iPy+uhOIyBQRyRCRjFBol/DXqf0786/xR/H1hlxuffVbm23PGFM35WWw6Qv46E8wbSR8UO1XZIMKWgO3iIQBDwGTqtkcAYwGhgNFwHz3LsP53jup6nRgOjh3cAc04AZ24dHJ5O/3cNfcNfx+zkr+ddFgwkKw0dsYEyL2/QSZn8D6ebBhAZTkQ3gr6HEcJA8P+OkDmSy2ASley8nuukqxwEBgoTv/Qxdgroich1MK+VxVcwBE5H1gKHBIsmjqrjo2lbz9ZTz08TrioiK569z+PufCMMa0EBUVsP1bJzms/8h5DhDbFQZcCGmnQa+ToHXbRgknkMliKZAmIj1xksQlwIGJFVQ1D0isXBaRhcAdqpohIhuA34tIDFAKnAj8J4CxBs0tY/qQv7+MZ77cRFx0JL85tW+wQzLGBMv+vbDhUyc5rP8YinJAwpySw5g/OwmiyyAIwo/KgCULVfWIyM3APJyuszNUdY2I3AtkqOrcWo79WUQewkk4CrxfTbtGsyAi/PHsI8kvLuPR+euJj45k8ujQ6e5rjAkgVdj1vZscPoKti0DLIbo99DkF0k6HPmMhpkOwI23eo842JeUVys2vLOeD1Tt58KKjmJCe4vsgY0zTU1oEmz4/mCDy3H5AXQY5ySHtNEhOh7DwRgnHRp1tYsLDhIcvGULBCxlMnbOSuKgIzhjYNdhhGWMaws+bYd1HTvvDpi+gvAQi20Dvk+GE30HaqRDXLdhR1sqSRQhpHRHOUxOHccUzi7n11RXMmBTJ6LRE3wcaY0KLpxS2fnOw9JCzzlnfoTcMn+yUHnocCxGtgxtnHVg1VAjKKyrj4unfsHVPETOvGcnQ7u2DHZIxxpd9Px1MDhsWQOm+g11b+7rVSwm9gx3lL/hbDWXJIkTt2lfMhCe/YU9hKa9dN4oju8YFOyRjjLeKcqc76zq3a+sOd/6Y2G7Q9zQnOfQ8sdG6th
4uSxbNQNaeIsY/+Q2eCuWN60eRmtgm2CEZ07Lt/9np2rruI8j8GIpy3a6tIw4miM4Dg9K19XBZsmgmMnftY/yT3xDTKoI5NxxLl3ibb8KYRqMKu9Y6JYd1H0HWYrdrawena2vf06H3mJDo2nq4LFk0I6uy87j06UV0iY9i9nWj6GBDmxsTOKWFTtfWdfOcG+Pys531XY5ySg59T4ekYY3WtTXQLFk0M4s25nLVjCUc0SWWl68ZSWxUZLBDMqb52LPRSQzr5sHmL52ura3aOsNppLnVS3HNsyu73WfRzBzTK4EnLh/KdS8t49oXM3j+6hFERTaPXzbGNDpPKWz9+mCCyF3vrE/oA8Ovcdofuo9qUl1bA82SRRMy9sjO/HvCYG5/bQU3v7Kc/14xjMjwZj2NujENZ99Or66tCw92bU0d7SSItFNDsmtrqLBk0cScPySJ/GIPf357Nb9/YyX/Hm9DmxtTrYpy2LbMbZyeBztXOuvjkmDQRe6orSdCK+tl6A9LFk3QxGN6kL+/jH/O+5HYqAjuOW+ADW1uDEDRnoOjtmZ+crBra8pIGHuX27V1QJPq2hoqLFk0UTee1Ju8/WVM/3wjcVGR3HH6EcEOyZjGpwo/rXHnfPjY7dpa4XRtTTvVSQ5NvGtrqLBk0USJCHee2Y/8/WU8viCT+OhIrj2hV7DDMibwSgrcUVsru7a6c6p1HQzH/9YZuTVpaLPp2hoqLFk0YSLC3y4cxL4SD397/3vioiO4eHj3YIdlTMPL3eAkhvWVXVtLna6tvU+Gk6ZCn1ObbdfWUGHJookLDxP+M2EIBcUe7nxzFbFRkZw1yP7TmCbOUwJbvj7Yeyk301mfkAYjpjhVTN2PhQi7QbWx+EwWIvIg8FdgP/AhcBTwf6o6M8CxGT+1igjjySuGMfHZxdw261vatI7gxL4dgx2WMXWTv90tPXwEGxdCaQGEt3a6tlYmiA5W1RosPu/gFpEVqjpERC4EzgF+A3yuqoN9vrjIGcAjONOqPqOq99ew3zjgDWC4Owd3KvA98KO7yyJVvb62czX3O7j9kbe/jEunL2JTTiEzrxnBsB7WqGdCWEU5ZGe4pYd5sHOVsz4u2UkMfU+HnidY19YAa8g7uCv3ORt4XVXz/OmmKSLhwDTgVCAbWCoic1V1bZX9YoHbgMVVXmKDqg7xIz7jio+O5IVfj2DCU98w6bmlvDZlFP272dDmJoQU7YHM+U5yyPzEGcVVwp2urafc7fRe6tTfuraGIH+Sxbsi8gNONdQNItIRKPbjuBFApqpuBBCRWcD5wNoq+90HPAD8zu+oTY06xrZm5jUjuei/X3PljMXMvm4UvTqG9nj6phlThZ9WH5zzIXup07U1JsHptdTX7doabRN8hTqfyUJVp7rtFnmqWi4ihThf+r4kAVley9nASO8dRGQokKKq74lI1WTRU0S+BfKBP6nqF36c0wBJ7aJ5afJIJjz1DROfXcLr14+iW7voYIdlWoqSAqfNYf1HThvEvu3O+q6D4fg7nOqlbkdb19Ymxp8G7vHAh26i+BMwFKfBe2d9TiwiYcBDwKRqNu8AuqtqrogMA94WkQGqml/lNaYAUwC6d7cuo976dGrLi78ewaXTF3HFs4t5/bpRJLS1QdFMgORucEsP85xeTOWl0CrW6dqadprTBhHbJdhRmnrwp4F7paoeJSKjcZLEP4G/qOpIH8eNAu5W1dPd5TsBVPUf7nI8sAEocA/pAuwBzlPVjCqvtRC4o+p6b9bAXb0lm/Yw8dnFpHVuyyvXHkOcDW1uGoKnBLZ85UwItH6eM8Q3QGLfg0N6dx9lXVubgIZs4C53/54NTHerjP7qx3FLgTQR6QlsAy4BLqvcqKp5QKJXwAtxE4LbLrLHLc30AtKAjX6c01QxomcHnrxiGNe+mME1L2Tw4q9taHNTR2XFzl3S+dudobzXf+JUM5UVOl1bex4PI29wu7b2DHa0JkD8SRbbROQpnF5ND4hIa8DnuN
iq6hGRm4F5OF1nZ6jqGhG5F8hQ1bm1HH4CcK+IlAEVwPWqusePWE01Tu7Xif9cPIRbZ33LjS8v56mJNrS5cZXtd5JAZTLIy3aXtzszxOVvdwbj8xaXDIMvdhqoe54ArWKCE7tpVP5UQ8UAZwCrVHW9iHQFBqnqR40RoL+sGsq3VxZv5f+9tYpzB3fj4YuHEG5DmzdvpUWHfukfSAjbDi7vr+Y3WHR7JyHEdXMe8UnOsN5x3SA+xbkxzrq2NhsNVg2lqkUisgE4XUROB74ItURh/HPZyO7k7S/jgQ9/IDYqgr9dMNCGNm+qSgtrLglUri/e+8vjYhIOJoCUEW5CqEwEyRDb1UoKplr+9Ia6DbgWeNNdNVNEpqvqYwGNzATEDe7Q5k9+toH46Ej+cEa/YIdkqiopcEsB3iUBr9JA/jYozvvlcTGJB3/9dz/GKxEkHSwlRFoXanN4/GmzmAyMVNVCABF5APgGsGTRRP3hjCPILy7jvwudhHH9iTaVZKMpzj/0S7/y+YGqoe1QUk0iaNPR+dJv3xN6HHewJFCZBGK7QWRU41+PaTH8SRbCwR5RuM+t7qIJExHuO38g+4o93P/BD8RFRXLZSLtPpV5UoSS/mpKAdzXRdmefqtp2dr7wE3o7PYu8SwPxSU7VUITdI2OCy59k8RywWETecpcvAGYELiTTGMLDhIcmDKaguIw/vr2K2KgIzh3cLdhhhSZVp9qn2pKA19/SgioHilci6AO9Tvpl1VBsV7sXwTQJ/jRwP+TeAzHaXXW1qn4b0Kga047v4PlznSJ8ZDRExkBElPO3xnUxzvpq10W7x0QfuhwRBWGh1V01MjyMJy4fxlUzlvB/r62gbVQEJx/RKdhhNS5VZzC76hqJvZNCWWGVA8W5IzkuCToe4YxvVLXnUGxXCLebIE3z4LPrbLUHiWxV1ZCqtzjsrrM/b4FFTzj9zcv2Q1kReIq9lveDZ/+hyxVlhxdkRFSVhOKVkH6xLubg/r9YF1MlIVVZF96qTl0b84vLuOzpRWTuKuDFX49kRM9mMrT5gURQyz0E+dudz9ybhDlf9JXtAYc0Eic5CaFtZ0sEplnwt+vs4SaLLFVNOazIAqRR77Mo93glkCLnDtcDSabIXV91nde2A8f6sU4r6h6fhP2yhFNdqcdrXZFGMnPZbnJLwrnyhH4kdUzwo2QVDeFBmmxR1Rnu+kBDcXU9h7Y77+Eh7034oYnAu5G48t6Ctp2Dd13GNLKGHO6jOnXPMM1JeASEx0Lr2MCeR9UZkK2mEs4h66omrRoSWWkBFOb8IpHFePY7IzIKUJfxfcMia09I1ZaEaktkXgmp8l6C/CoJoLKEUF5yaCwSfvCLv+tgOOLMgyWBypJB28422qkxh6HGZCEiv6lpE2ATJDQGEacXTERriG4X2HNVVICnmM0/5XDT81/RNszDY+P70SmqopokVVtVXWWC2u9UAVVd59nvJMC6CotwuofGJ0HSUDjyHK+7jCsTQSdLBMYESG0li9p+Nj/S0IGYIAsLg1YxpKZ058HJ8VwyfRGXzC1k9vWjSGzooc0PVOPVUlVXtt9JkvFJTlJo0zHkOggY05IcVptFKLKxoRpWxuY9XPHsYnoltuXVKccQH22NucY0R/62WdhPNVOt9NQOPDUxnfW79nHNC0vZX1ru+yBjTLNlycLU6MS+HXn44qNZtuVnrp+5jFLPYfTMMsY0Cz6ThYhYi2ELdvZRXfn7hYP4bN1u/m/2Csormke1pTGmbvzpOrteROYAz6nq2kAHZELPJSO6k19cxt/f/4HY1hH841eDbGhzY1oYf5LFYJwpUZ8RkTCccaFmqWo1I6KZ5mrKCc7Q5tMWOCPVTj2znyUMY1oQf6ZH3aeqT6vqscAfgLuAHSLygoj0CXiEJmTccdoRTDymB099vpEnFm4IdjjGmEbkV5uFiJznjjr7MPBvoBfwDvC+j2PPEJEfRSRTRKbWst84EVERSa+yvruIFIjIHX5djQ
koEeGe8wZw/pBu/HPej7y0aEuwQzLGNBK/2iyABcA/VfVrr/VviMgJNR3kNoxPA04FsoGlIjK3aruHiMRL443lAAAfEElEQVQCtwGLq3mZh4AP/IjRNJKwMOFf4wdTUOzhL/9bTVxUBOcPSQp2WMaYAPOn6+xRqjq5SqIAQFVvreW4EUCmqm5U1VJgFnB+NfvdBzwAFHuvFJELgE3AGj9iNI0oMjyMaZcPZURqB347+zs+/eGnYIdkjAkwf5JFJxF5R0RyRGSXiPxPRHr5cVwSkOW1nO2uO0BEhgIpqvpelfVtcdpH7qntBCIyRUQyRCRj9+7dfoRkGkpUZDjPXJVO/25x3DBzOYs25gY7JGNMAPmTLF4BZgNdgG7A68Cr9T2x27PqIeC31Wy+G/iPqladeuwQqjpdVdNVNb1jx471DcnUUWxUJM9fPYKUDjFc80IGK7P3BjskY0yA+JMsYlT1JVX1uI+ZgD8zw28DvOe8SHbXVYoFBgILRWQzcAww123kHgk86K6/Hfh/InKzH+c0jaxDm1a8NHkE8dGRXDVjCZm79gU7JGNMAPiTLD4QkakikioiPUTk98D7ItJBRGqbUm0pkCYiPUWkFc69GnMrN6pqnqomqmqqqqYCi4DzVDVDVY/3Wv8w8HdVffxwL9IEVtf4aF6+ZiThYWFc8cwSsvYU+T7IGNOk+JMsJgDX4fSIWgjcgPPFvwyocZhXVfUANwPzgO+B2aq6RkTuFZHz6hm3CTGpiW14afIIiko9THx2Mbv2Ffs+yBjTZNgQ5aZBLdvyM1c8s5geCTG8NmUU8TE2tLkxoazBhigXkUgRuVVE3nAfN4uIfQOYag3r0Z7pVw5j4+5Crn5+CUWlnmCHZIxpAP5UQ/0XGAY84T6GueuMqdbxaR159NIhrMjay3UvLaPEY3NhGNPU+ZMshqvqVar6qfu4Ghge6MBM03bGwK7cP+4ovlifw+2zVuApt7kwjGnK/EkW5SLSu3LBvSHPfioanyakp/Cns4/kg9U7+X9vraK5tI8Z0xL5MzbU74AFIrIREKAHcHVAozLNxjXH9yJ/fxmPfppJXFQkfzz7SBva3JgmqNZk4d5lvR9IA45wV/+oqiWBDsw0H/93al/yiz088+Um4qMjuWVsWrBDMsbUUa3JQlUrRGSaqh4NrGykmEwzIyL85Zz+5O8v498fryMuOpKrjk0NdljGmDrwp81ivjvfhNUdmMMWFiY8cNFRnHJkZ+6au4a3vs0OdkjGmDrwJ1lchzN4YImI5IvIPhGxKVVNnUWGh/H4ZUczqlcCd7y+ko/X2tDmxjQV/kyrGquqYaraSlXj3OW4xgjOND9RkeE8fVU6A5PiuemV5Xy9ISfYIRlj/ODPHdzz/VlnjL/ato7g+UnDSU2I4doXMliRZUObGxPqakwWIhLljiqbKCLtK0eZFZFUqkxiZExdtW/Tipcmj6RD21ZMem4J636yoc2NCWW1lSyuwxlZtp/7t/LxP8CGCzf11jkuipmTRxIZHsbEZxfb0ObGhLAak4WqPqKqPYE7VLWXqvZ0H4NtbgnTUHoktGHm5JEUl1Vw+TOL2ZVvQ5sbE4r8aeB+TESOFZHLROTKykdjBGdahiO6xPL81cPJKShh4rNL2FtUGuyQjDFV+NPA/RLwL2A0zgCCwwGfY58bUxdHd2/P01emsymnkEnPLaWwxIY2NyaU+DM2VDrQX20UOBNgx/VJ5NFLj+amV5Yz5aUMnr1qOFGR4cEOyxiDfzflrQa6HM6Li8gZIvKjiGSKyNRa9hsnIioi6e7yCBFZ4T6+E5ELD+f8puk5Y2AXHhx3FF9l5nL5M4tZvDE32CEZY/CvZJEIrBWRJcCBAQRVtdZ5tEUkHJgGnApkA0tFZK6qrq2yXyxwG7DYa/VqIF1VPSLSFfhORN5x5/U2zdy4YckA/OODH7h4+iJG9uzAbaekMapXgo1Ya0yQ+JMs7j7M1x4BZKrqRgARmQWcD6ytst99wA
M4Q6EDoKrefSijAKsCa2HGDUvmrEFdeXXJVp78bAOXPb2YEakduHVsGsf1saRhTGOr7aa8fgCq+hmwSFU/q3zgVcKoRRKQ5bWcTZWb+URkKJCiqu9Vc/6RIrIGWAVcX12pQkSmiEiGiGTs3r3bj5BMUxLdKpxfj+7J578/mXvOG8DWPUVc8exiLnryGz5ft9smUzKmEdXWZvGK1/Nvqmx7or4ndufKeAj4bXXbVXWxqg7A6X11p4hEVbPPdFVNV9X0jh071jckE6KiIsO56thUFv7uJO47fwDb9+7nyhlLuPCJr1nw4y5LGsY0gtqShdTwvLrl6mwDUryWk911lWKBgcBCEdkMHAPMrWzkrqSq3wMF7r6mBYuKDGfiKCdp/O3CgezeV8LVzy3lgmlfMf/7nyxpGBNAtSULreF5dcvVWQqkiUhPEWkFXALMPfACqnmqmqiqqaqaCiwCzlPVDPeYCAAR6YEz5MhmP85pWoDWEeFcPrIHC+44ift/NYjcwlImv5DBuY9/yUdrdlrSMCYAamvgThaRR3FKEZXPcZd9DiTo9mS6GZgHhAMzVHWNiNwLZKjq3FoOHw1MFZEyoAK4UVVtLGtziFYRYVwyojvjhiXz1rfbmLYgkykvLePIrnHcNrYPp/XvQliYNYQb0xCkpl9hInJVbQeq6gsBiegwpaena0ZGRrDDMEHkKa/gfyu28/iCTDblFNKvSyy3jEnjzIGWNIypiYgsU1Wfo3LUmCyaGksWppKnvIJ3Vm7nsU8z2bi7kL6d23LLmDTOGtSVcEsaxhzCkoVp8corlHfdpJG5q4A+ndpyy5g+nHNUN0saxrgsWRjjqqhQ3l+9g0fnr2fdTwX0SmzDzWP6cN7gbkSE+zPijTHNlyULY6qoqFDmrdnJI/PX88POfaQmxHDTyX248OgkSxqmxfI3WfgzRPmDIhInIpEiMl9EdovIFQ0TpjGNJyxMOHNQV96/9XiemjiMNq0j+N0bKxnz7894belWysorgh2iMSHLn59Tp6lqPnAOzr0OffAax8mYpiYsTDh9QBfevWU0z1yZTnx0JH+Ys4qT/7WQV5dspdRjScOYqvxJFpX3YpwNvK6qeQGMx5hGIyKc0r8zc28+jhmT0klo04o733SSxsxFWyjxlAc7RGNChj/J4l0R+QEYBswXkY6ATZRsmg0RYUy/zrx903E8f/VwOsW15k9vr+akfy7kxW82U1xmScMYvxq4RaQDkKeq5SISA8Sp6s6AR1cH1sBtGoqq8mVmDo98sp6MLT/TOa4115/Ym0tHdLeZ+0yz05AN3OOBMjdR/AmYCXRrgBiNCUkiwvFpHXn9+lG8cs1IeiS04Z531nL8gwt45ouN7C+1koZpefyphvqzqu4TkdHAKcCzwH8DG5YxwSciHNsnkdnXjeLVa4+hT8e2/PW97zn+wQU8/flGikpt4kbTcviTLCp/Rp0NTHcnKmoVuJCMCT2jeifw6pRjmH3dKPp1ieVv73/P8Q8s4MnPNlBYYknDNH8+2yxE5F2ceShOBYYC+4Elqjo48OH5z9osTGNatmUPj8zP5PN1u2kfE8k1x/fiqmNTadvan5mKjQkdDXYHt9ugfQawSlXXi0hXYJCqftQwoTYMSxYmGJZv/ZlH569n4Y+7aRcTyeTjenLVcanERUUGOzRj/NKgw32IyGDgeHfxC1X9rp7xNThLFiaYvsvay6Pz1zP/h13ERUXw69E9ufq4nsRHW9Iwoa0he0PdBrwMdHIfM0XklvqHaEzzMTilHc9OGs67t4xmZK8EHv5kPaPv/5SHPvqRvUWlwQ7PmHrzpxpqJTBKVQvd5TbAN6p6VCPE5zcrWZhQsmZ7Ho/Nz+TDNTtp2zqCScemMnl0T9q3sb4hJrQ0WMkCZxpV747l5e46f4I4Q0R+FJFMEZlay37jRERFJN1dPlVElonIKvfvGH/OZ0yoGNAtnicnDuPD24/nxL4dmbYwk9EPfMoDH/5AbkFJsMMzps
786brxHLBYRN5yly/AudeiViISDkzD6UWVDSwVkbmqurbKfrHAbcBir9U5wLmqul1EBuLM4+1z3m9jQk2/LnFMu3wo637ax2OfZvLkZxt44evNTDymB9ee0IvEtq2DHaIxfvG3gXsoMNpd/EJVv/XjmFHA3ap6urt8J4Cq/qPKfg8DH+OMZHuHqmZU2S5ALtBVVWv8SWbVUKYpyNy1j8c/zWTud9tpFRHGFSN7MOXEXnSKjQp2aKaFapBqKBEJF5EfVHW5qj7qPnwmClcSkOW1nE2V0oGbhFLcG/1qMg5YXluiMKap6NMplocvOZqPf3MiZw3syoyvNnH8Awu45501/JRv43Oa0FVrslDVcuBHEene0CcWkTDgIeC3tewzAHgAuK6G7VNEJENEMnbv3t3QIRoTML07tuWhi4fw6W9P4tzB3Xjxmy0c/+AC7p67hp15ljRM6PGnN9TnwNHAEqCwcr2qnufjuFqroUQkHtgAFLiHdAH2AOepaoaIJAOfAler6le+LsSqoUxTtjW3iGkLMpmzPJswES4ensINJ/WmW7voYIdmmrmGvIP7xOrWq+pnPo6LANYBY3GGC1kKXKaqa2rYfyFum4WItAM+A+5R1Td9XQRYsjDNQ9aeIp5YuIE3ljk1uOPTU7jxpN4kt48JcmSmufI3WdTYG0pE+gCdqyYFd/TZHb5eWFU9InIzTk+mcGCGqq4RkXuBDFWdW8vhN+NM3/oXEfmLu+40Vd3l67zGNGUpHWL4x68GcfOYPvx3YSazl2Yze2kWFw1L5qaT+5DSwZKGCY4aSxbuAIJ3quqqKusHAX9X1XMbIT6/WcnCNEfb9+7nyc82MGtJFuWq/OroJG4e04ceCW2CHZppJupdDSUiS1V1eA3bVqnqoHrG2KAsWZjmbGdeMU9+toFXl2zFU6FcMMRJGj0TLWmY+mmIZLFeVdNq2Japqn3qGWODsmRhWoJd+cU89flGXl68hVJPBecPSeKmk/vQp1PbYIdmmqiGuM8iQ0SureaFrwGW1Sc4Y8zh6RQXxZ/P6c8Xvx/DNcf34sPVOzn1P59x66vfsv6nfcEOzzRjtZUsOgNvAaUcTA7pOLPkXaiqOxslQj9ZycK0RDkFJTzzxSZe/GYz+8vKOWtQV24dk8YRXWKDHZppIhqy6+zJwEB3cY2qftoA8TU4SxamJdtTWMqzX27kha+3UFDi4cyBXbh1bBpHdo0LdmgmxDXo5EdNgSULY2BvUSnPfrmJ57/azL4SD6f178ytY9MYmBQf7NBMiLJkYUwLlldUxoyvNjHjq03sK/ZwypGduW1sGoOSLWmYQ1myMMaQX1zG819t5tkvN5G3v4wx/Tpx69g0hqS0C3ZoJkRYsjDGHLCvuIwXv9nC019sZG9RGSf27chtp6QxtHv7YIdmgsyShTHmFwpKPLzkJo09haUcn5bIbWPTSE/tEOzQTJBYsjDG1KiwxMPLi7cw/fON5BSUcmzvBG4bm8bIXgnBDs00MksWxhif9peW8/LiLTz1+UZ27ythZM8O3HZKGqN6JeBMUmmaO0sWxhi/FZeV88rirTz52QZ27SthRGoHbh2bxnF9LGk0d5YsjDF1VlxWzmtLs/jvwg3szC9mWI/23DY2jePTEi1pNFOWLIwxh63EU87sjGz+uyCT7XnFDElpx22npHFS346WNJoZSxbGmHor8ZQzZ9k2pi3IZNve/RyVHM8Vx/Tg7EFdadO6xrnTTBNiycIY02BKPRW89W02T32+kY27C4lpFc45R3VlQnoKw3q0t9JGE2bJwhjT4FSV5Vt/ZvbSbN5duZ3C0nJ6JbZhfHoK44Ym0SkuKtghmjoKiWQhImcAj+DMwf2Mqt5fw37jgDeA4aqaISIJlcvA86p6s69zWbIwpnEVlnh4b9UOXs/IYunmnwkPE04+oiPj01MY068TkeG1TZdjQoW/ySJglY4iEg5MA04FsoGlIjJXVddW2S8WuA1Y7LW6GPgzztDoAzHGhJ
w2rSOYkJ7ChPQUNu4u4PVl2cxZls0n3+8isW0rLjw6iQnpKaR1trk1moNApv4RQKaqblTVUmAWcH41+90HPICTIABQ1UJV/dJ7nTEmdPXq2JY/nNGPr6eOYcakdNJ7dOC5rzZz6n8+58InvuLVJVvZV1wW7DBNPQSyO0MSkOW1nA2M9N5BRIYCKar6noj8rq4nEJEpwBSA7t271yNUY0xDiAgPY0y/zozp15mcghLe/nYbry3N4s43V3HPO2s4a5DTKD6yZwdrFG9igtb3TUTCgIeASYf7Gqo6HZgOTptFw0RmjGkIiW1bc83xvZg8uiffZecxOyOLd1Zs583l2+iREMP4YcmMG5ZM1/joYIdq/BDIZLENSPFaTnbXVYrFaY9Y6P7C6ALMFZHzVNVaqo1pJkSEISntGJLSjj+f3Z8P1+xg9tJs/vXROh76eB0n9O3IhPQUxh7ZidYR4cEO19QgkMliKZAmIj1xksQlwGWVG1U1D0isXBaRhcAdliiMab6iW4Vz4dHJXHh0MltyC3ljWTZvLMvmxpeX0z4mkgvcRnGbOzz0BLrr7FnAwzhdZ2eo6t9E5F4gQ1XnVtl3IV7JQkQ2A3FAK2AvcFrVnlTerOusMU1TeYXyZWYOszOy+HjNT5SWVzAoKZ4J6cmcNySJ+OjIYIfYrIXEfRaNyZKFMU3fz4WlvL3CaRT/Yec+WkeEccbALkxIT2FUrwTCwqxRvKFZsjDGNFmqyprt+czOyOLtb7eRX+whqV0049OTuWhYMsntY4IdYrNhycIY0ywUl5Xz0dqfeD0jiy8zcwAY3SeR8ekpnNa/M1GR1iheH5YsjDHNTvbPRcxZto3Xl2WR/fN+4qIiDjSKD+gWZ/duHAZLFsaYZquiQvlmYy6zM7L4YPVOSj0VHNk1jgnpyVwwJIn2bVoFO8Qmw5KFMaZFyCsqY+7K7byekcXK7DxahYdxav/OTBiewug+iYRbo3itLFkYY1qctdvzeX2Z0yj+c1EZXeOjuGhYMuOHpdA9wRrFq2PJwhjTYpV4ypn//S5mZ2Tx+brdVCgc06sDE9JTOHNgV6JbWaN4JUsWxhgD7Mjbz5vLtzE7I4stuUXEto7g3CHdmJCewuDk+BbfKG7JwhhjvFRUKEs272F2Rhbvr9pBcVkFfTu3ZUJ6ChccnURi29bBDjEoLFkYY0wN9hWX8e7KHczOyOLbrXuJCBPGHtmJCekpnNi3IxEtaJY/SxbGGOOH9T/t4/Vl2by5PJucglI6xbZm3LBkxg9LplfHtsEOL+AsWRhjTB2UlVfw6Q+7eD0jiwU/7qa8Qhme2p7x6SmcPagrbVoHbfqfgLJkYYwxh2lXfjFvfruN2Uuz2JhTSEyrcM45qisXD09haPf2zapR3JKFMcbUk6qybMvPzM7I4t2VOygqLadXxzZMSE/hV0cn0SkuKtgh1pslC2OMaUCFJR7eW7WD1zOyWLr5Z8LDhJOP6Mj49BTG9OtEZBNtFLdkYYwxAbJxdwGvL8tmzrJsdu0rIbFtKy50BzRM6xwb7PDqxJKFMcYEmKe8gs/W7WZ2Rhbzv9+Fp0I5uns7JqSncM5RXYmNCv1Z/kIiWYjIGcAjONOqPqOq99ew3zjgDWC417SqdwKTgXLgVlWdV9u5LFkYY4Ipp6CEt791Zvlbv6uAqMgwzhrUlQnpKYzs2SFkG8WDnixEJBxYB5wKZANLgUurzqMtIrHAezhzbd+sqhki0h94FRgBdAM+AfqqanlN57NkYYwJBarKd9l5zM7I4p0V29lX4qFHQozTKD40ia7x0cEO8RD+JotAtsiMADJVdaOqlgKzgPOr2e8+4AGg2Gvd+cAsVS1R1U1Apvt6xhgT0kSEISnt+PuFg1jyx1N4aMJgusZH8c95P3Lc/Z8y6bklvL9qByWeGn/7hqRA3mWSBGR5LWcDI713EJGhQIqqviciv6ty7KIqxyZVPYGITAGmAHTv3r2BwjbGmIYR3SqcXw1N5ldDk9
mSW8gby7J5Y1k2N768nPYxkVx4dDIThifTr0tcsEP1KWi3JIpIGPAQMOlwX0NVpwPTwamGapjIjDGm4fVIaMNvTzuC20/pyxfrd/N6RjYvLdrMjK82cVRyPOPTUzhvcDfio0OzUTyQyWIbkOK1nOyuqxQLDAQWug0/XYC5InKeH8caY0yTFB4mnHREJ046ohN7Ckv53wqnUfzPb6/mr++u5YyBXZiQnsKoXgmEhdAsf4Fs4I7AaeAei/NFvxS4TFXX1LD/QuAOt4F7APAKBxu45wNp1sBtjGmOVJU12/N5bWkW/1uxjfxiD0ntohmfnsxFw5JJbh+4Wf78beAOWMlCVT0icjMwD6fr7AxVXSMi9wIZqjq3lmPXiMhsYC3gAW6qLVEYY0xTJiIMTIpnYFI8fzz7SOat2cnrGdk8/Ml6Hpm/ntF9EhmfnsJp/TsTFRmcWf7spjxjjAlRWXuKmLM8m9czstm2dz9xURFc4N4pPjApvkHOEfT7LBqbJQtjTHNVUaF8szGX2RlZfLB6J6WeCvp3jWNCejLnD0mifZtWh/3aliyMMaYZyisqY+5325idkc2qbXm0Cg/jqmN78Mez+x/W6wW9zcIYY0zDi4+JZOKoVCaOSmXt9nxeX5ZFt3aBvyvckoUxxjRR/bvFcVe3AY1yrqY5ALsxxphGZcnCGGOMT5YsjDHG+GTJwhhjjE+WLIwxxvhkycIYY4xPliyMMcb4ZMnCGGOMT81muA8R2Q1sqcdLJAI5DRROMDWX6wC7llDUXK4D7Foq9VDVjr52ajbJor5EJMOf8VFCXXO5DrBrCUXN5TrArqWurBrKGGOMT5YsjDHG+GTJ4qDpwQ6ggTSX6wC7llDUXK4D7FrqxNosjDHG+GQlC2OMMT5ZsjDGGONTi0oWIjJDRHaJyOoatouIPCoimSKyUkSGNnaM/vDjOk4SkTwRWeE+/tLYMfpLRFJEZIGIrBWRNSJyWzX7hPzn4ud1NInPRUSiRGSJiHznXss91ezTWkRecz+TxSKS2viR+ubntUwSkd1en8s1wYjVHyISLiLfisi71WwL7Geiqi3mAZwADAVW17D9LOADQIBjgMXBjvkwr+Mk4N1gx+nntXQFhrrPY4F1QP+m9rn4eR1N4nNx3+e27vNIYDFwTJV9bgSedJ9fArwW7LjrcS2TgMeDHauf1/Mb4JXq/h0F+jNpUSULVf0c2FPLLucDL6pjEdBORLo2TnT+8+M6mgxV3aGqy93n+4DvgaQqu4X85+LndTQJ7vtc4C5Guo+qPWHOB15wn78BjBURaaQQ/ebntTQJIpIMnA08U8MuAf1MWlSy8EMSkOW1nE0T/Q8PjHKL3h+ISONM0ltPbrH5aJxff96a1OdSy3VAE/lc3OqOFcAu4GNVrfEzUVUPkAckNG6U/vHjWgDGuVWcb4hISiOH6K+Hgd8DFTVsD+hnYsmieVqOM97LYOAx4O0gx+OTiLQF5gC3q2p+sOM5XD6uo8l8LqparqpDgGRghIgMDHZMh8uPa3kHSFXVo4CPOfjrPGSIyDnALlVdFqwYLFkcahvg/asi2V3XpKhqfmXRW1XfByJFJDHIYdVIRCJxvmBfVtU3q9mlSXwuvq6jqX0uAKq6F1gAnFFl04HPREQigHggt3Gjq5uarkVVc1W1xF18BhjW2LH54TjgPBHZDMwCxojIzCr7BPQzsWRxqLnAlW7vm2OAPFXdEeyg6kpEulTWVYrICJzPOST/I7txPgt8r6oP1bBbyH8u/lxHU/lcRKSjiLRzn0cDpwI/VNltLnCV+/wi4FN1W1ZDiT/XUqX96zyc9qaQoqp3qmqyqqbiNF5/qqpXVNktoJ9JREO9UFMgIq/i9EhJFJFs4C6cBi9U9UngfZyeN5lAEXB1cCKtnR/XcRFwg4h4gP3AJaH4H9l1HDARWOXWKwP8P6A7NKnPxZ/raCqfS1fgBREJx0los1X1XRG5F8hQ1bk4ifElEcnE6WxxSfDCrZU/13
KriJwHeHCuZVLQoq2jxvxMbLgPY4wxPlk1lDHGGJ8sWRhjjPHJkoUxxhifLFkYY4zxyZKFMcYYnyxZGOODiJR7jUi6QkSmNuBrp0oNowcbE0pa1H0Wxhym/e5wEca0WFayMOYwichmEXlQRFa5cyb0cdenisin7sB080Wku7u+s4i85Q4k+J2IHOu+VLiIPO3Ot/CRe6cxInKrOPNjrBSRWUG6TGMASxbG+CO6SjXUxV7b8lR1EPA4zqig4AwS+II7MN3LwKPu+keBz9yBBIcCa9z1acA0VR0A7AXGueunAke7r3N9oC7OGH/YHdzG+CAiBaratpr1m4ExqrrRHURwp6omiEgO0FVVy9z1O1Q1UUR2A8leg9ZVDmf+saqmuct/ACJV9a8i8iFQgDM67dte8zIY0+isZGFM/WgNz+uixOt5OQfbEs8GpuGUQpa6I4kaExSWLIypn4u9/n7jPv+ag4O4XQ584T6fD9wABybkia/pRUUkDEhR1QXAH3CGm/5F6caYxmK/VIzxLdprJFmAD1W1svtsexFZiVM6uNRddwvwnIj8DtjNwVFybwOmi8hknBLEDUBNQ62HAzPdhCLAo+58DMYEhbVZGHOY3DaLdFXNCXYsxgSaVUMZY4zxyUoWxhhjfLKShTHGGJ8sWRhjjPHJkoUxxhifLFkYY4zxyZKFMcYYn/4/F2pUw2FGFF4AAAAASUVORK5CYII=\n", 547 | "text/plain": [ 548 | "
" 549 | ] 550 | }, 551 | "metadata": { 552 | "needs_background": "light" 553 | }, 554 | "output_type": "display_data" 555 | } 556 | ], 557 | "source": [ 558 | "sns.lineplot(x=range(1,EPOCHS+1),y=r.history['loss'])\n", 559 | "sns.lineplot(x=range(1,EPOCHS+1),y=r.history['val_loss'])\n", 560 | "plt.title('Model Cross Entropy Loss')\n", 561 | "plt.ylabel('Cross Entropy Loss')\n", 562 | "plt.xlabel('Epochs')\n", 563 | "plt.legend(['Training', 'Testing'], loc='upper left')" 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": 27, 569 | "metadata": {}, 570 | "outputs": [], 571 | "source": [ 572 | "y_pred = model.predict(X_test).argmax(axis=1)" 573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "execution_count": 28, 578 | "metadata": {}, 579 | "outputs": [ 580 | { 581 | "data": { 582 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAV0AAAD8CAYAAADUv3dIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAHoVJREFUeJzt3Xl4FeX5xvHvc5KwLwmQBEhAVo07CuLOKoJWEQq2WKu1Uvm5VlyoUNS6C4qgVbTivlWKUhEFQVxARcEgWrYIxA0SIInsiJCck/f3R9KYsCWUZM5kcn+85royM++ZeeYw3nnzzsw55pxDRES8EYp2ASIiNYlCV0TEQwpdEREPKXRFRDyk0BUR8ZBCV0TEQwpdEREPKXRFRDyk0BUR8VBsVe+g7tkP6pG3KrZu2o3RLkGkUiTUi7FD3UbdE66tcOb8/OVjh7y/g6WeroiIh6q8pysi4inzd19SoSsiwRKKiXYFB6TQFZFgMc+HaQ+KQldEgkXDCyIiHlJPV0TEQ+rpioh4SD1dEREP6e4FEREPaXhBRMRDGl4QEfGQeroiIh5S6IqIeChGF9JERLyjMV0REQ9peEFExEPq6YqIeEg9XRERD6mnKyLiIT0GLCLiIQ0viIh4SMMLIiIeUk9XRMRDCl0REQ/pQpqIiIc0pisi4iENL4iIeEg9XRER75hCV0TEO34PXX8PfoiIHCQLWYWncrdl1s/MVppZppmN3Mf61mb2oZl9aWZLzOzc8rZZo3q6fbq0YdxVvYkJGc/PWsK4f31eZn2rxIY8NeJcGjeoTUwoxG3PzGN2+ncM6XUkwy/sWtLu2LaJnHr1iyz5NtfrQ/Ctz+Z/zIQH76ewMEL/AYO59PIryqzPz8/nzttGsjJjOY0ax3PP2PG0bJkCwOpVKxl7zx389NMOQqEQz748hdq1azP8mmH8mJdHJBKm0wmduXnUbcT4/FsBqlJVvMcFBfmMG3Mvixd9TigU4v+uuZ5eZ50dhaOrPJXV0zWzGGAi0AfIAtLNbLpzbkWpZrcCU5xzT5jZUcBMoM2BtltjQjcUMh6+tg+/GjmF7B+388mjl/D2Z9/w9ZqNJW1uufhUpn60kqfe/oq01k2Zds8g0i6dxOQPMpj8QQYAR7dpxpQ7Bipw
S4lEIowbcw9/f+JpkpKT+ePFv+XM7j1p275DSZvp06bSqGEjXp8+mzmzZjLxkYe4d+x4wuEwd9x6C3fcPYaOR6SxdcsWYmOLTst7x46nfoMGOOcYdfNwPpgzmz79yu1IBFJVvcfPP/0kCU2a8Nqb71BYWMi2rVujdYiVphKHF7oCmc65b4u3Oxm4ACgdug5oVPxzY2BdeRstd3jBzNLM7BYz+3vxdIuZHXnQ5UfZSUe04Jt1m/l+w1YKwoW8Nu9rzjutQ5k2zkGjerUAaFy/Nus37thrO7/peSSvzc3wpObqYsWypaS2ak1Kaivi4mrRp+85fDT3gzJtPp77AeeePwCAnmedzaLPF+Cc4/PP5tOh4+F0PCINgMbx8SW92foNGgAQCYcpCBf4/qp0Vaqq9/itN9/gD8U95lAoRHxCgodHVTXMrMJTOVKAtaXms4qXlXYH8Hszy6Kol3tdeRs9YOia2S3AZMCAz4snA17d1/iGn7Vs1oCsvO0l89l520lp2qBMm3tfms+Q3keR+cqVvHHPIG58/P29tjO4expT5n5d5fVWJ3m5OSQlNy+ZT0puTl5e7l5tkpsXtYmNjaVBg4Zs3bKFNWt+wMy4/uoruPSiQbz0/DNlXnf91VdwTu8zqV+vfrX/s/dQVMV7vH37NgCenPgol140iL+OGM7GjT96dERVyCo+mdkwM1tUahp2kHu7CHjeOZcKnAu8ZHbgG4XL6+kOBU5yzo1xzr1cPI2hqNs99CCL873f9DySl99dRoeL/8HAW6fyzF/OLdO5OimtBTt3F7Di+wCcmD4RiYT5z5eLufPeB5j07MvM++A90hd+VrL+kcef4u0588jPz2dR+sIoVlp97e89joQj5OZs4LjjO/Hiq1M55rhOPDrhwWiXe8gOpqfrnJvknOtSappUalPZQKtS86nFy0obCkwBcM59BtQBmh2ovvJCtxBouY/lLYrX7VPp3x7hrAXl7MIb637cQWpiw5L5lMSGZO8xfPCHvscy9aOVACzMWEedWrE0a1yvZP2FPdKY8qGGFvaUmJRMbs6GkvncnA0kJibt1SZnQ1GbcDjMjh3baRwfT1JSc044sQvxCQnUqVuX087oxsqvV5R5be3atenWoxcf7/HndE1SFe9x4/h46tSpS4/efQDo3acvKzPKvvfVUSgUqvBUjnSgo5m1NbNawBBg+h5t1gC9AYqHXesAeQesr5ydDgfeN7N3zGxS8TQLeB+4fn8vKv3bIzb1lHJ24Y1FK9fTISWBw5o3Ji42xIXd05jxWWaZNmvzttGjU2sAjmjVhDq1YsnbshMoGk4c1O0IXtPQwl6OPPoY1q75gXXZWRQU5DNn9juc2aNnmTZndu/JzLemAfDhe+/S5aSTMTNOPu10MjNXsevnnwmHwyz+Ip227Tqwc+dP/JhXdO6Gw2HmfzKPw9q09fzY/KIq3mMz44xuPVi8qOgunvTPF9C2XXvPj62yVdaYrnMuDFwLzAYyKLpLYbmZ3WVm/Yub3QRcYWb/AV4FLnPOuQPWV856iscnuvLLAHI2kO6cixz40IvUPfvBA+/AQ31PasuDV/UiJhTihdlLeeDVBdx26eksXrWBGQu+Ia11Ux6/oS/168ThgNFPz+P9L74H4MzjWnHP0G50v/6VqB7DvqybdmO0S+DTj+cxYdwYCgsLOe+CgfzxT1cy6fFHSTvqaLr16MXu3bu589ZbWLUyg0aN4rl7zDhSUov+cntnxnRefPYpzIxTz+jGdcNvZuPGH7n5z1eTX5CPKyzkxC5dGX7zyJKr7jVRZb/HAOvXZXPnrSPZvmM7CQkJ3HrHvTRvsa8/br2RUC/mkK+WNv3DqxXOnI0vXOT51dlyQ/dQ+Sl0g8oPoStSGSojdJtdNrnCmfPj80M8D92a220QkUDy+2PACl0RCZSKPN4bTQpdEQkU9XRFRDyk0BUR8ZBCV0TEQwpdEREv+TtzFboiEiwVeLw3qhS6IhIoGl4QEfGSvzNX
oSsiwaKeroiIhxS6IiIeUuiKiHhIn70gIuIh9XRFRDyk0BUR8ZDPM1ehKyLBop6uiIiHQrqQJiLiHZ93dBW6IhIs6umKiHhIPV0REQ/pQpqIiId8nrkKXREJFn2IuYiIh9TTFRHxkMZ0RUQ85PPMVeiKSLCopysi4iGfZ65CV0SCpcY/kbbx7Zurehc1XtOTr4t2CYG3Of2xaJcgFaThBRERD/k8cxW6IhIs6umKiHjI55mr0BWRYKnxF9JERLzk9+EFf38yhIjIQTKzCk8V2FY/M1tpZplmNnI/bX5jZivMbLmZ/bO8baqnKyKBUlkdXTOLASYCfYAsIN3MpjvnVpRq0xEYBZzunNtsZknlbVc9XREJlErs6XYFMp1z3zrn8oHJwAV7tLkCmOic2wzgnMstb6MKXREJFLOKT+VIAdaWms8qXlba4cDhZjbfzBaYWb/yNqrhBREJlIO5e8HMhgHDSi2a5JybdBC7iwU6Aj2AVOAjMzvWObflQC8QEQmM0EEM6hYH7P5CNhtoVWo+tXhZaVnAQudcAfCdma2iKITT91tfhasTEakGKnF4IR3oaGZtzawWMASYvkebaRT1cjGzZhQNN3x7oI2qpysigVJZ9+k658Jmdi0wG4gBnnXOLTezu4BFzrnpxevONrMVQAQY4ZzbeKDtKnRFJFAq84E059xMYOYey24v9bMDbiyeKkShKyKBoseARUQ8ZCh0RUQ84/OOrkJXRILF7x94o9AVkUDxeeYqdEUkWA7m4YhoUOiKSKDo7gUREQ/5vKOr0BWRYNHwgoiIh/wduQpdEQkY3TImIuIhn19HU+iKSLDo7gUREQ9peEFExEM+7+gqdEUkWNTTFRHxkL8jtwaE7vxPPubBsfdSGClkwK8Hc/mfhpVZn5+fz21/vYWMFctpHB/P2AfH0zIllWVLl3D3nUUfEO+c48qrr6VX7z7s3r2boZf9nvz8fCKRCGf1OZurrvlzNA7Nt/qcdiTjRgwmJhTi+WmfMu65OWXWt26RwD/+9nuaJTRg87adXD76BbJzi748dceiv7Mscx0Aazds5sLhT3pev1/N//gjxo4pOpcHDrqQoVfsfS6PHvUXMpYXncsPPDSBlJRUPvt0Po9MeIiCggLi4uK44aYRnHzKqQA8+sgE3po+jW1bt7Fg0ZfROKxKF+Pz8YVAh24kEmHMvXfxxKRnSW6ezMVDLqR7z160b9+hpM20f79Ow0aNmD7zXWa9M4NHJjzE2HETaN+hI69Mfp3Y2Fjy8nL57eABdOvek1q1ajHpmeepV68+BQUFXP6Hizn9jG4cd3ynKB6pf4RCxsMjf8OvrnqM7JwtfPLKCN6et5Svv91Q0ub+GwbyyozPeeWthXQ/6XDuuq4/Q297EYCfdxdwypAx0SrftyKRCPfdexdPPvUcycnJ/O63g+nRsxftO/xyLr8x9TUaNWrE27Pm8M7MGTw8fhwPPvQw8QkJ/H3iEyQlJbN69SquGjaU9z78GIDuPXoy5HcXc/45faN1aJXO78MLgf424GVLl9CqdWtSW7UiLq4Wfc85l7kfvl+mzdwP3+f8/gMAOKtPXz5f+BnOOerWrUtsbNHvpPzd+SWfRm9m1KtXH4BwOEw4HPb9P7KXTjqmDd+s/ZHvszdSEI7w2uzFnNfjuDJt0tq1YN7nKwGYl76K83ocG41Sq5VlS5fQqtVhRedyrVr0O/dXe53LH37wAf0vGAhAn7P78vmConP5yCOPIikpGYAOHTqye9du8vPzATju+E4kJiZ5ezBVrBK/DbhKBDp0c3NzSG7eomQ+Obk5eTk5e7TJpXlxm9jYWBo0aMiWLUV/6i5d8h8GDTiPC3/dn9G331ESwpFIhN8OHkDv7qdzyimncexxx3t0RP7XMqkxWTmbS+azczaTkti4TJulq7K5oFfRXwYX9DqeRg3q0qRx0S+yOrVi+eSVvzDvhZs4f4+wrslyc3Jo3qJ5yXxScjI5e53LOWXP5YYN2bJl
c5k27707myOPOopatWpVfdFREjKr8BSV+v7XF5rZHyuzED869rjjmTrtbV6e/BrPPj2J3bt3AxATE8O/Xp/G7PfmsmzZEjJXr4pypdXLqAlvcGbnDnz26i2c2bkD2TmbiUQKATji3Ns54+IH+MNfn+fBEYNom9osytUGR2bmah6eMI7b/nZXtEupUkHu6d65vxVmNszMFpnZomefnnQIuzg0SUnJ5GxYXzKfk7OBxOTkPdoksaG4TTgcZseO7cTHx5dp065de+rVq0dmZtlwbdioEV1OOplP539cRUdQ/azL3UpqckLJfEpyAtl5W8u0WZ+3lSE3P82pF43lb4+9BcDWHT8Xvb647ffZG/lo0Wo6paV6VLm/JSUns2H9L+PiuTk5JO91LieXPZe3byc+vujfImfDBm7487Xcc99YWrVu7V3hUWBmFZ6i4YCha2ZL9jMtBZL39zrn3CTnXBfnXJc97xbw0tHHHMuaH34gOyuLgoJ8Zr8zkx49epVp071HL96aPg2A9+bM5qSup2BmZGdlEQ6HAVi3LpvvvvuWli1T2bRpE9u3bQNg165dLFzwKW3atvP2wHxs0fIf6NA6kcNaNiUuNoYL+57IjLlLyrRpGl+/5IQfcXlfXnhzAQDxDetSKy62pM2pndqRUeoCXE129DHHsmbN92RlraUgP59ZM2fQvWfZc7lHz15Mf/MNAOa8O5uuJxedy9u2bePaq4Zx/Q03ccKJnaNRvqdizCo8RUN5dy8kA32BzXssN+DTKqmoEsXGxnLLX2/j6iuHUhgp5IKBg2jfoSOPP/Z3jjr6GHr07MWAXw/m1lF/of+5Z9OocWPGPDAegC+//ILnnnmK2NhYQqEQfx39NxISEli1ciW33zqSwkiEQufoc3Y/unXvGeUj9Y9IpJAbxk7hrcevISZkvPDmAjK+3cBtV/2KxSvWMGPeUrp16chd1/XHOfhkcSbD758CQFq75jw6+iIKXSEhCzHuuTll7nqoyWJjYxk1+nauGvYnCgsjDBg4iA4dOjLx0Uc4+uhj6NGrNwMHDWb0yBGc168PjRo35oFxEwCY/M+XWbN2DZOemMikJyYC8MRTz9K0aVMmjHuAmTPfZteun+nTqxu/HnQhV11zXTQP9ZD5/I4xzDm3/5VmzwDPOec+2ce6fzrnflfeDnbmH2AHUimanly9/yepDjanPxbtEmqEOrGH/mzDjdO/rnDmjO+f5nlEH7Cn65wbeoB15QauiIjX/H4LZ6AfjhCRmsfvwwsKXREJFJ93dBW6IhIssT5PXYWuiASKzzNXoSsiwaKvYBcR8ZDPM1ehKyLBorsXREQ8pA8xFxHxkM8zV6ErIsFiPv+WNIWuiASK33u6gf7mCBGpeUJW8ak8ZtbPzFaaWaaZjTxAu0Fm5sysS3nbVE9XRAKlsj7wxsxigIlAHyALSDez6c65FXu0awhcDyysyHbV0xWRQIkJVXwqR1cg0zn3rXMuH5gMXLCPdncDY4FdFalPoSsigVKJX0yZAqwtNZ9VvKyEmZ0ItHLOzahofRpeEJFAOZgLaWY2DCj9nWKTnHMV+mJHMwsB44HLDqI8ha6IBMvBDOkWB+z+QjYbaFVqPrV42X81BI4B5haPIzcHpptZf+fcov3tU6ErIoESqrz7dNOBjmbWlqKwHQKUfGOOc24r0Oy/82Y2F7j5QIFbVJ+ISICYVXw6EOdcGLgWmA1kAFOcc8vN7C4z6/+/1qeerogESmwlPh3hnJsJzNxj2e37adujIttU6IpIoOijHUVEPKQPMRcR8ZDPM1ehKyLB4ve7AxS6IhIoGl4QEfGQQldExEP+jlyFrogEjM87ugpdEQmWyvo83aqi0BWRQNHdCyIiHqrxF9JmZmyo6l3UeCvmjIt2CYE39NWvol1CjfDKJZ0OeRsaXhAR8ZCGF0REPKSeroiIh/wduQpdEQmYGPV0RUS84/PMVeiKSLCYzwcYFLoiEijq6YqIeKgSvw24Sih0RSRQ
1NMVEfFQjX8MWETES5X4DexVQqErIoGiuxdERDzk89EFha6IBIt6uiIiHtKYroiIh3T3goiIh/wduQpdEQkY9XRFRDzk78hV6IpI0Pg8dRW6IhIoGl4QEfGQvyNXoSsiQePz1FXoikig6Ik0EREP+XxIV6ErIsHi88wlFO0CREQqk5lVeKrAtvqZ2UozyzSzkftYf6OZrTCzJWb2vpkdVt42FboiEihmFZ8OvB2LASYC5wBHAReZ2VF7NPsS6OKcOw54HXigvPoUuiISKHYQUzm6ApnOuW+dc/nAZOCC0g2ccx8653YWzy4AUsvbqEJXRIKl8lI3BVhbaj6reNn+DAXeKW+jupAmIoFyMLeMmdkwYFipRZOcc5MOep9mvwe6AN3La1ujQnfVVwuZ8dxjFBZG6NL7V3QfcHGZ9QvffZOFs6dhoRC169RlwP/dTFJqG9ZmZjDtyXEl7XpdeBlHdz3T6/KrjUUL5vPEw2MpLCyk3/kD+e0lQ8usX/rVF/zjkQf47pvVjLpzLGf27FNm/U8/7eD/Lh7IqWf25Jqb/upl6dXGcS0bckmXFEJmzM3cyFvLc8us79auCRd1bsnmnQUAvLsyj7mZm2hWP47h3dsSMiMmBO9+/SPvr94YjUOoMgdzy1hxwO4vZLOBVqXmU4uX7bE/OwsYDXR3zu0ub581JnQLCyO89cwj/PHWcTRqmsgTo67kyC6nk5TapqTN8WecxclnFw3ZZCyaz8wXJnLZ6AdJbtWWq8c8SUxMLNs2b+SxEUNJ63wqMTE15u2rsEgkwsSH7uO+h5+kWVIyf/7T7zjljB4c1rZ9SZvE5ObcNPpupr76wj638eJTEzmmU2evSq52zOCyrqnc/943bNpZwN3nHM7irK1kby37//uC7zfzQnrZjNj8c5g7Zq0mXOioHRti7PlpfJG1lS0/h708hCpViffppgMdzawtRWE7BPhd2X3ZCcCTQD/nXO7em9hbuWO6ZpZmZr3NrMEey/tVtHI/yMr8mibNU2iS3JLY2DiOO60XGenzy7SpU69+yc/5u3aV/OvVql2nJGDDBfn+v/s6ilZmLKNFaitapKQSFxdH9979+OzjuWXaNG+RQrsOh2O29+m3+usVbNm0kRNPOtWjiquf9k3rkbN9N3k78okUOhb8sJnOrRpX6LWRQke40AEQF7JAnsp2EP8diHMuDFwLzAYygCnOueVmdpeZ9S9u9iDQAHjNzL4ys+nl1XfArpqZ/Rm4pniHz5jZ9c65N4tX3wfMKm8HfrFtUx6NmyaWzDdqmsja1Sv2ardg1hvMn/EakXABl98+oWT52tUr+PcTD7AlbwODrxutXu5+bMzLJTGpecl8s6QkVi5fWqHXFhYWMumxh/jL7ffxZfqCqiqx2mtSL46NPxWUzG/6qYD2zert1e6k1vGkJTdgw7bdvLQom03FQw1N6sUxolc7khvW5tUv1gWqlwuV2ydyzs0EZu6x7PZSP591sNssLzmuADo753aYWRvgdTNr45x7BP8/+PE/OaXfQE7pN5D/fPIec6e+xOBrRwHQquNRXD/+eXKzfuD1ifdzeKeuxNWqHeVqg+Xtf/+LrqeeQWJScrRLqfYWZ23l0+83Ey509OrYlCtPb819c74BYNPOAka9vZL4urHc2KMtC9dsYduu4ASv34OpvOGFkHNuB4Bz7nugB3COmY3nAMdmZsPMbJGZLZrz+suVVeshadQkka0b80rmt23Mo3GTxP22P/a0XqxI/2Sv5Umph1G7Tl1y1n5XJXVWd00Tk8jL3VAy/2NuLk0TKxaiGcuWMH3qZC4ddA5PTxzP+7Pe5tknHq6qUqutTTsLaFo/rmS+Sf04Nv9cUKbNjvxIyTDCh5kbadtk757wlp/DrN2yi7Sk+nutq9Yq8UbdqlBe6OaYWaf/zhQH8HlAM+DY/b3IOTfJOdfFOdelz+DfV06lhyil/RFsXJ/Fptz1hMMFLPn0A9K6nFamzY/rs0p+Xrl4AU1b
FN2Styl3PZFIUU9gc94G8tatISGxObK3I9KOZl3WGjasy6KgoIB578/ilDPKvYsGgFvuuJ+X/j2bF6e+w5+uuZHe/c7j8quGV3HF1c+3G3fSvGFtEhvUIiZknHJYAl+s3VamTXzdX/6I7ZzamHVbdwFFQwtxMUVpU69WDEck1Wf9tnIvuFcrIbMKT9FQ3vDCpUCZvzuKB5cvNbMnq6yqKhATE8v5l1/P8/eOwBUWcmLPc0hu1Zb3/vUsKe2P4Mgup7Ng1ht8s/QLQjEx1G3QkMHXFA0t/PD1Uj6a9k9CMTFYKET/ocOp3yg+ykfkTzGxsVx9wyhG33gVhZFCzj5vAG3adeDFpybSMe1oTj2zByszlnH3qBvYvn0bC+fP46WnH2fSK29Eu/Rqo9DB859ncUvvdoTMmJe5ieytuxh0fHO+27iTxVnb6JuWyImpjYgUwk/5Yf7x6RoAWjauzcWd2+Fc0djnjBV5rN2yK8pHVLn8Prxgzrkq3cHr/1lftTsQOqckRLuEwLv1na+jXUKN8MolnQ45M1fl7Kxw5hyeXM/zjNYleBEJFH2IuYiIh/x+77FCV0QCxeeZq9AVkWCpyIeTR5NCV0QCxeeZq9AVkWDxeeYqdEUkYHyeugpdEQkU3TImIuIhjemKiHgopNAVEfGSv1NXoSsigaLhBRERD/k8cxW6IhIs6umKiHhIjwGLiHjI35Gr0BWRgPF5R1ehKyLBoifSRES85O/MVeiKSLD4PHMVuiISLNH6avWKUuiKSKD4PHMJRbsAEZGaRD1dEQkUv/d0FboiEii6ZUxExEPq6YqIeEihKyLiIQ0viIh4SD1dEREP+TxzFboiEjA+T12FrogEit8fAzbnXLRr8B0zG+acmxTtOoJM73HV03vsT3oMeN+GRbuAGkDvcdXTe+xDCl0REQ8pdEVEPKTQ3TeNg1U9vcdVT++xD+lCmoiIh9TTFRHxkEK3FDPrZ2YrzSzTzEZGu54gMrNnzSzXzJZFu5agMrNWZvahma0ws+Vmdn20a5JfaHihmJnFAKuAPkAWkA5c5JxbEdXCAsbMugE7gBedc8dEu54gMrMWQAvn3GIzawh8AQzQuewP6un+oiuQ6Zz71jmXD0wGLohyTYHjnPsI2BTtOoLMObfeObe4+OftQAaQEt2q5L8Uur9IAdaWms9CJ6pUc2bWBjgBWBjdSuS/FLoiAWVmDYCpwHDn3LZo1yNFFLq/yAZalZpPLV4mUu2YWRxFgfuKc+7f0a5HfqHQ/UU60NHM2ppZLWAIMD3KNYkcNDMz4Bkgwzk3Ptr1SFkK3WLOuTBwLTCbogsPU5xzy6NbVfCY2avAZ8ARZpZlZkOjXVMAnQ5cAvQys6+Kp3OjXZQU0S1jIiIeUk9XRMRDCl0REQ8pdEVEPKTQFRHxkEJXRMRDCl0REQ8pdEVEPKTQFRHx0P8De39dS03ACg4AAAAASUVORK5CYII=\n", 583 | "text/plain": [ 584 | "
" 585 | ] 586 | }, 587 | "metadata": { 588 | "needs_background": "light" 589 | }, 590 | "output_type": "display_data" 591 | } 592 | ], 593 | "source": [ 594 | "from sklearn.metrics import confusion_matrix\n", 595 | "\n", 596 | "cm = confusion_matrix(y_pred=y_pred, y_true=y_test)\n", 597 | " \n", 598 | "cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", 599 | " \n", 600 | "ax=sns.heatmap(cm, annot=True, xticklabels=[0,1,2], yticklabels=[0,1,2], cmap='Blues')\n", 601 | "ax.set_ylim(3.0, 0)\n", 602 | "plt.show()" 603 | ] 604 | }, 605 | { 606 | "cell_type": "code", 607 | "execution_count": 29, 608 | "metadata": {}, 609 | "outputs": [ 610 | { 611 | "name": "stdout", 612 | "output_type": "stream", 613 | "text": [ 614 | " precision recall f1-score support\n", 615 | "\n", 616 | " 0.0 0.85 0.87 0.86 566555\n", 617 | " 1.0 0.81 0.95 0.87 304953\n", 618 | " 2.0 0.73 0.53 0.62 224973\n", 619 | "\n", 620 | "avg / total 0.82 0.82 0.82 1096481\n", 621 | "\n" 622 | ] 623 | } 624 | ], 625 | "source": [ 626 | "print(classification_report(y_test,y_pred))" 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": null, 632 | "metadata": {}, 633 | "outputs": [], 634 | "source": [] 635 | } 636 | ], 637 | "metadata": { 638 | "kernelspec": { 639 | "display_name": "Python 3", 640 | "language": "python", 641 | "name": "python3" 642 | }, 643 | "language_info": { 644 | "codemirror_mode": { 645 | "name": "ipython", 646 | "version": 3 647 | }, 648 | "file_extension": ".py", 649 | "mimetype": "text/x-python", 650 | "name": "python", 651 | "nbconvert_exporter": "python", 652 | "pygments_lexer": "ipython3", 653 | "version": "3.5.3" 654 | } 655 | }, 656 | "nbformat": 4, 657 | "nbformat_minor": 2 658 | } 659 | -------------------------------------------------------------------------------- /Data_Filtering.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | 
"metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "Using TensorFlow backend.\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "import keras\n", 18 | "import pandas as pd\n", 19 | "import numpy as np\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import os" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "Data available: snapshot, ticker and trades\n", 29 | "\n", 30 | "Snapshot (100ms and 10s): gives the state of the orderbook at specified time interval. Includes 100 non-zero levels, 50 ask 50 bid, and their quantities.\n", 31 | "\n", 32 | "Ticker: gives the bid and ask price at each timestamp for which a trade has been made\n", 33 | "\n", 34 | "Trades: gives the time, side, price and quantity of every trade" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 181, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "df_snapshot = pd.read_parquet(r\"Data\\coinbase_btc_usd\\coinbase\\btc_usd\\l2_snapshots\\100ms\\coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0000_0100.parquet\")\n", 44 | "df_ticker = pd.read_parquet(r'Data\\coinbase_btc_usd\\coinbase\\btc_usd\\ticker\\coinbase_btc_usd_ticker_2019_11_12.parquet')\n", 45 | "df_trades = pd.read_parquet(r'Data\\coinbase_btc_usd\\coinbase\\btc_usd\\trades\\coinbase_btc_usd_trades_2019_11_12.parquet')" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 171, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "data": { 55 | "text/html": [ 56 | "
\n", 57 | "\n", 70 | "\n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 
225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | "
b1b2b3b4b5b6b7b8b9b10...aq41aq42aq43aq44aq45aq46aq47aq48aq49aq50
timestamp
2019-11-12 00:00:00.0008721.538720.598719.558719.508719.08718.028717.878717.858717.568716.06...8.2000002.2032.6000.0052.5000.8963210.7660000.0017371.8200000.933419
2019-11-12 00:00:00.1008721.538720.598719.568719.558719.08718.028717.878717.858717.568716.06...8.2000002.2032.6000.0052.5000.8963210.7660000.0017371.8200000.933419
2019-11-12 00:00:00.2008721.538720.598719.618719.568719.08718.028717.878717.858716.068716.00...0.0091358.2002.2032.6000.0052.5000000.8963210.7660000.0017371.820000
2019-11-12 00:00:00.3008721.538720.598719.618719.568719.08718.028718.008717.878717.858716.60...0.0091358.2002.2032.6000.0052.5000000.8963210.7660000.0017371.820000
2019-11-12 00:00:00.4008721.538720.598719.618719.568719.08718.998718.028718.008717.878717.85...0.0091358.2002.2032.6000.0052.5000000.8963210.7660000.0017371.820000
\n", 244 | "

5 rows × 200 columns

\n", 245 | "
" 246 | ], 247 | "text/plain": [ 248 | " b1 b2 b3 b4 b5 b6 \\\n", 249 | "timestamp \n", 250 | "2019-11-12 00:00:00.000 8721.53 8720.59 8719.55 8719.50 8719.0 8718.02 \n", 251 | "2019-11-12 00:00:00.100 8721.53 8720.59 8719.56 8719.55 8719.0 8718.02 \n", 252 | "2019-11-12 00:00:00.200 8721.53 8720.59 8719.61 8719.56 8719.0 8718.02 \n", 253 | "2019-11-12 00:00:00.300 8721.53 8720.59 8719.61 8719.56 8719.0 8718.02 \n", 254 | "2019-11-12 00:00:00.400 8721.53 8720.59 8719.61 8719.56 8719.0 8718.99 \n", 255 | "\n", 256 | " b7 b8 b9 b10 ... aq41 \\\n", 257 | "timestamp ... \n", 258 | "2019-11-12 00:00:00.000 8717.87 8717.85 8717.56 8716.06 ... 8.200000 \n", 259 | "2019-11-12 00:00:00.100 8717.87 8717.85 8717.56 8716.06 ... 8.200000 \n", 260 | "2019-11-12 00:00:00.200 8717.87 8717.85 8716.06 8716.00 ... 0.009135 \n", 261 | "2019-11-12 00:00:00.300 8718.00 8717.87 8717.85 8716.60 ... 0.009135 \n", 262 | "2019-11-12 00:00:00.400 8718.02 8718.00 8717.87 8717.85 ... 0.009135 \n", 263 | "\n", 264 | " aq42 aq43 aq44 aq45 aq46 aq47 \\\n", 265 | "timestamp \n", 266 | "2019-11-12 00:00:00.000 2.203 2.600 0.005 2.500 0.896321 0.766000 \n", 267 | "2019-11-12 00:00:00.100 2.203 2.600 0.005 2.500 0.896321 0.766000 \n", 268 | "2019-11-12 00:00:00.200 8.200 2.203 2.600 0.005 2.500000 0.896321 \n", 269 | "2019-11-12 00:00:00.300 8.200 2.203 2.600 0.005 2.500000 0.896321 \n", 270 | "2019-11-12 00:00:00.400 8.200 2.203 2.600 0.005 2.500000 0.896321 \n", 271 | "\n", 272 | " aq48 aq49 aq50 \n", 273 | "timestamp \n", 274 | "2019-11-12 00:00:00.000 0.001737 1.820000 0.933419 \n", 275 | "2019-11-12 00:00:00.100 0.001737 1.820000 0.933419 \n", 276 | "2019-11-12 00:00:00.200 0.766000 0.001737 1.820000 \n", 277 | "2019-11-12 00:00:00.300 0.766000 0.001737 1.820000 \n", 278 | "2019-11-12 00:00:00.400 0.766000 0.001737 1.820000 \n", 279 | "\n", 280 | "[5 rows x 200 columns]" 281 | ] 282 | }, 283 | "execution_count": 171, 284 | "metadata": {}, 285 | "output_type": "execute_result" 286 | } 287 | ], 288 
| "source": [ 289 | "df_snapshot.head()" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "# Example Bid Price, Moving Up" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "Neural network modelling p(Y = y1 | Y >= y1, X = x)\n", 304 | "\n", 305 | "There can be zero levels both in the spread and on the bid and ask sides. Furthermore, many changes can happen within our 100ms snapshot window. I thus see two ways to calculate the price level change.\n", 306 | "\n", 307 | "#### 1.\n", 308 | "As the price is quoted to 0.01 accuracy, one can simply take the difference in the best bid price and multiply by 100 to express it as the number of $0.01$ levels by which it has increased.\n", 309 | "\n", 310 | "#### 2.\n", 311 | "Or one can approximate the price level change in terms of the non-zero levels currently present, i.e. by counting how many of them the new price passes.\n", 312 | " E.g. if the bid price moves up to 5.20 in the next step and at the current time step there are ask price levels at 5.15, 5.18, 5.22, ..., then this counts as a price level increase of 2 levels. This seems somewhat more arbitrary and is heavily dependent on the current state of the order book." 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "metadata": {}, 318 | "source": [ 319 | "Each snapshot file contains one hour of data at 100ms intervals. We will combine all this data into four separate dataframes (only one is shown here, to demonstrate the principle): one where the bid price has increased, one where it has decreased, and the same two for the ask price. 
Thus for the example case of bid price increasing:" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 203, 325 | "metadata": {}, 326 | "outputs": [ 327 | { 328 | "name": "stdout", 329 | "output_type": "stream", 330 | "text": [ 331 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0000_0100.parquet\n", 332 | "Non na entries: 28547\n", 333 | "Entries left: 3589\n", 334 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0100_0200.parquet\n", 335 | "Non na entries: 29365\n", 336 | "Entries left: 4635\n", 337 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0200_0300.parquet\n", 338 | "Non na entries: 28656\n", 339 | "Entries left: 3228\n", 340 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0300_0400.parquet\n", 341 | "Non na entries: 30871\n", 342 | "Entries left: 4663\n", 343 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0400_0500.parquet\n", 344 | "Non na entries: 29187\n", 345 | "Entries left: 3622\n", 346 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0500_0600.parquet\n", 347 | "Non na entries: 31956\n", 348 | "Entries left: 5930\n", 349 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0600_0700.parquet\n", 350 | "Non na entries: 23169\n", 351 | "Entries left: 3822\n", 352 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0700_0800.parquet\n", 353 | "Non na entries: 22659\n", 354 | "Entries left: 1894\n", 355 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0800_0900.parquet\n", 356 | "Non na entries: 19565\n", 357 | "Entries left: 1055\n", 358 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0900_1000.parquet\n", 359 | "Non na entries: 22605\n", 360 | "Entries left: 1782\n", 361 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_1000_1100.parquet\n", 362 | "Non na entries: 17245\n", 363 | "Entries left: 1046\n", 364 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_1100_1200.parquet\n", 365 | "Non na entries: 20052\n", 366 | "Entries left: 
2119\n", 367 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_1200_1300.parquet\n", 368 | "Non na entries: 31358\n", 369 | "Entries left: 4601\n", 370 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_1300_1400.parquet\n", 371 | "Non na entries: 26075\n", 372 | "Entries left: 2978\n", 373 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_1400_1500.parquet\n", 374 | "Non na entries: 22544\n", 375 | "Entries left: 1559\n", 376 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_1500_1600.parquet\n", 377 | "Non na entries: 28214\n", 378 | "Entries left: 4909\n", 379 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_1600_1700.parquet\n", 380 | "Non na entries: 25146\n", 381 | "Entries left: 2304\n", 382 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_1700_1800.parquet\n", 383 | "Non na entries: 20224\n", 384 | "Entries left: 1736\n", 385 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_1800_1900.parquet\n", 386 | "Non na entries: 24429\n", 387 | "Entries left: 1598\n", 388 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_1900_2000.parquet\n", 389 | "Non na entries: 27789\n", 390 | "Entries left: 2517\n", 391 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_2000_2100.parquet\n", 392 | "Non na entries: 27963\n", 393 | "Entries left: 3174\n", 394 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_2100_2200.parquet\n", 395 | "Non na entries: 27390\n", 396 | "Entries left: 3614\n", 397 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_2200_2300.parquet\n", 398 | "Non na entries: 26435\n", 399 | "Entries left: 2435\n", 400 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_2300_0000.parquet\n", 401 | "Non na entries: 27631\n", 402 | "Entries left: 3604\n", 403 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_0000_0100.parquet\n", 404 | "Non na entries: 28699\n", 405 | "Entries left: 5762\n", 406 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_0100_0200.parquet\n", 407 | 
"Non na entries: 27539\n", 408 | "Entries left: 1893\n", 409 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_0200_0300.parquet\n", 410 | "Non na entries: 22991\n", 411 | "Entries left: 2141\n", 412 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_0300_0400.parquet\n", 413 | "Non na entries: 26243\n", 414 | "Entries left: 1950\n", 415 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_0400_0500.parquet\n", 416 | "Non na entries: 30195\n", 417 | "Entries left: 1472\n", 418 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_0500_0600.parquet\n", 419 | "Non na entries: 23088\n", 420 | "Entries left: 3635\n", 421 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_0600_0700.parquet\n", 422 | "Non na entries: 25195\n", 423 | "Entries left: 2649\n", 424 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_0700_0800.parquet\n", 425 | "Non na entries: 28168\n", 426 | "Entries left: 872\n", 427 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_0800_0900.parquet\n", 428 | "Non na entries: 28398\n", 429 | "Entries left: 451\n", 430 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_0900_1000.parquet\n", 431 | "Non na entries: 22992\n", 432 | "Entries left: 464\n", 433 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_1000_1100.parquet\n", 434 | "Non na entries: 26048\n", 435 | "Entries left: 855\n", 436 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_1100_1200.parquet\n", 437 | "Non na entries: 27290\n", 438 | "Entries left: 1647\n", 439 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_1200_1300.parquet\n", 440 | "Non na entries: 25123\n", 441 | "Entries left: 1035\n", 442 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_1300_1400.parquet\n", 443 | "Non na entries: 22972\n", 444 | "Entries left: 1174\n", 445 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_1400_1500.parquet\n", 446 | "Non na entries: 26767\n", 447 | "Entries left: 872\n", 448 | 
"coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_1500_1600.parquet\n", 449 | "Non na entries: 26669\n", 450 | "Entries left: 527\n", 451 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_1600_1700.parquet\n", 452 | "Non na entries: 23612\n", 453 | "Entries left: 492\n", 454 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_1700_1800.parquet\n", 455 | "Non na entries: 27038\n", 456 | "Entries left: 838\n", 457 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_1800_1900.parquet\n", 458 | "Non na entries: 22585\n", 459 | "Entries left: 1282\n", 460 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_1900_2000.parquet\n", 461 | "Non na entries: 25196\n", 462 | "Entries left: 848\n", 463 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_2000_2100.parquet\n", 464 | "Non na entries: 20146\n", 465 | "Entries left: 740\n", 466 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_2100_2200.parquet\n", 467 | "Non na entries: 16088\n", 468 | "Entries left: 608\n", 469 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_2200_2300.parquet\n", 470 | "Non na entries: 17470\n", 471 | "Entries left: 1683\n", 472 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_13_2300_0000.parquet\n", 473 | "Non na entries: 21760\n", 474 | "Entries left: 2280\n", 475 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_0000_0100.parquet\n", 476 | "Non na entries: 26976\n", 477 | "Entries left: 1131\n", 478 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_0100_0200.parquet\n", 479 | "Non na entries: 24938\n", 480 | "Entries left: 1166\n", 481 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_0200_0300.parquet\n", 482 | "Non na entries: 27933\n", 483 | "Entries left: 1484\n", 484 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_0300_0400.parquet\n", 485 | "Non na entries: 25346\n", 486 | "Entries left: 1344\n", 487 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_0400_0500.parquet\n", 488 | "Non na entries: 
32060\n", 489 | "Entries left: 2933\n", 490 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_0500_0600.parquet\n", 491 | "Non na entries: 30437\n", 492 | "Entries left: 5557\n", 493 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_0600_0700.parquet\n", 494 | "Non na entries: 26509\n", 495 | "Entries left: 2764\n", 496 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_0700_0800.parquet\n", 497 | "Non na entries: 30204\n", 498 | "Entries left: 2073\n", 499 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_0800_0900.parquet\n", 500 | "Non na entries: 30901\n", 501 | "Entries left: 4342\n", 502 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_0900_1000.parquet\n", 503 | "Non na entries: 22159\n", 504 | "Entries left: 1763\n", 505 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_1000_1100.parquet\n", 506 | "Non na entries: 21288\n", 507 | "Entries left: 1621\n", 508 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_1100_1200.parquet\n", 509 | "Non na entries: 21730\n", 510 | "Entries left: 2560\n", 511 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_1200_1300.parquet\n", 512 | "Non na entries: 23288\n", 513 | "Entries left: 2786\n", 514 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_1300_1400.parquet\n", 515 | "Non na entries: 27118\n", 516 | "Entries left: 4641\n", 517 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_1400_1500.parquet\n", 518 | "Non na entries: 23464\n", 519 | "Entries left: 3516\n", 520 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_1500_1600.parquet\n", 521 | "Non na entries: 23731\n", 522 | "Entries left: 3475\n", 523 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_1600_1700.parquet\n", 524 | "Non na entries: 24287\n", 525 | "Entries left: 4047\n", 526 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_1700_1800.parquet\n", 527 | "Non na entries: 17221\n", 528 | "Entries left: 1142\n", 529 | 
"coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_1800_1900.parquet\n", 530 | "Non na entries: 19754\n", 531 | "Entries left: 1411\n", 532 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_1900_2000.parquet\n", 533 | "Non na entries: 17295\n", 534 | "Entries left: 1696\n", 535 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_2000_2100.parquet\n", 536 | "Non na entries: 23238\n", 537 | "Entries left: 1227\n", 538 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_2100_2200.parquet\n", 539 | "Non na entries: 27619\n", 540 | "Entries left: 3305\n", 541 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_2200_2300.parquet\n", 542 | "Non na entries: 27030\n", 543 | "Entries left: 1587\n", 544 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_14_2300_0000.parquet\n", 545 | "Non na entries: 26169\n", 546 | "Entries left: 1979\n", 547 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0000_0100.parquet\n" 548 | ] 549 | }, 550 | { 551 | "name": "stdout", 552 | "output_type": "stream", 553 | "text": [ 554 | "Non na entries: 27521\n", 555 | "Entries left: 1087\n", 556 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0100_0200.parquet\n", 557 | "Non na entries: 24930\n", 558 | "Entries left: 2105\n", 559 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0200_0300.parquet\n", 560 | "Non na entries: 23995\n", 561 | "Entries left: 3622\n", 562 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0300_0400.parquet\n", 563 | "Non na entries: 23348\n", 564 | "Entries left: 3409\n", 565 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0400_0500.parquet\n", 566 | "Non na entries: 23337\n", 567 | "Entries left: 3592\n", 568 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0500_0600.parquet\n", 569 | "Non na entries: 20327\n", 570 | "Entries left: 1931\n", 571 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0600_0700.parquet\n", 572 | "Non na entries: 19324\n", 573 | "Entries left: 2648\n", 574 | 
"coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0700_0800.parquet\n", 575 | "Non na entries: 19165\n", 576 | "Entries left: 1062\n", 577 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0800_0900.parquet\n", 578 | "Non na entries: 19953\n", 579 | "Entries left: 2897\n", 580 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0900_1000.parquet\n", 581 | "Non na entries: 28001\n", 582 | "Entries left: 4639\n", 583 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_1000_1100.parquet\n", 584 | "Non na entries: 22787\n", 585 | "Entries left: 2844\n", 586 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_1100_1200.parquet\n", 587 | "Non na entries: 23247\n", 588 | "Entries left: 4012\n", 589 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_1200_1300.parquet\n", 590 | "Non na entries: 25178\n", 591 | "Entries left: 4377\n", 592 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_1300_1400.parquet\n", 593 | "Non na entries: 23804\n", 594 | "Entries left: 5390\n", 595 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_1400_1500.parquet\n", 596 | "Non na entries: 31365\n", 597 | "Entries left: 4904\n", 598 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_1500_1600.parquet\n", 599 | "Non na entries: 28866\n", 600 | "Entries left: 4270\n", 601 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_1600_1700.parquet\n", 602 | "Non na entries: 30040\n", 603 | "Entries left: 6297\n", 604 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_1700_1800.parquet\n", 605 | "Non na entries: 22322\n", 606 | "Entries left: 2077\n", 607 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_1800_1900.parquet\n", 608 | "Non na entries: 24886\n", 609 | "Entries left: 4068\n", 610 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_1900_2000.parquet\n", 611 | "Non na entries: 24876\n", 612 | "Entries left: 3511\n", 613 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_2000_2100.parquet\n", 614 | "Non na entries: 
23560\n", 615 | "Entries left: 4554\n", 616 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_2100_2200.parquet\n", 617 | "Non na entries: 22572\n", 618 | "Entries left: 3836\n", 619 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_2200_2300.parquet\n", 620 | "Non na entries: 20768\n", 621 | "Entries left: 4600\n", 622 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_2300_0000.parquet\n", 623 | "Non na entries: 26129\n", 624 | "Entries left: 4102\n", 625 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_0000_0100.parquet\n", 626 | "Non na entries: 23843\n", 627 | "Entries left: 4284\n", 628 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_0100_0200.parquet\n", 629 | "Non na entries: 22470\n", 630 | "Entries left: 4241\n", 631 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_0200_0300.parquet\n", 632 | "Non na entries: 25021\n", 633 | "Entries left: 6903\n", 634 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_0300_0400.parquet\n", 635 | "Non na entries: 20527\n", 636 | "Entries left: 5106\n", 637 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_0400_0500.parquet\n", 638 | "Non na entries: 21365\n", 639 | "Entries left: 4574\n", 640 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_0500_0600.parquet\n", 641 | "Non na entries: 21585\n", 642 | "Entries left: 5946\n", 643 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_0600_0700.parquet\n", 644 | "Non na entries: 20304\n", 645 | "Entries left: 5802\n", 646 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_0700_0800.parquet\n", 647 | "Non na entries: 22604\n", 648 | "Entries left: 4469\n", 649 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_0800_0900.parquet\n", 650 | "Non na entries: 19245\n", 651 | "Entries left: 3182\n", 652 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_0900_1000.parquet\n", 653 | "Non na entries: 22239\n", 654 | "Entries left: 4182\n", 655 | 
"coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_1000_1100.parquet\n", 656 | "Non na entries: 26071\n", 657 | "Entries left: 3290\n", 658 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_1100_1200.parquet\n", 659 | "Non na entries: 21796\n", 660 | "Entries left: 5027\n", 661 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_1200_1300.parquet\n", 662 | "Non na entries: 16273\n", 663 | "Entries left: 589\n", 664 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_1300_1400.parquet\n", 665 | "Non na entries: 20885\n", 666 | "Entries left: 2690\n", 667 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_1400_1500.parquet\n", 668 | "Non na entries: 24523\n", 669 | "Entries left: 4340\n", 670 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_1500_1600.parquet\n", 671 | "Non na entries: 18443\n", 672 | "Entries left: 1550\n", 673 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_1600_1700.parquet\n", 674 | "Non na entries: 18488\n", 675 | "Entries left: 1762\n", 676 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_1700_1800.parquet\n", 677 | "Non na entries: 13384\n", 678 | "Entries left: 329\n", 679 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_1800_1900.parquet\n", 680 | "Non na entries: 12533\n", 681 | "Entries left: 307\n", 682 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_1900_2000.parquet\n", 683 | "Non na entries: 13175\n", 684 | "Entries left: 1285\n", 685 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_2000_2100.parquet\n", 686 | "Non na entries: 12884\n", 687 | "Entries left: 729\n", 688 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_2100_2200.parquet\n", 689 | "Non na entries: 12538\n", 690 | "Entries left: 600\n", 691 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_2200_2300.parquet\n", 692 | "Non na entries: 15634\n", 693 | "Entries left: 1547\n", 694 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_16_2300_0000.parquet\n", 695 | "Non na entries: 
10401\n", 696 | "Entries left: 373\n", 697 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_0000_0100.parquet\n", 698 | "Non na entries: 17868\n", 699 | "Entries left: 2419\n", 700 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_0100_0200.parquet\n", 701 | "Non na entries: 15572\n", 702 | "Entries left: 1242\n", 703 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_0200_0300.parquet\n", 704 | "Non na entries: 21304\n", 705 | "Entries left: 7847\n", 706 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_0300_0400.parquet\n", 707 | "Non na entries: 22867\n", 708 | "Entries left: 4924\n", 709 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_0400_0500.parquet\n", 710 | "Non na entries: 18906\n", 711 | "Entries left: 4928\n", 712 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_0500_0600.parquet\n", 713 | "Non na entries: 20459\n", 714 | "Entries left: 5253\n", 715 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_0600_0700.parquet\n", 716 | "Non na entries: 28776\n", 717 | "Entries left: 1699\n", 718 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_0700_0800.parquet\n", 719 | "Non na entries: 27138\n", 720 | "Entries left: 2122\n", 721 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_0800_0900.parquet\n", 722 | "Non na entries: 26465\n", 723 | "Entries left: 5014\n", 724 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_0900_1000.parquet\n", 725 | "Non na entries: 24802\n", 726 | "Entries left: 6052\n", 727 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_1000_1100.parquet\n", 728 | "Non na entries: 15558\n", 729 | "Entries left: 975\n", 730 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_1100_1200.parquet\n", 731 | "Non na entries: 16872\n", 732 | "Entries left: 1729\n", 733 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_1200_1300.parquet\n", 734 | "Non na entries: 20356\n", 735 | "Entries left: 2549\n", 736 | 
"coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_1300_1400.parquet\n", 737 | "Non na entries: 28232\n", 738 | "Entries left: 3189\n", 739 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_1400_1500.parquet\n", 740 | "Non na entries: 19224\n", 741 | "Entries left: 3984\n", 742 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_1500_1600.parquet\n", 743 | "Non na entries: 14991\n", 744 | "Entries left: 1114\n", 745 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_1600_1700.parquet\n", 746 | "Non na entries: 19174\n", 747 | "Entries left: 1791\n", 748 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_1700_1800.parquet\n", 749 | "Non na entries: 16804\n", 750 | "Entries left: 432\n", 751 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_1800_1900.parquet\n", 752 | "Non na entries: 17178\n", 753 | "Entries left: 1167\n", 754 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_1900_2000.parquet\n", 755 | "Non na entries: 17797\n", 756 | "Entries left: 638\n", 757 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_2000_2100.parquet\n", 758 | "Non na entries: 22174\n", 759 | "Entries left: 1353\n", 760 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_2100_2200.parquet\n", 761 | "Non na entries: 19309\n", 762 | "Entries left: 1007\n", 763 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_2200_2300.parquet\n", 764 | "Non na entries: 16829\n", 765 | "Entries left: 1665\n", 766 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_17_2300_0000.parquet\n", 767 | "Non na entries: 22010\n", 768 | "Entries left: 2108\n", 769 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0000_0100.parquet\n", 770 | "Non na entries: 23182\n", 771 | "Entries left: 1040\n", 772 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0100_0200.parquet\n" 773 | ] 774 | }, 775 | { 776 | "name": "stdout", 777 | "output_type": "stream", 778 | "text": [ 779 | "Non na entries: 18296\n", 780 | "Entries left: 485\n", 781 | 
"coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0200_0300.parquet\n", 782 | "Non na entries: 22012\n", 783 | "Entries left: 770\n", 784 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0300_0400.parquet\n", 785 | "Non na entries: 22133\n", 786 | "Entries left: 1118\n", 787 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0400_0500.parquet\n", 788 | "Non na entries: 21448\n", 789 | "Entries left: 330\n", 790 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0500_0600.parquet\n", 791 | "Non na entries: 19851\n", 792 | "Entries left: 579\n", 793 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0600_0700.parquet\n", 794 | "Non na entries: 19670\n", 795 | "Entries left: 521\n", 796 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0700_0800.parquet\n", 797 | "Non na entries: 18773\n", 798 | "Entries left: 302\n", 799 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0800_0900.parquet\n", 800 | "Non na entries: 19211\n", 801 | "Entries left: 741\n", 802 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0900_1000.parquet\n", 803 | "Non na entries: 15352\n", 804 | "Entries left: 137\n", 805 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_1000_1100.parquet\n", 806 | "Non na entries: 19082\n", 807 | "Entries left: 184\n", 808 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_1100_1200.parquet\n", 809 | "Non na entries: 20401\n", 810 | "Entries left: 298\n", 811 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_1200_1300.parquet\n", 812 | "Non na entries: 24121\n", 813 | "Entries left: 925\n", 814 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_1300_1400.parquet\n", 815 | "Non na entries: 21732\n", 816 | "Entries left: 1097\n", 817 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_1400_1500.parquet\n", 818 | "Non na entries: 16748\n", 819 | "Entries left: 406\n", 820 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_1500_1600.parquet\n", 821 | "Non na entries: 19776\n", 
822 | "Entries left: 516\n", 823 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_1600_1700.parquet\n", 824 | "Non na entries: 16940\n", 825 | "Entries left: 225\n", 826 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_1700_1800.parquet\n", 827 | "Non na entries: 26566\n", 828 | "Entries left: 2181\n", 829 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_1800_1900.parquet\n", 830 | "Non na entries: 31398\n", 831 | "Entries left: 5146\n", 832 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_1900_2000.parquet\n", 833 | "Non na entries: 25755\n", 834 | "Entries left: 3526\n", 835 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_2000_2100.parquet\n", 836 | "Non na entries: 21328\n", 837 | "Entries left: 794\n", 838 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_2100_2200.parquet\n", 839 | "Non na entries: 22571\n", 840 | "Entries left: 866\n", 841 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_2200_2300.parquet\n", 842 | "Non na entries: 21180\n", 843 | "Entries left: 1593\n", 844 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_2300_0000.parquet\n", 845 | "Non na entries: 18567\n", 846 | "Entries left: 445\n", 847 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_0000_0100.parquet\n", 848 | "Non na entries: 21472\n", 849 | "Entries left: 464\n", 850 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_0100_0200.parquet\n", 851 | "Non na entries: 22268\n", 852 | "Entries left: 601\n", 853 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_0200_0300.parquet\n", 854 | "Non na entries: 18284\n", 855 | "Entries left: 713\n", 856 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_0300_0400.parquet\n", 857 | "Non na entries: 21178\n", 858 | "Entries left: 703\n", 859 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_0400_0500.parquet\n", 860 | "Non na entries: 24032\n", 861 | "Entries left: 407\n", 862 | 
"coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_0500_0600.parquet\n", 863 | "Non na entries: 21356\n", 864 | "Entries left: 713\n", 865 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_0600_0700.parquet\n", 866 | "Non na entries: 21864\n", 867 | "Entries left: 281\n", 868 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_0700_0800.parquet\n", 869 | "Non na entries: 22011\n", 870 | "Entries left: 271\n", 871 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_0800_0900.parquet\n", 872 | "Non na entries: 23630\n", 873 | "Entries left: 348\n", 874 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_0900_1000.parquet\n", 875 | "Non na entries: 19436\n", 876 | "Entries left: 155\n", 877 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_1000_1100.parquet\n", 878 | "Non na entries: 18938\n", 879 | "Entries left: 348\n", 880 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_1100_1200.parquet\n", 881 | "Non na entries: 28521\n", 882 | "Entries left: 1142\n", 883 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_1200_1300.parquet\n", 884 | "Non na entries: 26141\n", 885 | "Entries left: 975\n", 886 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_1300_1400.parquet\n", 887 | "Non na entries: 26839\n", 888 | "Entries left: 1325\n", 889 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_1400_1500.parquet\n", 890 | "Non na entries: 25620\n", 891 | "Entries left: 1884\n", 892 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_1500_1600.parquet\n", 893 | "Non na entries: 21827\n", 894 | "Entries left: 1053\n", 895 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_1600_1700.parquet\n", 896 | "Non na entries: 23805\n", 897 | "Entries left: 1587\n", 898 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_1700_1800.parquet\n", 899 | "Non na entries: 22752\n", 900 | "Entries left: 1485\n", 901 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_1800_1900.parquet\n", 902 | "Non na entries: 
21289\n", 903 | "Entries left: 851\n", 904 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_1900_2000.parquet\n", 905 | "Non na entries: 20266\n", 906 | "Entries left: 1935\n", 907 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_2000_2100.parquet\n", 908 | "Non na entries: 22559\n", 909 | "Entries left: 3647\n", 910 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_2100_2200.parquet\n", 911 | "Non na entries: 17410\n", 912 | "Entries left: 2039\n", 913 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_2200_2300.parquet\n", 914 | "Non na entries: 20513\n", 915 | "Entries left: 2285\n", 916 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_19_2300_0000.parquet\n", 917 | "Non na entries: 16284\n", 918 | "Entries left: 386\n", 919 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_0000_0100.parquet\n", 920 | "Non na entries: 20526\n", 921 | "Entries left: 725\n", 922 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_0100_0200.parquet\n", 923 | "Non na entries: 19294\n", 924 | "Entries left: 547\n", 925 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_0200_0300.parquet\n", 926 | "Non na entries: 16827\n", 927 | "Entries left: 597\n", 928 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_0300_0400.parquet\n", 929 | "Non na entries: 16628\n", 930 | "Entries left: 325\n", 931 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_0400_0500.parquet\n", 932 | "Non na entries: 17306\n", 933 | "Entries left: 350\n", 934 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_0500_0600.parquet\n", 935 | "Non na entries: 18860\n", 936 | "Entries left: 399\n", 937 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_0600_0700.parquet\n", 938 | "Non na entries: 16651\n", 939 | "Entries left: 376\n", 940 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_0700_0800.parquet\n", 941 | "Non na entries: 15018\n", 942 | "Entries left: 110\n", 943 | 
"coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_0800_0900.parquet\n", 944 | "Non na entries: 19297\n", 945 | "Entries left: 143\n", 946 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_0900_1000.parquet\n", 947 | "Non na entries: 18229\n", 948 | "Entries left: 173\n", 949 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_1000_1100.parquet\n", 950 | "Non na entries: 21159\n", 951 | "Entries left: 65\n", 952 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_1100_1200.parquet\n", 953 | "Non na entries: 14720\n", 954 | "Entries left: 102\n", 955 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_1200_1300.parquet\n", 956 | "Non na entries: 20426\n", 957 | "Entries left: 281\n", 958 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_1300_1400.parquet\n", 959 | "Non na entries: 16789\n", 960 | "Entries left: 182\n", 961 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_1400_1500.parquet\n", 962 | "Non na entries: 19182\n", 963 | "Entries left: 364\n", 964 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_1500_1600.parquet\n", 965 | "Non na entries: 26219\n", 966 | "Entries left: 1640\n", 967 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_1600_1700.parquet\n", 968 | "Non na entries: 18598\n", 969 | "Entries left: 304\n", 970 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_1700_1800.parquet\n", 971 | "Non na entries: 16120\n", 972 | "Entries left: 305\n", 973 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_1800_1900.parquet\n", 974 | "Non na entries: 29157\n", 975 | "Entries left: 1472\n", 976 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_1900_2000.parquet\n", 977 | "Non na entries: 21949\n", 978 | "Entries left: 494\n", 979 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_2000_2100.parquet\n", 980 | "Non na entries: 21123\n", 981 | "Entries left: 558\n", 982 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_2100_2200.parquet\n", 983 | "Non na entries: 17445\n", 984 
| "Entries left: 365\n", 985 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_2200_2300.parquet\n", 986 | "Non na entries: 22824\n", 987 | "Entries left: 777\n", 988 | "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_20_2300_0000.parquet\n", 989 | "Non na entries: 21173\n", 990 | "Entries left: 1124\n" 991 | ] 992 | } 993 | ], 994 | "source": [ 995 | "i = 0\n", 996 | "for filename in os.listdir(r\"Data\\coinbase_btc_usd\\coinbase\\btc_usd\\l2_snapshots\\100ms\"):\n", 997 | " print(filename)\n", 998 | " \n", 999 | " df_snapshot = pd.read_parquet(os.path.join(r\"Data\\coinbase_btc_usd\\coinbase\\btc_usd\\l2_snapshots\\100ms\", filename))\n", 1000 | " \n", 1001 | " df_snapshot.dropna(inplace=True) #Drop na values\n", 1002 | " print('Non na entries:', np.shape(df_snapshot)[0])\n", 1003 | "\n", 1004 | " # Check no price levels with zero quantities:\n", 1005 | " assert np.count_nonzero(df_snapshot.values) == np.shape(df_snapshot)[0] * np.shape(df_snapshot)[1]\n", 1006 | "\n", 1007 | " df_snapshot['future_bid'] = df_snapshot['b1'].shift(-1)\n", 1008 | "\n", 1009 | " # Calculate how many levels the bid/ask price changed by\n", 1010 | " df_snapshot['bid_change_n'] = df_snapshot.apply(lambda x: 100*(x['future_bid'] - x['b1']), axis=1)\n", 1011 | "\n", 1012 | " #Delete last one as don't know how much it has changed by\n", 1013 | " df_snapshot.drop(df_snapshot.index[-1], axis=0, inplace=True)\n", 1014 | "\n", 1015 | " # Ignore the prices columns\n", 1016 | " df_snapshot_cut = df_snapshot[df_snapshot.columns[100:]]\n", 1017 | "\n", 1018 | " # Take only the values where the bid price increases\n", 1019 | " df_bid_up = df_snapshot_cut[df_snapshot_cut['bid_change_n'] > 0].drop(['future_bid'], axis=1)\n", 1020 | "\n", 1021 | " # How many entries in this hour left?\n", 1022 | " print('Entries left:', np.shape(df_bid_up)[0])\n", 1023 | " \n", 1024 | " if i==0:\n", 1025 | " df = df_bid_up\n", 1026 | " i += 1\n", 1027 | " else:\n", 1028 | " df = df.append(df_bid_up)\n" 1029 
| ] 1030 | }, 1031 | { 1032 | "cell_type": "code", 1033 | "execution_count": 205, 1034 | "metadata": {}, 1035 | "outputs": [], 1036 | "source": [ 1037 | "df.to_csv(\"df_snapshot_100ms_bid_up.csv\")" 1038 | ] 1039 | }, 1040 | { 1041 | "cell_type": "code", 1042 | "execution_count": 21, 1043 | "metadata": {}, 1044 | "outputs": [ 1045 | { 1046 | "data": { 1047 | "text/plain": [ 1048 | "(array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,\n", 1049 | " 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,\n", 1050 | " 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", 1051 | " 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,\n", 1052 | " 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,\n", 1053 | " 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,\n", 1054 | " 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,\n", 1055 | " 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,\n", 1056 | " 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,\n", 1057 | " 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,\n", 1058 | " 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,\n", 1059 | " 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,\n", 1060 | " 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,\n", 1061 | " 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154,\n", 1062 | " 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165,\n", 1063 | " 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176,\n", 1064 | " 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187,\n", 1065 | " 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198,\n", 1066 | " 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,\n", 1067 | " 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,\n", 1068 | " 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231,\n", 1069 | " 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242,\n", 1070 | " 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253,\n", 1071 | " 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264,\n", 1072 | " 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275,\n", 1073 | " 276, 277, 
278, 279, 280, 281, 282, 283, 284, 285, 286,\n", 1074 | " 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297,\n", 1075 | " 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308,\n", 1076 | " 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319,\n", 1077 | " 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330,\n", 1078 | " 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341,\n", 1079 | " 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352,\n", 1080 | " 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363,\n", 1081 | " 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374,\n", 1082 | " 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385,\n", 1083 | " 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396,\n", 1084 | " 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407,\n", 1085 | " 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418,\n", 1086 | " 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429,\n", 1087 | " 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440,\n", 1088 | " 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451,\n", 1089 | " 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462,\n", 1090 | " 463, 464, 465, 466, 467, 468, 469, 472, 473, 474, 475,\n", 1091 | " 476, 477, 478, 479, 480, 481, 482, 483, 485, 486, 487,\n", 1092 | " 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498,\n", 1093 | " 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509,\n", 1094 | " 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520,\n", 1095 | " 521, 522, 523, 525, 528, 530, 531, 532, 536, 537, 538,\n", 1096 | " 539, 540, 542, 543, 544, 546, 547, 549, 550, 551, 552,\n", 1097 | " 553, 554, 556, 557, 558, 559, 560, 566, 568, 569, 570,\n", 1098 | " 571, 572, 573, 574, 575, 576, 577, 578, 579, 582, 584,\n", 1099 | " 586, 587, 588, 589, 591, 594, 597, 598, 600, 601, 603,\n", 1100 | " 604, 607, 610, 611, 613, 615, 616, 620, 621, 622, 623,\n", 1101 | " 625, 630, 632, 634, 635, 642, 643, 645, 650, 651, 652,\n", 1102 | " 653, 655, 656, 663, 664, 667, 671, 
673, 674, 676, 677,\n", 1103 | " 678, 684, 688, 691, 693, 696, 699, 700, 708, 714, 715,\n", 1104 | " 716, 718, 720, 722, 723, 727, 728, 733, 735, 736, 742,\n", 1105 | " 748, 749, 750, 751, 752, 762, 765, 768, 772, 774, 775,\n", 1106 | " 776, 777, 786, 787, 788, 790, 797, 799, 800, 801, 817,\n", 1107 | " 824, 828, 832, 837, 854, 857, 858, 860, 867, 874, 895,\n", 1108 | " 896, 901, 905, 913, 918, 929, 950, 953, 960, 979, 1002,\n", 1109 | " 1023, 1052, 1088, 1103, 1148, 1162, 1185, 1197, 1218, 1227, 1232,\n", 1110 | " 1234, 1264, 1330, 1364, 1373, 1495, 1499, 1586, 2564, 2733]),\n", 1111 | " array([105912, 106103, 92412, 69731, 13148, 8756, 3700, 1877,\n", 1112 | " 1493, 1745, 1480, 1163, 1050, 814, 731, 636,\n", 1113 | " 575, 542, 555, 603, 523, 479, 438, 449,\n", 1114 | " 474, 444, 450, 413, 420, 380, 375, 397,\n", 1115 | " 376, 351, 375, 360, 358, 362, 372, 382,\n", 1116 | " 349, 334, 363, 341, 360, 355, 376, 372,\n", 1117 | " 419, 572, 475, 373, 295, 303, 278, 298,\n", 1118 | " 306, 309, 289, 306, 333, 256, 266, 278,\n", 1119 | " 284, 239, 286, 250, 264, 250, 266, 280,\n", 1120 | " 250, 264, 243, 266, 377, 432, 335, 292,\n", 1121 | " 234, 244, 253, 237, 248, 250, 263, 269,\n", 1122 | " 293, 295, 288, 306, 357, 330, 294, 371,\n", 1123 | " 429, 490, 651, 1268, 237, 189, 140, 142,\n", 1124 | " 102, 116, 173, 118, 118, 108, 129, 101,\n", 1125 | " 101, 101, 106, 111, 109, 89, 85, 105,\n", 1126 | " 109, 99, 92, 90, 87, 78, 76, 82,\n", 1127 | " 87, 88, 88, 102, 86, 74, 90, 83,\n", 1128 | " 75, 86, 86, 76, 68, 73, 81, 75,\n", 1129 | " 66, 72, 76, 73, 79, 89, 91, 77,\n", 1130 | " 88, 63, 71, 77, 68, 83, 68, 53,\n", 1131 | " 57, 54, 59, 68, 62, 56, 79, 59,\n", 1132 | " 82, 81, 67, 54, 49, 63, 72, 71,\n", 1133 | " 52, 53, 64, 61, 64, 50, 46, 49,\n", 1134 | " 51, 54, 63, 52, 63, 47, 42, 48,\n", 1135 | " 43, 56, 51, 53, 62, 63, 77, 84,\n", 1136 | " 67, 53, 44, 39, 35, 50, 38, 42,\n", 1137 | " 30, 44, 35, 34, 48, 44, 43, 36,\n", 1138 | " 38, 33, 30, 39, 37, 33, 29, 24,\n", 1139 
| " 31, 44, 49, 38, 40, 33, 34, 37,\n", 1140 | " 30, 26, 48, 28, 25, 31, 25, 27,\n", 1141 | " 17, 18, 27, 23, 26, 32, 21, 27,\n", 1142 | " 23, 29, 15, 34, 19, 28, 31, 18,\n", 1143 | " 25, 35, 21, 21, 26, 20, 21, 22,\n", 1144 | " 27, 30, 21, 22, 31, 17, 28, 17,\n", 1145 | " 25, 19, 23, 20, 10, 19, 11, 14,\n", 1146 | " 20, 21, 18, 29, 10, 21, 13, 16,\n", 1147 | " 20, 22, 16, 20, 18, 12, 21, 18,\n", 1148 | " 17, 28, 9, 16, 22, 15, 20, 16,\n", 1149 | " 15, 11, 18, 18, 6, 18, 13, 15,\n", 1150 | " 22, 7, 16, 8, 14, 11, 10, 27,\n", 1151 | " 12, 21, 23, 8, 13, 20, 14, 13,\n", 1152 | " 9, 11, 19, 8, 14, 12, 17, 31,\n", 1153 | " 23, 13, 21, 14, 14, 14, 19, 11,\n", 1154 | " 11, 12, 11, 13, 10, 4, 6, 11,\n", 1155 | " 6, 10, 10, 3, 8, 10, 6, 8,\n", 1156 | " 9, 9, 4, 7, 7, 4, 7, 6,\n", 1157 | " 4, 6, 1, 5, 6, 3, 7, 5,\n", 1158 | " 14, 10, 7, 4, 6, 4, 7, 6,\n", 1159 | " 7, 8, 6, 9, 5, 3, 4, 2,\n", 1160 | " 5, 12, 8, 6, 14, 7, 1, 9,\n", 1161 | " 1, 4, 4, 4, 2, 3, 3, 6,\n", 1162 | " 3, 7, 4, 6, 7, 6, 2, 6,\n", 1163 | " 6, 6, 5, 3, 2, 4, 4, 5,\n", 1164 | " 4, 6, 3, 6, 4, 1, 2, 7,\n", 1165 | " 8, 5, 5, 6, 6, 2, 4, 1,\n", 1166 | " 5, 4, 4, 1, 3, 2, 2, 3,\n", 1167 | " 1, 2, 4, 7, 2, 2, 5, 3,\n", 1168 | " 2, 3, 4, 2, 5, 3, 2, 3,\n", 1169 | " 3, 5, 2, 1, 1, 4, 1, 2,\n", 1170 | " 2, 3, 1, 3, 2, 1, 5, 3,\n", 1171 | " 2, 3, 1, 2, 3, 2, 2, 2,\n", 1172 | " 3, 2, 1, 1, 2, 1, 4, 2,\n", 1173 | " 4, 1, 5, 3, 1, 1, 2, 3,\n", 1174 | " 1, 4, 3, 1, 1, 3, 4, 1,\n", 1175 | " 1, 1, 2, 1, 2, 1, 1, 1,\n", 1176 | " 1, 3, 2, 1, 3, 2, 1, 2,\n", 1177 | " 2, 2, 1, 1, 2, 2, 1, 2,\n", 1178 | " 1, 1, 1, 1, 3, 4, 1, 2,\n", 1179 | " 1, 1, 3, 2, 3, 1, 2, 1,\n", 1180 | " 1, 1, 3, 1, 2, 2, 1, 1,\n", 1181 | " 1, 1, 2, 1, 1, 3, 1, 1,\n", 1182 | " 2, 3, 3, 1, 1, 1, 3, 1,\n", 1183 | " 1, 1, 1, 2, 1, 1, 2, 1,\n", 1184 | " 1, 1, 1, 2, 3, 1, 1, 3,\n", 1185 | " 2, 1, 1, 1, 2, 1, 1, 1,\n", 1186 | " 1, 1, 1, 1, 1, 2, 2, 1,\n", 1187 | " 2, 2, 1, 1, 1, 1, 1, 2,\n", 1188 | " 1, 1, 1, 1, 1, 1, 2, 1,\n", 1189 | " 2, 1, 1, 1, 1, 1, 
1, 1,\n", 1190 | " 1, 1, 1, 1, 1, 1, 1, 1,\n", 1191 | " 1, 1, 1, 1, 1, 1, 1, 1,\n", 1192 | " 1, 1, 1, 1, 1, 1, 1, 2,\n", 1193 | " 1, 1, 1, 1, 1, 1, 1, 1,\n", 1194 | " 2, 1, 1, 1, 1, 1, 1, 1,\n", 1195 | " 1, 1, 1, 1, 1, 1, 1, 1,\n", 1196 | " 1, 1, 1, 1, 1, 1, 1, 1,\n", 1197 | " 1, 1, 1, 1], dtype=int64))" 1198 | ] 1199 | }, 1200 | "execution_count": 21, 1201 | "metadata": {}, 1202 | "output_type": "execute_result" 1203 | } 1204 | ], 1205 | "source": [ 1206 | "value_counts = np.unique(np.round(df_bid_up['bid_change_n'].values).astype(int), return_counts=True)\n", 1207 | "value_counts" 1208 | ] 1209 | }, 1210 | { 1211 | "cell_type": "code", 1212 | "execution_count": null, 1213 | "metadata": {}, 1214 | "outputs": [], 1215 | "source": [] 1216 | } 1217 | ], 1218 | "metadata": { 1219 | "kernelspec": { 1220 | "display_name": "Python 3", 1221 | "language": "python", 1222 | "name": "python3" 1223 | }, 1224 | "language_info": { 1225 | "codemirror_mode": { 1226 | "name": "ipython", 1227 | "version": 3 1228 | }, 1229 | "file_extension": ".py", 1230 | "mimetype": "text/x-python", 1231 | "name": "python", 1232 | "nbconvert_exporter": "python", 1233 | "pygments_lexer": "ipython3", 1234 | "version": "3.7.5" 1235 | } 1236 | }, 1237 | "nbformat": 4, 1238 | "nbformat_minor": 2 1239 | } 1240 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Globe Research 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The 
above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Learning for Digital Asset Limit Order Books 2 | 3 | This paper shows that temporal CNNs accurately predict Bitcoin spot price movements from limit order book data. On a 2-second prediction time horizon, we achieve 76% walk-forward accuracy on the popular cryptocurrency exchange Coinbase. Our model can be trained in less than a day on commodity GPUs, which could be installed in colocation centers, allowing for model sync with existing, faster order book prediction models. 
4 | 5 | See paper at https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3704098 6 | -------------------------------------------------------------------------------- /WalkForwardV2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import gc\n", 10 | "import numpy as np\n", 11 | "import pandas as pd\n", 12 | "from tqdm import tqdm\n", 13 | "from google.cloud import storage\n", 14 | "from io import StringIO\n", 15 | "%matplotlib inline\n", 16 | "import seaborn as sns\n", 17 | "import matplotlib.pyplot as plt" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "client = storage.Client()\n", 27 | "bucket = client.get_bucket('bucket_l2_snapshot')" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "blob = bucket.blob('l2_snapshot_100ms.csv')\n", 37 | "bt = blob.download_as_string()\n", 38 | "s = str(bt, 'utf-8')\n", 39 | "s = StringIO(s)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "df = pd.read_csv(s, index_col=0, infer_datetime_format=True)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 5, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/html": [ 59 | "
\n", 60 | "\n", 73 | "\n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " 
\n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | "
b1b2b3b4b5b6b7b8b9b10...aq1aq2aq3aq4aq5aq6aq7aq8aq9aq10
timestamp
2019-11-12 00:00:00.0008721.538720.598719.558719.508719.08718.028717.878717.858717.568716.06...5.8022042.8590.121890.1425750.0026920.460.0384680.2000000.0018901.00000
2019-11-12 00:00:00.1008721.538720.598719.568719.558719.08718.028717.878717.858717.568716.06...5.8022042.8590.121890.1425750.0026920.460.0384680.2000000.0018901.00000
2019-11-12 00:00:00.2008721.538720.598719.618719.568719.08718.028717.878717.858716.068716.00...5.8022042.8590.121890.1425750.0026920.460.0384680.2000001.0860160.00189
2019-11-12 00:00:00.3008721.538720.598719.618719.568719.08718.028718.008717.878717.858716.60...7.8022042.8590.121890.1425750.0026920.460.0384680.2000001.0860160.00189
2019-11-12 00:00:00.4008721.538720.598719.618719.568719.08718.998718.028718.008717.878717.85...7.8022042.8590.121890.1425750.0026920.460.2000000.0384681.0860160.00189
\n", 247 | "

5 rows × 40 columns

\n", 248 | "
" 249 | ], 250 | "text/plain": [ 251 | " b1 b2 b3 b4 b5 b6 \\\n", 252 | "timestamp \n", 253 | "2019-11-12 00:00:00.000 8721.53 8720.59 8719.55 8719.50 8719.0 8718.02 \n", 254 | "2019-11-12 00:00:00.100 8721.53 8720.59 8719.56 8719.55 8719.0 8718.02 \n", 255 | "2019-11-12 00:00:00.200 8721.53 8720.59 8719.61 8719.56 8719.0 8718.02 \n", 256 | "2019-11-12 00:00:00.300 8721.53 8720.59 8719.61 8719.56 8719.0 8718.02 \n", 257 | "2019-11-12 00:00:00.400 8721.53 8720.59 8719.61 8719.56 8719.0 8718.99 \n", 258 | "\n", 259 | " b7 b8 b9 b10 ... aq1 \\\n", 260 | "timestamp ... \n", 261 | "2019-11-12 00:00:00.000 8717.87 8717.85 8717.56 8716.06 ... 5.802204 \n", 262 | "2019-11-12 00:00:00.100 8717.87 8717.85 8717.56 8716.06 ... 5.802204 \n", 263 | "2019-11-12 00:00:00.200 8717.87 8717.85 8716.06 8716.00 ... 5.802204 \n", 264 | "2019-11-12 00:00:00.300 8718.00 8717.87 8717.85 8716.60 ... 7.802204 \n", 265 | "2019-11-12 00:00:00.400 8718.02 8718.00 8717.87 8717.85 ... 7.802204 \n", 266 | "\n", 267 | " aq2 aq3 aq4 aq5 aq6 aq7 \\\n", 268 | "timestamp \n", 269 | "2019-11-12 00:00:00.000 2.859 0.12189 0.142575 0.002692 0.46 0.038468 \n", 270 | "2019-11-12 00:00:00.100 2.859 0.12189 0.142575 0.002692 0.46 0.038468 \n", 271 | "2019-11-12 00:00:00.200 2.859 0.12189 0.142575 0.002692 0.46 0.038468 \n", 272 | "2019-11-12 00:00:00.300 2.859 0.12189 0.142575 0.002692 0.46 0.038468 \n", 273 | "2019-11-12 00:00:00.400 2.859 0.12189 0.142575 0.002692 0.46 0.200000 \n", 274 | "\n", 275 | " aq8 aq9 aq10 \n", 276 | "timestamp \n", 277 | "2019-11-12 00:00:00.000 0.200000 0.001890 1.00000 \n", 278 | "2019-11-12 00:00:00.100 0.200000 0.001890 1.00000 \n", 279 | "2019-11-12 00:00:00.200 0.200000 1.086016 0.00189 \n", 280 | "2019-11-12 00:00:00.300 0.200000 1.086016 0.00189 \n", 281 | "2019-11-12 00:00:00.400 0.038468 1.086016 0.00189 \n", 282 | "\n", 283 | "[5 rows x 40 columns]" 284 | ] 285 | }, 286 | "execution_count": 5, 287 | "metadata": {}, 288 | "output_type": "execute_result" 289 | } 290 | ], 
# Helper definitions from notebook cell [7] of WalkForwardV2.ipynb, restored to
# normally indented Python.  The neighbouring cells prepare the input with
# ``df.index = pd.to_datetime(df.index)`` and then call
# ``l2_norm = normalise_data_per_day(df)``.


def concat_data(base_path, features):
    """Concatenate every parquet file in ``base_path`` into one DataFrame.

    Files are read in sorted filename order, restricted to the columns in
    ``features``, and rows containing NaN are dropped from each file before
    concatenation.

    Args:
        base_path: Directory holding the parquet files.  ``None`` lists the
            current working directory and uses the bare filenames as paths.
        features: Column names passed to ``pd.read_parquet(columns=...)``.

    Returns:
        The concatenated DataFrame, or an empty DataFrame when the directory
        contains no files.
    """
    # Local import: the notebook's import cell does not import ``os``.
    import os

    frames = []
    for name in tqdm(sorted(os.listdir(base_path))):
        # os.path.join works whether or not base_path ends with a separator.
        path = name if base_path is None else os.path.join(base_path, name)
        frames.append(pd.read_parquet(path, columns=features).dropna())

    if not frames:
        return pd.DataFrame()
    # Concatenate once instead of re-copying the growing frame per file.
    return pd.concat(frames)


def extend_matrix(A, n):
    """Repeat each row of ``A`` (except the last) for the *next* group's size.

    Row ``i`` of ``A`` is repeated ``n[i + 1]`` times, so the output has
    ``sum(n[1:])`` rows.  ``normalise_data_per_day`` uses this to line up
    day ``i``'s statistics with every row of day ``i + 1``.

    Args:
        A: 2-D array with one row per group.
        n: Row count of each group, same length as ``A``.

    Returns:
        The expanded array.
    """
    n = n[1:]   # the first group's own rows receive no statistics
    A = A[:-1]  # the last group's statistics are never applied
    return np.repeat(A, repeats=n, axis=0)


def normalise_data_per_day(df):
    """Z-score every row using the mean/variance of the previous day of data.

    The rows belonging to the first day (the day of ``df.index[0]``) are
    dropped because no earlier statistics exist for them.  "Previous day"
    means the previous day that has rows, so gaps in the calendar no longer
    cause a shape mismatch.

    Args:
        df: DataFrame with a ``DatetimeIndex``.  Rows are assumed to be in
            chronological order (the statistics are aligned positionally).

    Returns:
        DataFrame of ``(value - prev_day_mean) / sqrt(prev_day_var)`` for all
        rows after the first day.  A column that is constant over a day has
        zero variance and yields inf/NaN for the following day.
    """
    day_of_row = df.index.normalize()
    per_day = df.groupby(day_of_row)

    # Group sizes and statistics come from the same grouping, so they always
    # have one entry per day that actually contains data.
    rows_per_day = per_day.size().to_numpy()
    mean_array = extend_matrix(per_day.mean().to_numpy(), rows_per_day)
    var_array = extend_matrix(per_day.var().to_numpy(), rows_per_day)

    # Drop the rows of the first day
    df = df[day_of_row != day_of_row[0]]

    return (df - mean_array) / np.sqrt(var_array)


def balance_classes(y):
    """Pick random positions to discard so that all classes have equal size.

    Every class except the smallest is down-sampled to the smallest class's
    size; the positions chosen for removal are drawn without replacement via
    the global ``np.random`` state.

    Args:
        y: 1-D array-like of class labels.

    Returns:
        Integer array of positions in ``y`` that should be removed.
    """
    y = np.asarray(y)
    categories, counts = np.unique(y, return_counts=True)

    # Take smallest number as class size
    class_size = np.min(counts)
    smallest = np.argmin(counts)

    to_drop = np.array([], dtype=int)
    for i, category in enumerate(categories):
        if i == smallest:
            continue
        positions = np.flatnonzero(y == category)
        surplus = np.random.choice(positions, counts[i] - class_size, replace=False)
        to_drop = np.concatenate((to_drop, surplus), axis=None)

    return to_drop


def generate_y(df_snapshot, T=100, D=40, best_ask='a1', best_bid='b1', k=20, alpha=10e-5):
    """Build 3-class labels from the mid price of a snapshot DataFrame.

    For every row ``t`` the mid price ``(ask + bid) / 2`` is averaged over
    rows ``t-k+1 .. t`` (``m_b``) and over rows ``t+1 .. t+k`` (``m_a``).
    The first ``k`` and last ``k`` rows are discarded, then the first ``T``
    remaining labels are discarded so the result lines up with
    ``generate_X``.

    Labels:
        0 -- ``m_b < m_a * (1 - alpha)``
        2 -- ``m_b > m_a * (1 + alpha)``
        1 -- otherwise

    NOTE(review): label 2 is produced when the trailing mean exceeds the
    forward mean, i.e. when the averaged price is *falling*, although the
    original code called this branch "increase".  Kept as-is so existing
    label files and trained models stay valid -- confirm the intended
    direction.

    Args:
        df_snapshot: DataFrame containing the ``best_ask``/``best_bid`` columns.
        T: Number of leading labels to drop (window length of the inputs).
        D: Unused; kept for signature compatibility.
        best_ask: Name of the best-ask price column.
        best_bid: Name of the best-bid price column.
        k: Number of rows in each averaging window.
        alpha: Relative threshold separating the classes (default ``1e-4``).

    Returns:
        1-D integer array of labels in ``{0, 1, 2}``.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    mid_price = (df_snapshot[best_ask].to_numpy() + df_snapshot[best_bid].to_numpy()) / 2
    df = pd.DataFrame({'mid_price': mid_price})

    # Column i holds the mid price shifted by i rows: i >= 0 looks i rows
    # back, i < 0 looks |i| rows ahead.
    for i in range(-k, k):
        df[i] = df['mid_price'].shift(periods=i)

    # Drop the first k and last k rows (incomplete windows).  Previously the
    # literal 20 was used here, which broke every k other than 20.
    df = df.iloc[k:len(df) - k]

    # Mean of the current + previous k-1 rows, and of the next k rows
    m_b = df[list(range(0, k))].mean(axis=1)
    m_a = df[list(range(-k, 0))].mean(axis=1)

    upper = np.where(m_b > m_a * (1 + alpha), 1, 0)
    lower = np.where(m_b < m_a * (1 - alpha), -1, 0)
    y = upper + lower

    # The T most recent snapshots feed each input window, so the first T
    # rows have no matching window; shift {-1, 0, 1} to {0, 1, 2}.
    return y[T:] + 1


def generate_preX(df_snapshot, k=20):
    """Return the snapshot values with the first and last ``k`` rows removed.

    Those rows have no label because ``generate_y`` needs ``k`` rows on each
    side; ``k`` must match the value given to ``generate_y``.
    """
    values = df_snapshot.to_numpy()
    return values[k:len(values) - k]


def generate_X(preX, T=100, D=40):
    """Stack sliding windows of ``T`` consecutive rows into a float32 array.

    Window ``t`` is ``preX[t:t+T]`` for ``t`` in ``range(len(preX) - T)``;
    the final possible window is left out so the number of windows equals
    ``len(generate_y(...))``.

    Args:
        preX: 2-D array of shape ``(rows, features)``.
        T: Window length in rows.
        D: Unused; kept for signature compatibility.

    Returns:
        Array of shape ``(len(preX) - T, T, features)`` and dtype float32,
        or an empty 1-D float32 array when there are not enough rows.
    """
    preX = np.asarray(preX)
    n_windows = len(preX) - T

    if n_windows <= 0 or T < 1:
        # Degenerate input: keep the original construction and its result.
        return np.array([preX[t:t + T] for t in range(n_windows)], dtype='float32')

    # Zero-copy strided view of shape (rows - T + 1, ..., T); move the window
    # axis next to the sample axis and materialise the copy once as float32.
    windows = np.lib.stride_tricks.sliding_window_view(preX, T, axis=0)[:n_windows]
    return np.ascontiguousarray(np.moveaxis(windows, -1, 1), dtype='float32')
\n", 423 | "\n", 436 | "\n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " 
\n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | "
b1b2b3b4b5b6b7b8b9b10...aq1aq2aq3aq4aq5aq6aq7aq8aq9aq10
timestamp
2019-11-13 00:00:00.0002.0696692.0662212.0493592.0321642.0457552.0561322.0621442.0728412.0644722.076723...-0.630327-0.585376-0.603213-0.584939-0.568453-0.547841-0.503235-0.459792-0.545609-0.581027
2019-11-13 00:00:00.1002.0965532.0990172.0990952.0932162.0766952.0601692.0739922.0844262.0906212.101545...-0.630484-0.585376-0.603213-0.584939-0.568453-0.547841-0.559809-0.556070-0.552547-0.576830
2019-11-13 00:00:00.2002.0965532.0990172.0990952.0932162.0766952.0601692.0739922.0844262.0906212.101545...-0.630484-0.585376-0.603213-0.584939-0.568453-0.547841-0.559809-0.556070-0.552547-0.576830
2019-11-13 00:00:00.3002.0965532.0855762.0966762.0628242.0460242.0599002.0702222.0763442.0871172.078882...-0.630484-0.585376-0.603213-0.584939-0.568453-0.547841-0.559809-0.556070-0.552547-0.576830
2019-11-13 00:00:00.4002.0965532.0855762.0966762.0628242.0460242.0599002.0702222.0763442.0644722.076723...-0.630484-0.585376-0.603213-0.584939-0.568453-0.547841-0.559809-0.556070-0.552547-0.576830
\n", 610 | "

5 rows × 40 columns

\n", 611 | "
" 612 | ], 613 | "text/plain": [ 614 | " b1 b2 b3 b4 b5 \\\n", 615 | "timestamp \n", 616 | "2019-11-13 00:00:00.000 2.069669 2.066221 2.049359 2.032164 2.045755 \n", 617 | "2019-11-13 00:00:00.100 2.096553 2.099017 2.099095 2.093216 2.076695 \n", 618 | "2019-11-13 00:00:00.200 2.096553 2.099017 2.099095 2.093216 2.076695 \n", 619 | "2019-11-13 00:00:00.300 2.096553 2.085576 2.096676 2.062824 2.046024 \n", 620 | "2019-11-13 00:00:00.400 2.096553 2.085576 2.096676 2.062824 2.046024 \n", 621 | "\n", 622 | " b6 b7 b8 b9 b10 \\\n", 623 | "timestamp \n", 624 | "2019-11-13 00:00:00.000 2.056132 2.062144 2.072841 2.064472 2.076723 \n", 625 | "2019-11-13 00:00:00.100 2.060169 2.073992 2.084426 2.090621 2.101545 \n", 626 | "2019-11-13 00:00:00.200 2.060169 2.073992 2.084426 2.090621 2.101545 \n", 627 | "2019-11-13 00:00:00.300 2.059900 2.070222 2.076344 2.087117 2.078882 \n", 628 | "2019-11-13 00:00:00.400 2.059900 2.070222 2.076344 2.064472 2.076723 \n", 629 | "\n", 630 | " ... aq1 aq2 aq3 aq4 \\\n", 631 | "timestamp ... \n", 632 | "2019-11-13 00:00:00.000 ... -0.630327 -0.585376 -0.603213 -0.584939 \n", 633 | "2019-11-13 00:00:00.100 ... -0.630484 -0.585376 -0.603213 -0.584939 \n", 634 | "2019-11-13 00:00:00.200 ... -0.630484 -0.585376 -0.603213 -0.584939 \n", 635 | "2019-11-13 00:00:00.300 ... -0.630484 -0.585376 -0.603213 -0.584939 \n", 636 | "2019-11-13 00:00:00.400 ... 
-0.630484 -0.585376 -0.603213 -0.584939 \n", 637 | "\n", 638 | " aq5 aq6 aq7 aq8 aq9 \\\n", 639 | "timestamp \n", 640 | "2019-11-13 00:00:00.000 -0.568453 -0.547841 -0.503235 -0.459792 -0.545609 \n", 641 | "2019-11-13 00:00:00.100 -0.568453 -0.547841 -0.559809 -0.556070 -0.552547 \n", 642 | "2019-11-13 00:00:00.200 -0.568453 -0.547841 -0.559809 -0.556070 -0.552547 \n", 643 | "2019-11-13 00:00:00.300 -0.568453 -0.547841 -0.559809 -0.556070 -0.552547 \n", 644 | "2019-11-13 00:00:00.400 -0.568453 -0.547841 -0.559809 -0.556070 -0.552547 \n", 645 | "\n", 646 | " aq10 \n", 647 | "timestamp \n", 648 | "2019-11-13 00:00:00.000 -0.581027 \n", 649 | "2019-11-13 00:00:00.100 -0.576830 \n", 650 | "2019-11-13 00:00:00.200 -0.576830 \n", 651 | "2019-11-13 00:00:00.300 -0.576830 \n", 652 | "2019-11-13 00:00:00.400 -0.576830 \n", 653 | "\n", 654 | "[5 rows x 40 columns]" 655 | ] 656 | }, 657 | "execution_count": 9, 658 | "metadata": {}, 659 | "output_type": "execute_result" 660 | } 661 | ], 662 | "source": [ 663 | "l2_norm.head()" 664 | ] 665 | }, 666 | { 667 | "cell_type": "code", 668 | "execution_count": 10, 669 | "metadata": {}, 670 | "outputs": [ 671 | { 672 | "name": "stdout", 673 | "output_type": "stream", 674 | "text": [ 675 | "preX Shape: (4220032, 40)\n", 676 | "X shape, y shape: (4219932, 100, 40) (4219932,)\n", 677 | "Unique y's: [0 1 2]\n" 678 | ] 679 | } 680 | ], 681 | "source": [ 682 | "T = 100\n", 683 | "D = 40\n", 684 | "y = generate_y(l2_norm, T=T, D=D, best_ask='a1', best_bid = 'b1', alpha=0.002)\n", 685 | "preX = generate_preX(l2_norm)\n", 686 | "print(\"preX Shape: \", preX.shape)\n", 687 | "X = generate_X(preX)\n", 688 | "print(\"X shape, y shape: \", X.shape, y.shape)\n", 689 | "print(\"Unique y's: \", np.unique(y))\n", 690 | "\n", 691 | "del preX\n", 692 | "gc.collect()\n", 693 | "\n", 694 | "# First and last 20 are removed to create labels and then last 100 as previous 100 is required for input matrix\n", 695 | "X_index = l2_norm[20:-120].index\n", 696 | 
"\n", 697 | "# Number of data points for the last day\n", 698 | "num_test = np.unique(X_index.day, return_counts=True)[1][-1]\n", 699 | "\n", 700 | "# Split the data into the first seven days and the last day\n", 701 | "X_train_val = X[:-num_test]\n", 702 | "y_train_val = y[:-num_test]\n", 703 | "X_test = X[-num_test:]\n", 704 | "y_test = y[-num_test:]" 705 | ] 706 | }, 707 | { 708 | "cell_type": "code", 709 | "execution_count": 11, 710 | "metadata": {}, 711 | "outputs": [], 712 | "source": [ 713 | "import tensorflow as tf\n", 714 | "from tensorflow.keras.layers import Dense\n", 715 | "from tensorflow.keras import Input, Model\n", 716 | "from tcn import TCN, tcn_full_summary" 717 | ] 718 | }, 719 | { 720 | "cell_type": "code", 721 | "execution_count": 12, 722 | "metadata": {}, 723 | "outputs": [], 724 | "source": [ 725 | "from tensorflow import keras" 726 | ] 727 | }, 728 | { 729 | "cell_type": "code", 730 | "execution_count": 13, 731 | "metadata": {}, 732 | "outputs": [ 733 | { 734 | "name": "stdout", 735 | "output_type": "stream", 736 | "text": [ 737 | "INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')\n", 738 | "INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n", 739 | "INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n", 740 | "INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n", 741 | "INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n", 742 | "INFO:tensorflow:Reduce to 
/job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n", 743 | "INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n", 744 | "INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n", 745 | "INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n", 746 | "INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n", 747 | "INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).\n" 748 | ] 749 | } 750 | ], 751 | "source": [ 752 | "strategy = tf.distribute.MirroredStrategy()\n", 753 | "with strategy.scope():\n", 754 | " i = Input(batch_shape=(None, 100, 40))\n", 755 | " o = TCN(return_sequences=False, use_skip_connections=True, dropout_rate=0.4, dilations=[1, 2, 4, 8, 16, 32, 64], use_batch_norm=True)(i)\n", 756 | " o = Dense(3, activation='softmax')(o)\n", 757 | " m = Model(inputs=[i], outputs=[o])\n", 758 | " opt = keras.optimizers.Adam(learning_rate=0.01)\n", 759 | " m.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])" 760 | ] 761 | }, 762 | { 763 | "cell_type": "code", 764 | "execution_count": 14, 765 | "metadata": {}, 766 | "outputs": [], 767 | "source": [ 768 | "from sklearn.utils.class_weight import compute_class_weight\n", 769 | "class_weights = compute_class_weight('balanced', np.unique(y_train_val), y_train_val)\n", 770 | "d_class_weights = dict(enumerate(class_weights))" 771 | ] 772 | }, 773 | { 774 | "cell_type": "code", 775 | "execution_count": 15, 776 | "metadata": {}, 777 | "outputs": [ 778 | { 779 | "name": "stdout", 780 | "output_type": 
"stream", 781 | "text": [ 782 | "WARNING:tensorflow:sample_weight modes were coerced from\n", 783 | " ...\n", 784 | " to \n", 785 | " ['...']\n", 786 | "WARNING:tensorflow:sample_weight modes were coerced from\n", 787 | " ...\n", 788 | " to \n", 789 | " ['...']\n", 790 | "Train on 3754532 samples, validate on 465400 samples\n", 791 | "Epoch 1/1000\n", 792 | "INFO:tensorflow:batch_all_reduce: 60 all-reduces with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10\n", 793 | "INFO:tensorflow:batch_all_reduce: 60 all-reduces with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10\n", 794 | "3754496/3754532 [============================>.] - ETA: 0s - loss: 0.7631 - accuracy: 0.6850\n", 795 | "Epoch 00001: val_loss improved from inf to 0.66100, saving model to model_resnet_walkforward_tcn_actual.h5\n", 796 | "3754532/3754532 [==============================] - 934s 249us/sample - loss: 0.7631 - accuracy: 0.6850 - val_loss: 0.6610 - val_accuracy: 0.6530\n", 797 | "Epoch 2/1000\n", 798 | "3754368/3754532 [============================>.] - ETA: 0s - loss: 0.7186 - accuracy: 0.7191\n", 799 | "Epoch 00002: val_loss improved from 0.66100 to 0.65249, saving model to model_resnet_walkforward_tcn_actual.h5\n", 800 | "3754532/3754532 [==============================] - 831s 221us/sample - loss: 0.7186 - accuracy: 0.7191 - val_loss: 0.6525 - val_accuracy: 0.6863\n", 801 | "Epoch 3/1000\n", 802 | "3754368/3754532 [============================>.] - ETA: 0s - loss: 0.7061 - accuracy: 0.7253\n", 803 | "Epoch 00003: val_loss improved from 0.65249 to 0.64854, saving model to model_resnet_walkforward_tcn_actual.h5\n", 804 | "3754532/3754532 [==============================] - 831s 221us/sample - loss: 0.7061 - accuracy: 0.7253 - val_loss: 0.6485 - val_accuracy: 0.7433\n", 805 | "Epoch 4/1000\n", 806 | "3754496/3754532 [============================>.] 
- ETA: 0s - loss: 0.6982 - accuracy: 0.7281\n", 807 | "Epoch 00004: val_loss did not improve from 0.64854\n", 808 | "3754532/3754532 [==============================] - 829s 221us/sample - loss: 0.6982 - accuracy: 0.7281 - val_loss: 0.6519 - val_accuracy: 0.6770\n", 809 | "Epoch 5/1000\n", 810 | "3754368/3754532 [============================>.] - ETA: 0s - loss: 0.6922 - accuracy: 0.7299\n", 811 | "Epoch 00005: val_loss improved from 0.64854 to 0.64811, saving model to model_resnet_walkforward_tcn_actual.h5\n", 812 | "3754532/3754532 [==============================] - 829s 221us/sample - loss: 0.6922 - accuracy: 0.7299 - val_loss: 0.6481 - val_accuracy: 0.6421\n", 813 | "Epoch 6/1000\n", 814 | "3754368/3754532 [============================>.] - ETA: 0s - loss: 0.6876 - accuracy: 0.7312\n", 815 | "Epoch 00006: val_loss improved from 0.64811 to 0.64290, saving model to model_resnet_walkforward_tcn_actual.h5\n", 816 | "3754532/3754532 [==============================] - 830s 221us/sample - loss: 0.6876 - accuracy: 0.7312 - val_loss: 0.6429 - val_accuracy: 0.6118\n", 817 | "Epoch 7/1000\n", 818 | "3754496/3754532 [============================>.] - ETA: 0s - loss: 0.6840 - accuracy: 0.7317\n", 819 | "Epoch 00007: val_loss did not improve from 0.64290\n", 820 | "3754532/3754532 [==============================] - 831s 221us/sample - loss: 0.6840 - accuracy: 0.7317 - val_loss: 0.6484 - val_accuracy: 0.7022\n", 821 | "Epoch 8/1000\n", 822 | "3754496/3754532 [============================>.] - ETA: 0s - loss: 0.6812 - accuracy: 0.7331\n", 823 | "Epoch 00008: val_loss improved from 0.64290 to 0.63999, saving model to model_resnet_walkforward_tcn_actual.h5\n", 824 | "3754532/3754532 [==============================] - 831s 221us/sample - loss: 0.6812 - accuracy: 0.7331 - val_loss: 0.6400 - val_accuracy: 0.6446\n", 825 | "Epoch 9/1000\n", 826 | "3754496/3754532 [============================>.] 
- ETA: 0s - loss: 0.6784 - accuracy: 0.7341\n", 827 | "Epoch 00009: val_loss improved from 0.63999 to 0.63913, saving model to model_resnet_walkforward_tcn_actual.h5\n", 828 | "3754532/3754532 [==============================] - 832s 222us/sample - loss: 0.6784 - accuracy: 0.7341 - val_loss: 0.6391 - val_accuracy: 0.6525\n", 829 | "Epoch 10/1000\n", 830 | "3754496/3754532 [============================>.] - ETA: 0s - loss: 0.6759 - accuracy: 0.7348\n", 831 | "Epoch 00010: val_loss improved from 0.63913 to 0.63433, saving model to model_resnet_walkforward_tcn_actual.h5\n", 832 | "3754532/3754532 [==============================] - 830s 221us/sample - loss: 0.6759 - accuracy: 0.7348 - val_loss: 0.6343 - val_accuracy: 0.7053\n", 833 | "Epoch 11/1000\n", 834 | "3754368/3754532 [============================>.] - ETA: 0s - loss: 0.6736 - accuracy: 0.7358\n", 835 | "Epoch 00011: val_loss did not improve from 0.63433\n", 836 | "3754532/3754532 [==============================] - 830s 221us/sample - loss: 0.6737 - accuracy: 0.7358 - val_loss: 0.6431 - val_accuracy: 0.6656\n", 837 | "Epoch 12/1000\n", 838 | "3754496/3754532 [============================>.] - ETA: 0s - loss: 0.6717 - accuracy: 0.7367\n", 839 | "Epoch 00012: val_loss improved from 0.63433 to 0.62916, saving model to model_resnet_walkforward_tcn_actual.h5\n", 840 | "3754532/3754532 [==============================] - 831s 221us/sample - loss: 0.6717 - accuracy: 0.7367 - val_loss: 0.6292 - val_accuracy: 0.7084\n", 841 | "Epoch 13/1000\n", 842 | "3754496/3754532 [============================>.] - ETA: 0s - loss: 0.6704 - accuracy: 0.7368\n", 843 | "Epoch 00013: val_loss did not improve from 0.62916\n", 844 | "3754532/3754532 [==============================] - 831s 221us/sample - loss: 0.6704 - accuracy: 0.7368 - val_loss: 0.6409 - val_accuracy: 0.7432\n", 845 | "Epoch 14/1000\n", 846 | "3754496/3754532 [============================>.] 
- ETA: 0s - loss: 0.6688 - accuracy: 0.7373\n", 847 | "Epoch 00014: ReduceLROnPlateau reducing learning rate to 0.004999999888241291.\n", 848 | "\n", 849 | "Epoch 00014: val_loss did not improve from 0.62916\n", 850 | "3754532/3754532 [==============================] - 832s 222us/sample - loss: 0.6688 - accuracy: 0.7373 - val_loss: 0.6355 - val_accuracy: 0.6914\n", 851 | "Epoch 15/1000\n", 852 | "3754240/3754532 [============================>.] - ETA: 0s - loss: 0.6536 - accuracy: 0.7419\n", 853 | "Epoch 00015: val_loss did not improve from 0.62916\n", 854 | "3754532/3754532 [==============================] - 832s 221us/sample - loss: 0.6536 - accuracy: 0.7419 - val_loss: 0.6311 - val_accuracy: 0.6917\n", 855 | "Epoch 16/1000\n", 856 | "3754368/3754532 [============================>.] - ETA: 0s - loss: 0.6521 - accuracy: 0.7426\n", 857 | "Epoch 00016: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.\n", 858 | "\n", 859 | "Epoch 00016: val_loss did not improve from 0.62916\n", 860 | "3754532/3754532 [==============================] - 829s 221us/sample - loss: 0.6521 - accuracy: 0.7426 - val_loss: 0.6410 - val_accuracy: 0.7048\n" 861 | ] 862 | } 863 | ], 864 | "source": [ 865 | "EPOCHS = 1000\n", 866 | "model_filename = \"model_resnet_walkforward_tcn_actual.h5\"\n", 867 | "hist_filename = \"hist_model_walkforward_tcn_actual.csv\"\n", 868 | "early_stop = keras.callbacks.EarlyStopping(\n", 869 | " monitor='val_loss',\n", 870 | " patience=4)\n", 871 | "model_save_checkpoint = keras.callbacks.ModelCheckpoint(\n", 872 | " filepath= model_filename,\n", 873 | " save_best_only=True,\n", 874 | " monitor='val_loss',\n", 875 | " verbose=1)\n", 876 | "reduce_lr = keras.callbacks.ReduceLROnPlateau(\n", 877 | " monitor='val_loss',\n", 878 | " factor=0.5,\n", 879 | " patience=2,\n", 880 | " verbose=1,\n", 881 | " min_lr=0.0001)\n", 882 | "callbacks = [reduce_lr, model_save_checkpoint, early_stop]\n", 883 | "history = m.fit(\n", 884 | " X_train_val, 
y_train_val,\n", 885 | " epochs=EPOCHS,\n", 886 | " batch_size=128,\n", 887 | " callbacks=callbacks,\n", 888 | " class_weight = d_class_weights,\n", 889 | " validation_data = (X_test, y_test),\n", 890 | ")\n" 891 | ] 892 | }, 893 | { 894 | "cell_type": "code", 895 | "execution_count": 16, 896 | "metadata": {}, 897 | "outputs": [], 898 | "source": [ 899 | "y_pred = m.predict(X_test).argmax(axis=1)" 900 | ] 901 | }, 902 | { 903 | "cell_type": "code", 904 | "execution_count": 17, 905 | "metadata": {}, 906 | "outputs": [ 907 | { 908 | "data": { 909 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWQAAAD8CAYAAABAWd66AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xd8VGXaxvHfnYKA0klCVTpSXFzBAqxSFEVRWCuW3VVXxd5YVlEUFde1rLrqWrF3VvRdREERFRALQlCRJiwCSihJKAKulGRyv3/MEJNQMoEkc2Zyff2cj3POec6c+4x48cxzypi7IyIisZcU6wJERCRMgSwiEhAKZBGRgFAgi4gEhAJZRCQgFMgiIgGhQBYRCQgFsohIQCiQRUQCIqWid9Duhvd1K2AFGzm4c6xLSHh9W6fHuoQqoUndarav71Hjt1dFnTlbvn50n/dXntRDFhEJiArvIYuIVCqL336mAllEEktScqwr2GsKZBFJLBaoYeEyUSCLSGLRkIWISECohywiEhDqIYuIBIR6yCIiAaGrLEREAkJDFiIiAaEhCxGRgFAPWUQkIBTIIiIBkayTeiIiwaAxZBGRgNCQhYhIQKiHLCISEOohi4gEhHrIIiIBoVunRUQCQkMWIiIBoSELEZGAUA9ZRCQgFMgiIgGhk3oiIgGhMWQRkYDQkIWISECohywiEgymQBYRCQYFsohIQFiSAjkuHN2uISMGdSDZYOzMLEZPXVZs/aldm3LjgPZkb9oKwCuf/8jYmVkALLznBBav2QzAqp+2cvkLX1Vu8XFkyZyZTHrpMbyggN/2OYmeA88ptn72h+8wa/LbJCUlUW2/Ggy4+HrSmrXgl80befPhO1j1/SK6HHMCJ154TYyOIPhmfvEpjz54L6GCEAMGnsa5519cbP2crzN57J/38f2SxYy88z56HXs8AGtWr2LkjddRUFBAfn4+p511LgNPOysWh1Bh1EOOA0kGt53akQufnsWajVt56+rufLQgh+9z/les3cQ5qxn19sKdtt+aF2LQQ59XVrlxq6AgxPvPP8J5N91H7QZpPHPLFbQ7rDtpzVoUtuncoy9djzsFgEWzP2fyK09y7vB7SEmtRu8zLiQ3azk5K5btZg8SCoV4+B938Y9/jSYtvRGXXXA2PY7uQ4tWrQvbZGQ05sZb7+Tfr75YbNsGDdN49JlXqFatGlt++YULzz2VHkf3pmFaemUfRoWJ50CO3+tDyug3zevyw9pfWLF+C3khZ8KcNRzXKSPWZSWcVUu+o15GU+plNCE5JZVO3fuwaHbxv8j2q7l/4eu8bVsh8v9Pteo1OPDgQ0hJTa3MkuPOdwvm0qTZgTRp2pzU1FT69juRzz6ZUqxNoyZNad22PUklvr6npqZSrVo1ALbnbccLCiqt7spiZ
lFPQRN1D9nMegAtim7j7i9VQE0VIqPOfqzZuKVwfs3GrXRpXmendscfkkG3VvVZnvs//v7Od6zZGB6+2C8libeu6U4o5IyeupQP5+dUWu3xZNOGtdRukFY4X7t+GiuX7PyNY9YH4/hy4puE8vP5w4j7K7PEuLc2J4f0jEaF82npGSyc/23U2+dkr+GmoVewcsUKLr16aEL1joHCv+DjUVSBbGYvA62Bb4BQZLEDcRPI0ZiyMId3v1lFXsgZfGRz7h18COePngVAn7unkb1pG83r1+DFIUewaPVmVqzfUso7yu4cfvzvOfz43zP3s4/4dNwrDLp8eKxLqjLSMxrx7Kv/x9rcHG694Vp69e1H/QYNY11WuQlizzda0Q5ZdAN6uvsV7n51ZNrtGRczG2JmmWaWuXHOxPKpdB9lb9xGozo1Cucb1alO9qZtxdr89EseeSEHYOzMFXRuWvvX7SNtV6zfwsyl6+lYZJ38qna9hmxal1s4v2l9LrXq7/5/9s7d+7AoU2PzZdEwPZ2c7DWF87k52TRMK/vwW8O0dFq0asPcbxLrBHVSUlLUU9BEW9E8oFGprSLcfbS7d3P3bnW6nLR3lZWzuVkbadGwJs3q1SA12RjQpREfLSg+7JBWa7/C18d2TC884Ve7RgqpyeG/devVTOWwFnVZkv1z5RUfR5q0Ppj1a1ayIWc1ofw85n8xhXZdexRrs251VuHr/349g/qNmlZ2mXHt4A6dWbniB1avyiIvL4+PJ79Hj2N6R7VtbvYatm0ND8Nt3rSReXO+pvlBLSqu2BioCmPIDYEFZjYTKOxWuvvACqmqAoQKnFFvL+DZi7uRnGS8OSuLJdk/c83xbZiXtZGPF+Typ54H0bdjGqEC56cteQx/Yy4ArdMPYNRpnXB3zIzRU5budHWGhCUlJ9P/gqt57Z4b8YICuvQ+kfRmLZg69nkat2pP+649yPxgHEvnfUVySgrV9z+AgZffWLj9I9ecy7YtvxDKz2PR7M84b/i9xa7QEEhOSeGaYTdzwzWXUVAQ4sRTTqVlqzY899SjtO/QiZ7H9OG7BfO49YZr+XnzZr6YPo3nn36cF8aM44flS3nikfsJD7Q6Z513Pq3atIv1IZWv4OVs1MzdS29k1mtXy919Wmnbtrvh/dJ3IPtk5ODOsS4h4fVtnWAnvgKqSd1q+xynDS8YE3XmrH3h7D3uz8z6Aw8DycAz7n7PLtqcBdxO+LzaHHc/N7L8fOCWSLO/ufuLJbctKdoechvgE3f/b5TtRURioryGIswsGXgM6AdkAbPMbLy7LyjSpi1wE+FzbBvMLD2yvD5wG+Hzbw7Mjmy7YU/7jHYM+UDgKTNbamZjzexqMzu0rAcoIlLRLMminkpxBLDE3Ze6+3ZgDDCoRJtLgMd2BK277zgxdQIw2d3XR9ZNBvqXtsOoAtndb3P3vkAnYDrwV2B2NNuKiFSmcjyp1xRYUWQ+K7KsqHZAOzP7zMxmRIY4ot12J9Feh3wL0BM4APgaGEY4mEVEAqUsQxZmNgQYUmTRaHcfXYbdpQBtgd5AM+ATMzukDNvv9GbROA3IByYA04Av3H3bnjcREal8ZQnkSPjuLoBXAs2LzDeLLCsqC/jS3fOAZWa2mHBAryQc0kW3nVpaPdEOWRwGHAfMJDzAPdfMPo1mWxGRylSOQxazgLZm1tLMqgFnA+NLtBlHJHjNrCHhIYylwCTgeDOrZ2b1gOMjy/Yo2iGLzsDRQC/CZw1XoCELEQmicroO2d3zzewqwkGaDDzn7vPNbBSQ6e7j+TV4FxB+rMRf3X0dgJndSTjUAUa5+/rS9hntkMU9hAP4EWBWpHsuIhI45XlLtLtPBCaWWDayyGsHhkamkts+BzxXlv1FFcjufnKky94OaG9mixTKIhJEQbwlOlrRDln0Ivxkt+WEvxA0N7Pz3f2TCqxNRKTs4jePox6yeBA43t0XAZhZO+B1oGtFFSYisjcSvocMpO4IYwB3X2xm+lkHE
QmcqhDImWb2DPBKZP48ILNiShIR2XtVIZAvB64EdjyUfjrweIVUJCKyD6J4RkVgRXuVxbbIzzi97O65pW4gIhIj8dxD3uMFexZ2u5mtBRYBi8ws18xG7mk7EZFYiedfDCntCurrCT9U6HB3r+/u9YEjgZ5mdn2FVyciUkZm0U9BU1og/xE4x92X7Vjg7kuBPwB/qsjCRET2Rjz3kEsbQ05197UlF7p7ri57E5EgSkrgk3rb93KdiEhMBLDjG7XSArmLmW3axXIDqldAPSIi+yRhe8junlxZhYiIlIdE7iGLiMSVIJ6si5YCWUQSShznsQJZRBJLeT6gvrIpkEUkoaiHLCISEBpDFhEJiDjOYwWyiCQW9ZBFRAIijvNYgSwiiSVh79QrD1/e3q+id1HlNel5baxLSHjzPvhHrEuQKGnIQkQkIOI4jxXIIpJY1EMWEQmIOM5jBbKIJBad1BMRCQgNWYiIBIQCWUQkIOI4jxXIIpJY1EMWEQmIOM5jBbKIJBZdZSEiEhBJcdxFViCLSEKJ4zxWIItIYtFJPRGRgIjjIWQFsogkFp3UExEJCEOBLCISCHHcQSYp1gWIiJQnM4t6iuK9+pvZIjNbYmbD99DudDNzM+sWmW9hZlvM7JvI9GQ0tauHLCIJpbwusjCzZOAxoB+QBcwys/HuvqBEu1rAtcCXJd7ie3c/tCz7VA9ZRBJKklnUUymOAJa4+1J33w6MAQbtot2dwL3A1n2ufV/fQEQkSJKSLOrJzIaYWWaRaUiRt2oKrCgynxVZVsjMDgOau/uEXZTS0sy+NrNpZnZ0NLVryEJEEkpZhizcfTQweu/2Y0nAg8AFu1i9GjjQ3deZWVdgnJl1cvdNe3pP9ZBFJKGU45DFSqB5kflmkWU71AI6A1PNbDlwFDDezLq5+zZ3Xwfg7rOB74F2pdYe9VGKiMQBK8NUillAWzNraWbVgLOB8TtWuvtGd2/o7i3cvQUwAxjo7plmlhY5KYiZtQLaAktL26GGLEQkoZTXsyzcPd/MrgImAcnAc+4+38xGAZnuPn4Pmx8DjDKzPKAAuMzd15e2TwWyiCSU8rwxxN0nAhNLLBu5m7a9i7x+C3irrPtTIItIQtGzLEREAkKP3xQRCYg47iArkEUksaiHLCISEPEbx1XsOuQvPpvOWb8/iTMGnsBLzz290/qvZ2fyp3NOp2e3Q/h48qRi6667cgjHHX0kf7nm8soqN27169GBOf+5lXlv38awC/vttP6+v5zGjDHDmTFmON+OG8nqT+4D4MDG9fj8tRuZMWY4s98cwcVn/K6yS48bmTM+45JzBnHR4FN44+Xndlo/95vZXP3nszm5V1c+nTK52LqcNasZcf1lXHreqVz6h9PIXr1yp+3jWXKSRT0FTZXpIYdCIe6/52888sQzpGdkcOF5gzm6Vx9atm5T2CajcWNuvePvvPbS8zttf96fLmTr1q2Me+uNyiw77iQlGQ8NP4sBlz/Kyuyf+PTVv/LutLl8t3RNYZsbHvi/wteXn92LLu2bAbA6dxO9z3+A7Xn57F+jGrPfHMGEaXNZnbux0o8jyEKhEI8/eDd3/fNJGqZncN3F53HU73pxYMvWhW3SMxox9OZRvPX6Sztt/8DfbmHw+Rdz2OHd2fLLL1gAg2lfxPOQRZXpIS+YN5dmzQ+kabPmpKZWo98JJ/LJ1I+LtWnSpClt27XHknb+WA4/sjs199+/ssqNW4d3bsH3K9ayfOU68vJDjJ30FSf3/s1u25/VvytvvD8bgLz8ENvz8gHYr1pqXP+ce0VavHAeTZo1p3HTZqSmpnLMcSfwxadTi7XJaNyUlm3a7XQJ2I/LvicUCnHY4d0BqFGzJtWr16is0iuFWfRT0FSZQM7NySY9o1HhfHpGI3Jzc2JYUWJqkl6HrOwNhfMrszfQNK3OLtse2LgeBzVpwNRZiwqXNcuoy8x/38R/37uTB174UL3jXViXm0PD9F//LDdMy2BdlH+Ws1b8w
P61avG3m4dy1YWDefaxBwmFQhVVakyU47MsKl1UgWxm7czsIzObF5n/jZndUrGlSaI784SujPvoGwoKvHBZVvZPHDH4bjoPuoM/nHIE6fVrxbDCxFMQCjF/ztdcdOVQHn76VVavWsmH7+3pDuD4UxV6yE8DNwF5AO7+LeEHbexS0WeMvrCLk2exkJaeQU72r+OYOdlrSEtLj2FFiWlVzkaaZdQrnG+aUY+Vu+nlnnFCV954P3OX61bnbmT+ktX0PKz1LtdXZQ3S0lmb8+uf5bW52TSI8s9yw7QMWrVtT+OmzUhOSaH70X1YsmhhRZUaE+X5E06VLdpArunuM0ssy99dY3cf7e7d3L3bBX++ZO+rK0cdOnVmxY8/sGplFnl525k86T2O7t0n1mUlnMz5P9DmwDQOatKA1JRkzjzhMCZM/Xandu1aZFCvdk1mzFlWuKxpel2q75cKQN1aNejx29YsXq5hpZLaHdyJVSt+ZM2qleTl5fHJh5M4qmevqLZt26ET/9u8mY0bws+5mfPVTA5s0aoiy610yWZRT0ET7VUWa82sNeAAZnYG4Qcwx42UlBSG3TiCa6+4hIKCAk4edCqtWrdl9OP/4uCOnTimd18WzJ/LjUOvYfOmTXz6yRSefvJRXn/rHQAu/fMf+GHZMrZs+YVTTujDiNvu5KgeuiyrpFCogOvvfYN3Hr+S5CTjxbdnsHDpGm69fABfLfiRCdPmAuHhirGTZhfbtn3LRtwz9FQcxzAeeukj5i9ZFYvDCLTklBQuHzqcW4ZeTkFBAccPGMRBrdrw8jOP0/bgjhz1u94sXjiPO28eys+bN/HlZ5/wyrNP8OQr/0dycjIXXXU9N113Ke5O2/Yd6D/w9FgfUrmK54tGzN1LbxR+nudooAewAVgG/MHdl5e27YZfQqXvQPZJk57XxrqEhDfvg3/EuoQqoXVajX2O06Hjv4s6cx4ceHCg4juqHrK7LwWOM7P9gSR331yxZYmI7J0gjg1Ha4+BbGZDd7McAHd/sAJqEhHZa/E8ZFFaD1nXHIlIXInjDvKeA9nd76isQkREykNKHCdytDeGtDKzd8ws18xyzOztyIk+EZFAqQo3hrwGvAE0BpoAY4HXK6ooEZG9lfC3ThO+MeRld8+PTK8A1SuyMBGRvRHPPeTSrrKoH3n5npkNB8YQvjlkMCV+iVVEJAgS+SqL2YQDeMchXlpknRN+voWISGAE8cHz0SrtKouWlVWIiEh5iOM8jv4XQ8ysM9CRImPH7r7zzxGIiMSQxfGv6kUVyGZ2G9CbcCBPBE4EPgUUyCISKPHcQ472KoszgGOBNe5+IdAF2PXPQIiIxFCSRT8FTbRDFlvcvcDM8s2sNpADNK/AukRE9krCPlyoiEwzq0v4l0NmAz8DX1RYVSIieyk5jn8pNNrHb14Refmkmb0P1I78jJOISKAE8Q68aEX7LIuPdrx29+Xu/m3RZSIiQZGwY8hmVh2oCTQ0s3r8eoNIbaBpBdcmIlJmcdxBLnXI4lLgOsIPFCr6A2ibgUcrqigRkb2VFMfXIZc2ZPE54d/RG+burYA7gHnANMJPgBMRCZR4frhQaYH8FLDN3f9lZscAdwMvAhsJ/+ipiEigpCRZ1FPQlDZkkezu6yOvBwOj3f0t4C0z+6ZiSxMRKbsg9nyjVVoPOdnMdoT2scDHRdZF/RwMEZHKEs8PqC8tVF8HppnZWmALMB3AzNoQHrYQEQmUAOZs1Ep7/OZdkeuNGwMfuLtHViUBV1d0cSIiZRXHN+qVPuzg7jN2sWxxxZQjIrJvgjgUEa14/stERGQn5TmGbGb9zWyRmS2J/IxdyfWXmdlcM/vGzD41s45F1t0U2W6RmZ0QVe1lOlIRkYCzMkx7fB+zZOAxws9/7wicUzRwI15z90Pc/VDgPuDByLYdgbOBTkB/4PHI++2RAllEEko53hhyBLDE3Ze6+3bCP/I8qGgDd99UZHZ/wr81SqTdGHff5u7LgCWR99sjX
bomIgmlHJ+H3BRYUWQ+CzhyF/u7EhgKVAP6Ftm26Pm3LKJ4/o96yCKSUJLKMJnZEDPLLDINKev+3P0xd28N3Ajcsi+1q4csIgmlLFdZuPtodv8YiJUU/2WkZpFluzMGeGIvtwUqIZBrVCt1HFv20Wfj/h7rEhJe51Nuj3UJVcKWGffu83uU45DFLKCtmbUkHKZnA+eW2Fdbd/9vZHYAsOP1eOA1M3uQ8NMy2wIzS9uhesgiklDKaxzW3fPN7CpgEpAMPOfu881sFJDp7uOBq8zsOCAP2ACcH9l2vpm9ASwA8oEr3T1U2j4VyCKSUMrzR07dfSIwscSykUVeX7uHbe8C7irL/hTIIpJQ4vc+PQWyiCSY5Di+dVqBLCIJJY7zWIEsIonF4njQQoEsIglFPWQRkYCI51+dViCLSEJRD1lEJCDi+QH1CmQRSShJ8ZvHCmQRSSy6ykJEJCDieMRCgSwiiUU9ZBGRgNAYsohIQOgqCxGRgIjfOFYgi0iCUQ9ZRCQg4jeOFcgikmjiOJEVyCKSUDRkISISEPEbxwpkEUk0cZzICmQRSSi6U09EJCDieAhZgSwiiSWO81iBLCKJxeK4i6xAFpGEEsd5rEAWkcQSx3msQBaRBBPHiaxAFpGEosveAuyz6Z9w7z13URAq4NTTz+SiS4YUW799+3ZG3HQDC+fPp07dutz3wD9p2rQZE94dz4vPPVvYbvHiRYwZ+x8O7tCBiy74I7m5OVTfrzoATzz9HA0aNKjU4wqyb2Z9zktPPEBBQQF9+g9i0NkXFFs/4c1XmfL+2yQlJ1O7Tl0u/ctI0jIaA3D3zVezZOE82nc+lBvu/GcMqo8P/Y5qx/3XDyQ5yXhh/Czuf3lqsfX3XXsyx3RtDUDN6qmk1TuAxv1uB+Dtf/6ZIzofyOdzlnP6sBcqt/BKoDHkgAqFQvz9rlE89fTzZGRkcO7gM+jdpy+t27QpbPOft8ZSu3Zt3n1/Mu9NnMBDD97PPx54iAEnD2TAyQMB+O/iRVx3zZUc3KFD4XZ333s/nTofUunHFHQFoRDPP3ofN9/zKA0aZjDi6vPp2v0Ymh3UqrBNizbtuevRl9ivenUmv/Mmrz3zCNeOuBuAU878I9u2buWjif+J1SEEXlKS8dCw3zPgmmdYmbORT5+/inenL+C75TmFbW54+N3C15ef2YMu7ZoUzv/z1WnUrF6Ni35/ZKXWXVniOZCTYl1ARZo391uaNz+IZs2bk1qtGv1PGsDUKR8VazPl448ZOOhUAPodfwIzZ3yBuxdr897ECfQ/cUCl1R3PliyaT6Mmzclo3IyU1FS69+pH5ufTirXpdGg39qse/nbRpsMhrM/9NUg6//YIatTcv1JrjjeHd2zO91nrWL5qPXn5IcZOnsPJx3Tcbfuz+h3KG5PnFM5Pzfyezb9sq4xSY8LK8E/QlCmQzay2mdWqqGLKW052No0aNyqcT8/IIDs7u3ibnGwaNQp/XU5JSeGAWrX46acNxdpMen8i/U8qHsgjb7mZs04bxFNPPLZTgFdlG9bm0iAto3C+QVoGG9bl7rb91PffpsvhPSqjtITRJK0OWTk/Fc6vzNlI07Q6u2x7YKO6HNSkHlMzl1RWeTFnFv0UNFEFspkdbmZzgW+BeWY2x8y6VmxpwfDtt3OoXr0Gbdu2K1z293vv561x7/D8y6/y1VezeXf82zGsMH5N/3AiSxcv5JQz/xjrUhLWmf26MG7KXAoKqk6nwcowBU20PeRngSvcvYW7HwRcCTy/u8ZmNsTMMs0s89mnR5dHnXslPSODNavXFM7nZGeTkZFRvE16BmvWrAYgPz+fnzdvpm7deoXrJ02cwIklesc73mP//Q/gpJNOZu7cbyvqEOJOvYZprMv99VvIutxs6jVI26nd3K++ZNzrzzPsjgdIrVatMkuMe6tyN9IsvW7hfNP0OqzM3bjLtmcc14U3Ppizy3UJK44TOdpADrn79B0z7v4pkL+7xu4+2t27uXu3k
lc1VKZOnQ/hxx+Xk5W1grzt23l/4gR69elbrE3vPn0Z/3b4BNLkDyZxxJFHFd56WVBQwKRJ7xUbP87Pz2fDhvUA5OXl8cm0qbRp27aSjij4WrfvyJqVP5KzeiX5eXl8MW0yXbsfU6zNsiWLeObhuxk26gHq1Ksfo0rjV+bCLNo0b8BBjeuRmpLMmf26MGH6wp3atTsojXq1azBj7g8xqDJ2ksyinoIm2qssppnZU8DrgAODgalmdhiAu39VQfXtk5SUFG4aMZLLh1xMQUGI3596Om3atOWxfz1Mp06d6d33WE49/QxGDP8rJ/fvR+06dbjv/l8vtZqdOYtGjRrTrHnzwmXbt2/n8iEXk5+fRyhUwFHdu3P6GWfF4vACKTk5hQuuuoG7b76GgoIQvU8YSPMWrRn74pO0bNeBbt178drTD7N1yxYevnM4AA3SG/HXUQ8CcPvQS1i1Yjlbt2zhynMHMGToLXTp1j2WhxQ4oVAB19//Nu88fBHJSUm8+O4sFi7L5tZL+vHVd1mF4Xxmvy6Mnbxz7/jDJy+j3UFpHFBjP5aMv5nL7nqTD79cXNmHUWGCF7PRs2hOSJnZlMjLHY2LHrO7e192Y2s+VWfwKkYWrNwU6xISXs/Bd8W6hCphy4x79zlPF2f/EnXmtMuoGaj83mMP2cyGRl7uuKjRgVzgU3dfVpGFiYjsjSBezhat0saQa0WmAyJTLaAb8J6ZnV3BtYmIlFk8X/a2xx6yu9+xq+VmVh/4EBhTEUWJiOytAOZs1PbqTj13X098H7eIJCgzi3qK4r36m9kiM1tiZsN3sf4YM/vKzPLN7IwS60Jm9k1kGh9N7Xv1LAsz6wNsKLWhiEglK6+hCDNLBh4D+gFZwCwzG+/uC4o0+xG4ABi2i7fY4u6HlmWfpZ3Umws7XSVRH1gF/KksOxIRqQzl+NX9CGCJuy8FMLMxwCCgMJDdfXlkXUF57LC0HvLJJeYdWOfu/yuPnYuIlLvyS+SmwIoi81lAWR6RV93MMgnfRHePu48rbYPSTupVrVt8RCTuleWyNzMbAhS9nXi0u5fX8x4OcveVZtYK+NjM5rr793vaIKGfhywiVU9ZxpAj4bu7AF4JNC8y3yyyLNr3Xhn591Izmwr8FthjICf085BFpOpJsuinUswC2ppZSzOrBpwNRHW1hJnVM7P9Iq8bAj0pMva829qjeXMRkfhRPo97c/d84CpgErAQeMPd55vZKDMbCIWPJs4CzgSeMrP5kc07AJlmNgeYQngMudRA1pCFiCSU8rwDz90nAhNLLBtZ5PUswkMZJbf7HCjzb7wpkEUkocTzHWsKZBFJKEF8RkW0FMgiklCiuSU6qBTIIpJQ4jeOFcgikmDiuIOsQBaRxBLPD6hXIItIYonfPFYgi0hiieM8ViCLSGJJiuNBZAWyiCSUOM5jPctCRCQo1EMWkYQSzz1kBbKIJBRd9iYiEhDqIYuIBIQCWUQkIDRkISISEOohi4gERBznsQJZRBJMHCeyAllEEko83zpt7h7rGgLHzIa4++hY15HI9BlXPH3G8Ue3Tu/akFgXUAXoM654+ozjjAJZRCQgFMgiIgGhQN41jbtVPH3GFU+fcZzRST0RkYBQD1lEJCASPpDNLGRm35jZfDObY2Z/MbOEP+7KYmYjIp/tt5HP+Ugzu87Makax7XIza7g2cB93AAADYUlEQVSL5beb2bCKqTjxmFkLM5tXYpk+wzhUFW4M2eLuhwKYWTrwGlAbuC2mVSUAM+sOnAwc5u7bIuFaDfg38ArwSyzrE4k3Vaqn6O45hK/NvMrCqpvZ82Y218y+NrM+AGY2wcx+E3n9tZmNjLweZWaXmFlvM5tqZm+a2Xdm9qpZHN8etPcaA2vdfRuAu68FzgCaAFPMbAqAmT1hZpmRnvQdJd7jhsjnP9PM2pTcgZm1NrP3zWy2mU03s4Mr+JgSSuTP6cORby/zzOyIWNcku1elAhnA3ZcCyUA6cGV4kR8CnAO8a
GbVgenA0WZWB8gHekY2Pxr4JPL6t8B1QEegVZE2VckHQHMzW2xmj5tZL3d/BFgF9HH3PpF2I9y9G/AboNeOv+wiNkY+/0eBh3axj9HA1e7eFRgGPF5hR5O4aka+JV4BPBfrYmT3qlwgl/A7wl+tcffvgB+AdoQD+RjCITsBOCAyJtrS3RdFtp3p7lnuXgB8A7So5Npjzt1/BroS/taRC/zbzC7YRdOzzOwr4GugE+G/xHZ4vci/uxfdyMwOAHoAY83sG+Apwr1yKW53l0rtWP46gLt/AtQ2s7qVUpWUWVUYQy7GzFoBISBnD81mAd2ApcBkoCFwCTC7SJttRV6HqIKfJYC7h4CpwFQzmwucX3S9mbUk3LM93N03mNkLQPWib7Gb1xDuMPy04xyA7NY6oF6JZfWBZZHXJT9XXesaUFWqh2xmacCTwKMevgB7OnBeZF074EBgkbtvB1YAZwJfRNoN49fhCgHMrL2ZtS2y6FDC3zI2A7Uiy2oD/wM2mlkGcGKJtxlc5N9fFF3h7puAZWZ2ZmR/ZmZdyvco4l/km8pqM+sLYGb1gf7Ap5EmgyPLf0d4iGhjTAqVUlWFXl2NyNfdVMLjwS8DD0bWPQ48EenZ5QMX7DhBRTiEj3X3LWY2HWgWWSa/OgD4V+QrcD6whPDwxTnA+2a2yt37mNnXwHeE/5L7rMR71DOzbwl/4zhnF/s4j/B/o1sI/zccA8ypkKOJb38CHjOzHX+273D37yPnmrdG/hukAn+OVYFSOt2pJ5LAzGwqMMzdM2Ndi5SuSg1ZiIgEmXrIIiIBoR6yiEhAKJBFRAJCgSwiEhAKZBGRgFAgi4gEhAJZRCQg/h/kayUA3ZOihAAAAABJRU5ErkJggg==\n", 910 | "text/plain": [ 911 | "
" 912 | ] 913 | }, 914 | "metadata": { 915 | "needs_background": "light" 916 | }, 917 | "output_type": "display_data" 918 | } 919 | ], 920 | "source": [ 921 | "from sklearn.metrics import confusion_matrix\n", 922 | "\n", 923 | "cm = confusion_matrix(y_pred=y_pred, y_true=y_test)\n", 924 | " \n", 925 | "cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", 926 | "\n", 927 | "ax=sns.heatmap(cm, annot=True, xticklabels=['Down','Stable','Up'], yticklabels=['Down','Stable','Up'], cmap='Blues')\n", 928 | "ax.set_ylim(3.0, 0)\n", 929 | "plt.savefig('image')" 930 | ] 931 | }, 932 | { 933 | "cell_type": "code", 934 | "execution_count": 18, 935 | "metadata": {}, 936 | "outputs": [ 937 | { 938 | "name": "stdout", 939 | "output_type": "stream", 940 | "text": [ 941 | " precision recall f1-score support\n", 942 | "\n", 943 | " Down 0.45 0.55 0.50 61142\n", 944 | " Stable 0.89 0.73 0.80 350499\n", 945 | " Up 0.37 0.71 0.49 53759\n", 946 | "\n", 947 | "avg / total 0.77 0.70 0.73 465400\n", 948 | "\n" 949 | ] 950 | } 951 | ], 952 | "source": [ 953 | "from sklearn.metrics import classification_report\n", 954 | "print(classification_report(y_true = y_test,y_pred = y_pred, target_names=['Down', 'Stable', 'Up']))" 955 | ] 956 | }, 957 | { 958 | "cell_type": "code", 959 | "execution_count": null, 960 | "metadata": {}, 961 | "outputs": [], 962 | "source": [] 963 | } 964 | ], 965 | "metadata": { 966 | "kernelspec": { 967 | "display_name": "Python 3", 968 | "language": "python", 969 | "name": "python3" 970 | }, 971 | "language_info": { 972 | "codemirror_mode": { 973 | "name": "ipython", 974 | "version": 3 975 | }, 976 | "file_extension": ".py", 977 | "mimetype": "text/x-python", 978 | "name": "python", 979 | "nbconvert_exporter": "python", 980 | "pygments_lexer": "ipython3", 981 | "version": "3.5.3" 982 | } 983 | }, 984 | "nbformat": 4, 985 | "nbformat_minor": 4 986 | } 987 | -------------------------------------------------------------------------------- /Walkthrough.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import gc\n", 10 | "import numpy as np\n", 11 | "import pandas as pd\n", 12 | "from tqdm import tqdm\n", 13 | "from google.cloud import storage\n", 14 | "from io import StringIO\n", 15 | "%matplotlib inline\n", 16 | "import seaborn as sns\n", 17 | "import matplotlib.pyplot as plt" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "client = storage.Client()\n", 27 | "bucket = client.get_bucket('bucket_l2_snapshot')" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "blob = bucket.blob('l2_snapshot_100ms.csv')\n", 37 | "bt = blob.download_as_string()\n", 38 | "s = str(bt, 'utf-8')\n", 39 | "s = StringIO(s)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "df = pd.read_csv(s, index_col=0, infer_datetime_format=True)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 5, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/html": [ 59 | "
\n", 60 | "\n", 73 | "\n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " 
\n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | "
b1b2b3b4b5b6b7b8b9b10...aq1aq2aq3aq4aq5aq6aq7aq8aq9aq10
timestamp
2019-11-12 00:00:00.0008721.538720.598719.558719.508719.08718.028717.878717.858717.568716.06...5.8022042.8590.121890.1425750.0026920.460.0384680.2000000.0018901.00000
2019-11-12 00:00:00.1008721.538720.598719.568719.558719.08718.028717.878717.858717.568716.06...5.8022042.8590.121890.1425750.0026920.460.0384680.2000000.0018901.00000
2019-11-12 00:00:00.2008721.538720.598719.618719.568719.08718.028717.878717.858716.068716.00...5.8022042.8590.121890.1425750.0026920.460.0384680.2000001.0860160.00189
2019-11-12 00:00:00.3008721.538720.598719.618719.568719.08718.028718.008717.878717.858716.60...7.8022042.8590.121890.1425750.0026920.460.0384680.2000001.0860160.00189
2019-11-12 00:00:00.4008721.538720.598719.618719.568719.08718.998718.028718.008717.878717.85...7.8022042.8590.121890.1425750.0026920.460.2000000.0384681.0860160.00189
\n", 247 | "

5 rows × 40 columns

\n", 248 | "
" 249 | ], 250 | "text/plain": [ 251 | " b1 b2 b3 b4 b5 b6 \\\n", 252 | "timestamp \n", 253 | "2019-11-12 00:00:00.000 8721.53 8720.59 8719.55 8719.50 8719.0 8718.02 \n", 254 | "2019-11-12 00:00:00.100 8721.53 8720.59 8719.56 8719.55 8719.0 8718.02 \n", 255 | "2019-11-12 00:00:00.200 8721.53 8720.59 8719.61 8719.56 8719.0 8718.02 \n", 256 | "2019-11-12 00:00:00.300 8721.53 8720.59 8719.61 8719.56 8719.0 8718.02 \n", 257 | "2019-11-12 00:00:00.400 8721.53 8720.59 8719.61 8719.56 8719.0 8718.99 \n", 258 | "\n", 259 | " b7 b8 b9 b10 ... aq1 \\\n", 260 | "timestamp ... \n", 261 | "2019-11-12 00:00:00.000 8717.87 8717.85 8717.56 8716.06 ... 5.802204 \n", 262 | "2019-11-12 00:00:00.100 8717.87 8717.85 8717.56 8716.06 ... 5.802204 \n", 263 | "2019-11-12 00:00:00.200 8717.87 8717.85 8716.06 8716.00 ... 5.802204 \n", 264 | "2019-11-12 00:00:00.300 8718.00 8717.87 8717.85 8716.60 ... 7.802204 \n", 265 | "2019-11-12 00:00:00.400 8718.02 8718.00 8717.87 8717.85 ... 7.802204 \n", 266 | "\n", 267 | " aq2 aq3 aq4 aq5 aq6 aq7 \\\n", 268 | "timestamp \n", 269 | "2019-11-12 00:00:00.000 2.859 0.12189 0.142575 0.002692 0.46 0.038468 \n", 270 | "2019-11-12 00:00:00.100 2.859 0.12189 0.142575 0.002692 0.46 0.038468 \n", 271 | "2019-11-12 00:00:00.200 2.859 0.12189 0.142575 0.002692 0.46 0.038468 \n", 272 | "2019-11-12 00:00:00.300 2.859 0.12189 0.142575 0.002692 0.46 0.038468 \n", 273 | "2019-11-12 00:00:00.400 2.859 0.12189 0.142575 0.002692 0.46 0.200000 \n", 274 | "\n", 275 | " aq8 aq9 aq10 \n", 276 | "timestamp \n", 277 | "2019-11-12 00:00:00.000 0.200000 0.001890 1.00000 \n", 278 | "2019-11-12 00:00:00.100 0.200000 0.001890 1.00000 \n", 279 | "2019-11-12 00:00:00.200 0.200000 1.086016 0.00189 \n", 280 | "2019-11-12 00:00:00.300 0.200000 1.086016 0.00189 \n", 281 | "2019-11-12 00:00:00.400 0.038468 1.086016 0.00189 \n", 282 | "\n", 283 | "[5 rows x 40 columns]" 284 | ] 285 | }, 286 | "execution_count": 5, 287 | "metadata": {}, 288 | "output_type": "execute_result" 289 | } 290 | ], 
def concat_data(base_path, features):
    """Concatenate every parquet file found in ``base_path``.

    Files are read in sorted filename order, restricted to the columns in
    ``features``; rows containing NaN are dropped from each file before the
    pieces are stacked.

    Args:
        base_path: Directory holding the parquet files. ``None`` makes
            ``os.listdir`` list the current working directory, in which case
            the bare file names are used as paths.
        features: Column names forwarded to ``pd.read_parquet(columns=...)``.

    Returns:
        One DataFrame with the NaN-free rows of all files, or an empty
        DataFrame when the directory contains no entries.
    """
    import os  # local import: the notebook's import cell never imports os

    frames = []
    for file_name in tqdm(sorted(os.listdir(base_path))):
        if base_path is None:
            path = file_name
        else:
            # os.path.join works with or without a trailing separator
            path = os.path.join(base_path, file_name)
        frames.append(pd.read_parquet(path, columns=features).dropna())

    if not frames:
        return pd.DataFrame()
    # Concatenate once: growing a DataFrame inside the loop re-copies all
    # previously read rows on every iteration.
    return pd.concat(frames)


def extend_matrix(A, n):
    """Repeat the rows of ``A`` so day ``i``'s row covers day ``i + 1``'s samples.

    Row ``i`` of ``A`` is repeated ``n[i + 1]`` times. The last row of ``A``
    and the first entry of ``n`` are not used.

    Args:
        A: 2-D array with one row per day.
        n: Number of samples per day, one entry per row of ``A``.

    Returns:
        Array with ``sum(n[1:])`` rows.
    """
    A = np.asarray(A)
    n = np.asarray(n, dtype=int)
    return np.repeat(A[:-1], repeats=n[1:], axis=0)


def normalise_data_per_day(df):
    """Z-score every row using the statistics of the previous observed day.

    The rows of the first day are dropped because no earlier day exists to
    normalise them with. Days are grouped by the dates actually present in the
    index, so calendar days without any rows do not break the alignment.

    Args:
        df: DataFrame with a DatetimeIndex. The per-day statistics are laid out
            in ascending day order, so the rows are assumed to be sorted by
            time.

    Returns:
        DataFrame holding the rows of every day but the first, transformed as
        ``(x - mean_prev_day) / sqrt(var_prev_day)`` column by column.
    """
    day_keys = df.index.normalize()
    per_day = df.groupby(day_keys)
    rows_per_day = per_day.size().to_numpy()

    mean_array = extend_matrix(per_day.mean().to_numpy(), rows_per_day)
    var_array = extend_matrix(per_day.var().to_numpy(), rows_per_day)

    # Drop the rows of the first day
    df = df[day_keys != day_keys[0]]

    return (df - mean_array) / np.sqrt(var_array)


def balance_classes(y):
    """Pick random sample positions to discard so that all classes are equal in size.

    Every class is cut down to the size of the rarest class; the surplus
    positions are drawn without replacement via ``np.random.choice``.

    Args:
        y: 1-D array-like of class labels.

    Returns:
        Integer array of positions in ``y`` to remove (empty when ``y`` is
        empty or already balanced).
    """
    y = np.asarray(y)
    if y.size == 0:
        return np.array([], dtype=int)

    categories, counts = np.unique(y, return_counts=True)
    class_size = counts.min()
    smallest = counts.argmin()

    surplus = [np.array([], dtype=int)]
    for i, (category, count) in enumerate(zip(categories, counts)):
        if i == smallest:
            continue
        members = np.flatnonzero(y == category)
        surplus.append(np.random.choice(members, count - class_size, replace=False))

    return np.concatenate(surplus, axis=None)


def generate_y(df_snapshot, T=100, D=40, best_ask='a1', best_bid='b1', k=20, alpha=10e-5):
    """Return the direction labels derived from the smoothed mid price.

    For every row ``t`` the mean mid price over the ``k`` rows ending at ``t``
    (``m_b``) is compared with the mean over the following ``k`` rows
    (``m_a``). The first and last ``k`` rows are discarded, then the first
    ``T`` labels are dropped so that label ``i`` belongs to the row directly
    after window ``i`` produced by ``generate_X``.

    Args:
        df_snapshot: DataFrame containing the ``best_ask`` and ``best_bid``
            columns.
        T: Number of leading labels to drop (the look-back window length).
        D: Unused; kept for signature compatibility.
        best_ask: Name of the best-ask price column.
        best_bid: Name of the best-bid price column.
        k: Smoothing horizon in rows, applied both backwards and forwards.
        alpha: Relative threshold. Being multiplicative, it is only meaningful
            for positive prices.
            NOTE(review): the notebook passes the z-scored frame here, whose
            prices can be negative or near zero -- confirm that is intended.

    Returns:
        Integer array with values 0 (down), 1 (stationary) and 2 (up).

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    mid_price = pd.Series(
        (df_snapshot[best_ask].to_numpy() + df_snapshot[best_bid].to_numpy()) / 2
    )

    # Mean of rows t-k+1..t. A rolling mean avoids materialising 2k shifted
    # copies of the series; min_periods=1 keeps the NaN-skipping behaviour of
    # a plain row-wise mean.
    m_b = mid_price.rolling(window=k, min_periods=1).mean()
    # Mean of rows t+1..t+k is the backward mean taken k rows later
    m_a = m_b.shift(-k)

    # Drop the first and last k rows (same trimming as generate_preX)
    stop = len(mid_price) - k
    m_b = m_b.to_numpy()[k:stop]
    m_a = m_a.to_numpy()[k:stop]

    # The price rises when the future mean exceeds the past mean. The previous
    # version compared the two the other way round, labelling falls as rises.
    y_increase = np.where(m_a > m_b * (1 + alpha), 1, 0)
    y_decrease = np.where(m_a < m_b * (1 - alpha), -1, 0)
    y = y_increase + y_decrease

    # T most recent snapshots are used as input, so ignore the first T labels
    y = y[T:]
    y += 1

    return y


def generate_preX(df_snapshot, k=20):
    """Return the snapshot values without the first and last ``k`` rows.

    No label can be created for those rows because the previous and next ``k``
    rows are needed, see ``generate_y``.

    Args:
        df_snapshot: DataFrame of order-book snapshots.
        k: Number of rows to trim from each end; must match ``generate_y``.

    Returns:
        2-D array of the remaining rows.
    """
    values = df_snapshot.to_numpy()
    return values[k:len(values) - k]


def generate_X(preX, T=100, D=40):
    """Stack sliding windows of ``T`` consecutive rows of ``preX``.

    Window ``t`` holds rows ``t .. t+T-1``; ``len(preX) - T`` windows are
    produced, so the last row of ``preX`` never appears in a window.

    Args:
        preX: Array with one row per time step.
        T: Window length in rows.
        D: Unused; kept for signature compatibility.

    Returns:
        ``float32`` array of shape ``(len(preX) - T, T) + preX.shape[1:]``;
        it has zero windows when ``preX`` has ``T`` rows or fewer.
    """
    preX = np.asarray(preX)
    n_windows = max(len(preX) - T, 0)
    X = np.empty((n_windows, T) + preX.shape[1:], dtype='float32')
    # Fill one window position at a time: T vectorised copies instead of a
    # Python list holding every window before the final array is built.
    for offset in range(T):
        X[:, offset] = preX[offset:offset + n_windows]

    return X


if __name__ == "__main__":
    # Statements of the neighbouring notebook cells (5, 6 and 8) that share
    # these lines with the helper definitions; `df` is created by the
    # preceding read_csv cell.
    df.head()
    df.index = pd.to_datetime(df.index)
    l2_norm = normalise_data_per_day(df)
\n", 423 | "\n", 436 | "\n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " 
\n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | "
b1b2b3b4b5b6b7b8b9b10...aq1aq2aq3aq4aq5aq6aq7aq8aq9aq10
timestamp
2019-11-13 00:00:00.0002.0696692.0662212.0493592.0321642.0457552.0561322.0621442.0728412.0644722.076723...-0.630327-0.585376-0.603213-0.584939-0.568453-0.547841-0.503235-0.459792-0.545609-0.581027
2019-11-13 00:00:00.1002.0965532.0990172.0990952.0932162.0766952.0601692.0739922.0844262.0906212.101545...-0.630484-0.585376-0.603213-0.584939-0.568453-0.547841-0.559809-0.556070-0.552547-0.576830
2019-11-13 00:00:00.2002.0965532.0990172.0990952.0932162.0766952.0601692.0739922.0844262.0906212.101545...-0.630484-0.585376-0.603213-0.584939-0.568453-0.547841-0.559809-0.556070-0.552547-0.576830
2019-11-13 00:00:00.3002.0965532.0855762.0966762.0628242.0460242.0599002.0702222.0763442.0871172.078882...-0.630484-0.585376-0.603213-0.584939-0.568453-0.547841-0.559809-0.556070-0.552547-0.576830
2019-11-13 00:00:00.4002.0965532.0855762.0966762.0628242.0460242.0599002.0702222.0763442.0644722.076723...-0.630484-0.585376-0.603213-0.584939-0.568453-0.547841-0.559809-0.556070-0.552547-0.576830
\n", 610 | "

5 rows × 40 columns

\n", 611 | "
" 612 | ], 613 | "text/plain": [ 614 | " b1 b2 b3 b4 b5 \\\n", 615 | "timestamp \n", 616 | "2019-11-13 00:00:00.000 2.069669 2.066221 2.049359 2.032164 2.045755 \n", 617 | "2019-11-13 00:00:00.100 2.096553 2.099017 2.099095 2.093216 2.076695 \n", 618 | "2019-11-13 00:00:00.200 2.096553 2.099017 2.099095 2.093216 2.076695 \n", 619 | "2019-11-13 00:00:00.300 2.096553 2.085576 2.096676 2.062824 2.046024 \n", 620 | "2019-11-13 00:00:00.400 2.096553 2.085576 2.096676 2.062824 2.046024 \n", 621 | "\n", 622 | " b6 b7 b8 b9 b10 \\\n", 623 | "timestamp \n", 624 | "2019-11-13 00:00:00.000 2.056132 2.062144 2.072841 2.064472 2.076723 \n", 625 | "2019-11-13 00:00:00.100 2.060169 2.073992 2.084426 2.090621 2.101545 \n", 626 | "2019-11-13 00:00:00.200 2.060169 2.073992 2.084426 2.090621 2.101545 \n", 627 | "2019-11-13 00:00:00.300 2.059900 2.070222 2.076344 2.087117 2.078882 \n", 628 | "2019-11-13 00:00:00.400 2.059900 2.070222 2.076344 2.064472 2.076723 \n", 629 | "\n", 630 | " ... aq1 aq2 aq3 aq4 \\\n", 631 | "timestamp ... \n", 632 | "2019-11-13 00:00:00.000 ... -0.630327 -0.585376 -0.603213 -0.584939 \n", 633 | "2019-11-13 00:00:00.100 ... -0.630484 -0.585376 -0.603213 -0.584939 \n", 634 | "2019-11-13 00:00:00.200 ... -0.630484 -0.585376 -0.603213 -0.584939 \n", 635 | "2019-11-13 00:00:00.300 ... -0.630484 -0.585376 -0.603213 -0.584939 \n", 636 | "2019-11-13 00:00:00.400 ... 
-0.630484 -0.585376 -0.603213 -0.584939 \n", 637 | "\n", 638 | " aq5 aq6 aq7 aq8 aq9 \\\n", 639 | "timestamp \n", 640 | "2019-11-13 00:00:00.000 -0.568453 -0.547841 -0.503235 -0.459792 -0.545609 \n", 641 | "2019-11-13 00:00:00.100 -0.568453 -0.547841 -0.559809 -0.556070 -0.552547 \n", 642 | "2019-11-13 00:00:00.200 -0.568453 -0.547841 -0.559809 -0.556070 -0.552547 \n", 643 | "2019-11-13 00:00:00.300 -0.568453 -0.547841 -0.559809 -0.556070 -0.552547 \n", 644 | "2019-11-13 00:00:00.400 -0.568453 -0.547841 -0.559809 -0.556070 -0.552547 \n", 645 | "\n", 646 | " aq10 \n", 647 | "timestamp \n", 648 | "2019-11-13 00:00:00.000 -0.581027 \n", 649 | "2019-11-13 00:00:00.100 -0.576830 \n", 650 | "2019-11-13 00:00:00.200 -0.576830 \n", 651 | "2019-11-13 00:00:00.300 -0.576830 \n", 652 | "2019-11-13 00:00:00.400 -0.576830 \n", 653 | "\n", 654 | "[5 rows x 40 columns]" 655 | ] 656 | }, 657 | "execution_count": 9, 658 | "metadata": {}, 659 | "output_type": "execute_result" 660 | } 661 | ], 662 | "source": [ 663 | "l2_norm.head()" 664 | ] 665 | }, 666 | { 667 | "cell_type": "code", 668 | "execution_count": 10, 669 | "metadata": {}, 670 | "outputs": [ 671 | { 672 | "name": "stdout", 673 | "output_type": "stream", 674 | "text": [ 675 | "preX Shape: (4220032, 40)\n", 676 | "X shape, y shape: (4219932, 100, 40) (4219932,)\n", 677 | "Unique y's: [0 1 2]\n" 678 | ] 679 | } 680 | ], 681 | "source": [ 682 | "T = 100\n", 683 | "D = 40\n", 684 | "y = generate_y(l2_norm, T=T, D=D, best_ask='a1', best_bid = 'b1', alpha=0.002)\n", 685 | "preX = generate_preX(l2_norm)\n", 686 | "print(\"preX Shape: \", preX.shape)\n", 687 | "X = generate_X(preX)\n", 688 | "print(\"X shape, y shape: \", X.shape, y.shape)\n", 689 | "print(\"Unique y's: \", np.unique(y))\n", 690 | "\n", 691 | "del preX\n", 692 | "gc.collect()\n", 693 | "\n", 694 | "# First and last 20 are removed to create labels and then last 100 as previous 100 is required for input matrix\n", 695 | "X_index = l2_norm[20:-120].index\n", 696 | 
"\n", 697 | "# Number of data points for the last day\n", 698 | "num_test = np.unique(X_index.day, return_counts=True)[1][-1]\n", 699 | "\n", 700 | "# Split the data into the first seven days and the last day\n", 701 | "X_train_val = X[:-num_test]\n", 702 | "y_train_val = y[:-num_test]\n", 703 | "X_test = X[-num_test:]\n", 704 | "y_test = y[-num_test:]" 705 | ] 706 | }, 707 | { 708 | "cell_type": "code", 709 | "execution_count": 11, 710 | "metadata": {}, 711 | "outputs": [], 712 | "source": [ 713 | "import tensorflow as tf\n", 714 | "from tensorflow.keras.layers import Dense\n", 715 | "from tensorflow.keras import Input, Model\n", 716 | "from tcn import TCN, tcn_full_summary" 717 | ] 718 | }, 719 | { 720 | "cell_type": "code", 721 | "execution_count": 12, 722 | "metadata": {}, 723 | "outputs": [], 724 | "source": [ 725 | "from tensorflow import keras" 726 | ] 727 | }, 728 | { 729 | "cell_type": "code", 730 | "execution_count": 22, 731 | "metadata": {}, 732 | "outputs": [ 733 | { 734 | "name": "stdout", 735 | "output_type": "stream", 736 | "text": [ 737 | "INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')\n" 738 | ] 739 | } 740 | ], 741 | "source": [ 742 | "strategy = tf.distribute.MirroredStrategy()\n", 743 | "with strategy.scope():\n", 744 | " i = Input(batch_shape=(None, 100, 40))\n", 745 | " o = TCN(return_sequences=False, use_skip_connections=True, dropout_rate=0.2)(i)\n", 746 | " o = Dense(3, activation='softmax')(o)\n", 747 | " m = Model(inputs=[i], outputs=[o])\n", 748 | " opt = keras.optimizers.Adam(learning_rate=0.001)\n", 749 | " m.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])" 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": 23, 755 | "metadata": {}, 756 | "outputs": [], 757 | "source": [ 758 | 
"from sklearn.utils.class_weight import compute_class_weight\n", 759 | "class_weights = compute_class_weight('balanced', np.unique(y_train_val), y_train_val)\n", 760 | "d_class_weights = dict(enumerate(class_weights))" 761 | ] 762 | }, 763 | { 764 | "cell_type": "code", 765 | "execution_count": 24, 766 | "metadata": {}, 767 | "outputs": [ 768 | { 769 | "name": "stdout", 770 | "output_type": "stream", 771 | "text": [ 772 | "WARNING:tensorflow:sample_weight modes were coerced from\n", 773 | " ...\n", 774 | " to \n", 775 | " ['...']\n", 776 | "WARNING:tensorflow:sample_weight modes were coerced from\n", 777 | " ...\n", 778 | " to \n", 779 | " ['...']\n", 780 | "Train on 3754532 samples, validate on 465400 samples\n", 781 | "Epoch 1/1000\n", 782 | "INFO:tensorflow:batch_all_reduce: 28 all-reduces with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10\n", 783 | "INFO:tensorflow:batch_all_reduce: 28 all-reduces with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10\n", 784 | "3754240/3754532 [============================>.] - ETA: 0s - loss: 0.7837 - accuracy: 0.6785\n", 785 | "Epoch 00001: val_loss improved from inf to 0.64774, saving model to model_resnet_walkforward_tcn.h5\n", 786 | "3754532/3754532 [==============================] - 506s 135us/sample - loss: 0.7837 - accuracy: 0.6785 - val_loss: 0.6477 - val_accuracy: 0.7247\n", 787 | "Epoch 2/1000\n", 788 | "3754496/3754532 [============================>.] - ETA: 0s - loss: 0.6901 - accuracy: 0.7326\n", 789 | "Epoch 00002: val_loss improved from 0.64774 to 0.63731, saving model to model_resnet_walkforward_tcn.h5\n", 790 | "3754532/3754532 [==============================] - 433s 115us/sample - loss: 0.6901 - accuracy: 0.7326 - val_loss: 0.6373 - val_accuracy: 0.7113\n", 791 | "Epoch 3/1000\n", 792 | "3754112/3754532 [============================>.] 
- ETA: 0s - loss: 0.6794 - accuracy: 0.7365\n", 793 | "Epoch 00003: val_loss did not improve from 0.63731\n", 794 | "3754532/3754532 [==============================] - 434s 116us/sample - loss: 0.6794 - accuracy: 0.7365 - val_loss: 0.6388 - val_accuracy: 0.7474\n", 795 | "Epoch 4/1000\n", 796 | "3754496/3754532 [============================>.] - ETA: 0s - loss: 0.6734 - accuracy: 0.7384\n", 797 | "Epoch 00004: val_loss improved from 0.63731 to 0.63683, saving model to model_resnet_walkforward_tcn.h5\n", 798 | "3754532/3754532 [==============================] - 434s 116us/sample - loss: 0.6734 - accuracy: 0.7384 - val_loss: 0.6368 - val_accuracy: 0.7244\n", 799 | "Epoch 5/1000\n", 800 | "3754112/3754532 [============================>.] - ETA: 0s - loss: 0.6691 - accuracy: 0.7396\n", 801 | "Epoch 00005: val_loss improved from 0.63683 to 0.62580, saving model to model_resnet_walkforward_tcn.h5\n", 802 | "3754532/3754532 [==============================] - 433s 115us/sample - loss: 0.6691 - accuracy: 0.7396 - val_loss: 0.6258 - val_accuracy: 0.7162\n", 803 | "Epoch 6/1000\n", 804 | "3754496/3754532 [============================>.] - ETA: 0s - loss: 0.6663 - accuracy: 0.7403\n", 805 | "Epoch 00006: val_loss did not improve from 0.62580\n", 806 | "3754532/3754532 [==============================] - 434s 116us/sample - loss: 0.6663 - accuracy: 0.7403 - val_loss: 0.6288 - val_accuracy: 0.7256\n", 807 | "Epoch 7/1000\n", 808 | "3754112/3754532 [============================>.] - ETA: 0s - loss: 0.6634 - accuracy: 0.7409\n", 809 | "Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.\n", 810 | "\n", 811 | "Epoch 00007: val_loss did not improve from 0.62580\n", 812 | "3754532/3754532 [==============================] - 434s 115us/sample - loss: 0.6634 - accuracy: 0.7409 - val_loss: 0.6315 - val_accuracy: 0.7172\n", 813 | "Epoch 8/1000\n", 814 | "3754112/3754532 [============================>.] 
- ETA: 0s - loss: 0.6404 - accuracy: 0.7496\n", 815 | "Epoch 00008: val_loss did not improve from 0.62580\n", 816 | "3754532/3754532 [==============================] - 436s 116us/sample - loss: 0.6404 - accuracy: 0.7496 - val_loss: 0.6264 - val_accuracy: 0.7098\n", 817 | "Epoch 9/1000\n", 818 | "3754496/3754532 [============================>.] - ETA: 0s - loss: 0.6339 - accuracy: 0.7503\n", 819 | "Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0001.\n", 820 | "\n", 821 | "Epoch 00009: val_loss did not improve from 0.62580\n", 822 | "3754532/3754532 [==============================] - 435s 116us/sample - loss: 0.6339 - accuracy: 0.7503 - val_loss: 0.6287 - val_accuracy: 0.6852\n" 823 | ] 824 | } 825 | ], 826 | "source": [ 827 | "EPOCHS = 1000\n", 828 | "model_filename = \"model_resnet_walkforward_tcn.h5\"\n", 829 | "hist_filename = \"hist_model_walkforward_tcn.csv\"\n", 830 | "early_stop = keras.callbacks.EarlyStopping(\n", 831 | " monitor='val_loss',\n", 832 | " patience=4)\n", 833 | "model_save_checkpoint = keras.callbacks.ModelCheckpoint(\n", 834 | " filepath= model_filename,\n", 835 | " save_best_only=True,\n", 836 | " monitor='val_loss',\n", 837 | " verbose=1)\n", 838 | "reduce_lr = keras.callbacks.ReduceLROnPlateau(\n", 839 | " monitor='val_loss',\n", 840 | " factor=0.2,\n", 841 | " patience=2,\n", 842 | " verbose=1,\n", 843 | " min_lr=0.0001)\n", 844 | "callbacks = [reduce_lr, model_save_checkpoint, early_stop]\n", 845 | "history = m.fit(\n", 846 | " X_train_val, y_train_val,\n", 847 | " epochs=EPOCHS,\n", 848 | " batch_size=128,\n", 849 | " callbacks=callbacks,\n", 850 | " class_weight = d_class_weights,\n", 851 | " validation_data = (X_test, y_test),\n", 852 | ")\n" 853 | ] 854 | }, 855 | { 856 | "cell_type": "code", 857 | "execution_count": 25, 858 | "metadata": {}, 859 | "outputs": [], 860 | "source": [ 861 | "y_pred = m.predict(X_test).argmax(axis=1)" 862 | ] 863 | }, 864 | { 865 | "cell_type": "code", 866 | "execution_count": 32, 867 | 
"metadata": {}, 868 | "outputs": [ 869 | { 870 | "data": { 871 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWQAAAD8CAYAAABAWd66AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8VOX1x/HPmQRElJ0k7LKjogiyurILrf6wVFzrz62VatG2WvSn0rpgqXVHBRXclypuVVFQRAUEV4Ki7IKIQhDCDiICSc7vjxliEpZMIMncO/m++7ov59773LnnDvRw5rnPfcbcHRERSbxIogMQEZEoJWQRkYBQQhYRCQglZBGRgFBCFhEJCCVkEZGAUEIWEQkIJWQRkYBQQhYRCYjUsj5B1TMe16OAZeyNEQMSHULS69a8TqJDqBAOqWx2oO9xcIcr4s45274YdcDnK02qkEVEAqLMK2QRkXJl4a0zlZBFJLlEUhIdwX5TQhaR5HLg3dAJo4QsIslFXRYiIgGhCllEJCBUIYuIBIQqZBGRgNAoCxGRgFCXhYhIQKjLQkQkIFQhi4gEhBKyiEhApOimnohIMKgPWUQkINRlISISEKqQRUQCQhWyiEhAqEIWEQkIPTotIhIQ6rIQEQkIdVmIiASEKmQRkYBQQhYRCQjd1BMRCQj1IYuIBIS6LEREAkIVsohIMJgSsohIMCghi4gEhEWUkEOhb/uG3HlJN1IixpPvfc3dr361W5vfHt+MYWe1x4E5y9Zz8chpNE47hBeu7U3EjNTUCA9PnM+j7ywq/wsIiXmff8JLj4zE8/I4vu//0G/Q/xba/97r4/jwnTeIpKRQrUZNzr/yBuqk12PRV7N45fH789utWvE9lwy9hfbdTi7vSwi8D2dM567bR5Cbm8fA3w7i4j8MLrR/VuZM7r7jNhZ/vYjb7ribPqf0z9835LI/MOerL2nf4VjuHz2mvEMvc6qQQyASMe699DhOGz6JrHVbmX77ACbM/J6FKzbmt2lRvzrXDGxH72ET2Lh1B2nVqwCwasM2elz/Jjty8jikSiqZ9w5kwszv+WHDtkRdTmDl5ebywpi7+fMtI6lZJ53bh/6Bdl1OpH6TZvltGjVrxXX3PEblg6rwwVuv8uqTo/nDtbfSpl1Hbhj5FABbt2zmpsvO4sgOXRJ1KYGVm5vL7SOG8+DYx8mol8H555xJ9569aN6iZX6b+vXrc/Ott/HMU4/vdvwFF/2en3/exisvvVCeYZebMCfk8I4PKaFOLevyzarNLFu9hZ05ebw8YymndW5SqM3FfVoz5u0FbNy6A4A1m38GYGdOHjty8gA4KDWFSIj/wMvassULSKvXiLr1GpJaqRIdT+rNl59NL9SmTbuOVD4o+o9dszZt2bhuzW7v88VHU2h7bLf8dvKLuXO+olGTJjRq3JhKlSrT71e/ZuqU9wq1adCwEa3btNnj39Wu3Y7jkEMOKa9wy52Zxb3E8V79zWyRmS0xs+v20uYsM5tvZvPM7LkC2y80s8Wx5cJ4Yo+7Qjaz44GmBY9x96fjPT7RGtQ+hKy1W/PXs9ZvpXOrtEJtWjWoAcB7I04lJWKMeOELJs/OAqBhnUP477C+tKhXnWFPz1R1vBcb162hVt30/PVaddJZ9vW8vbb/aPIbtO3YbbftmdPfpffp55RJjGG3Jns19erVz19Pz6jH3K++TGBEAVNK9ZKZpQCjgb7ACmCmmY139/kF2rQCrgdOcPcNZpYe214buAnoBDgwK3bshn2dM64K2cyeAe4CTgQ6x5ZOJby+wEuNGC3r16DfjRO58N6pjL78BGpUrQxA1rqtdL36NY4e8hK/69GS9Bqq3A7Up1Mn8d2ShfQZeF6h7ZvWr2Xld0s5skPXBEUmYVaKFXIXYIm7L
3X3HcA44PQibS4FRu9KtO6eHdveD5js7utj+yYD/SlGvBVyJ+BId/d4GpvZYGAwQKUOF5DarHucpyk7K9dvpWHdX76mNax9CCvX/VSoTda6n5i5eA05uc532T+yeOVmWtavzqxv1ua3+WHDNuZ/v4Hjj6jHa58sK6/wQ6NmnTQ2rM3OX9+wLpsaddJ2a7dw9kzefukprh4xmkqVKhfaN+vD9zmm28mkpFaYWxwlkpaewapVP+SvZ69eRXpGRgIjCpZIpNR6YhsCywusrwCKVgmtAczsQyAFuNnd397LsQ2LO2G8kc8F6sXZFncf6+6d3L1TEJIxwKwla2lZvwaHpR9KpdQIg05szoTM7wu1eeOz7zipbfQy61Q7iFYNqvPt6i00rF2VKpWjE5bUPKQyxx2RweKVm8r9GsLgsFaHk/3DCtauXknOzp3Mmv4e7bqcWKjN8qVf89xDd3D5sNupVrPWbu+R+cFkOp3Up7xCDp22Rx3N8u++I2vFCnbu3MGktybSvUevRIcVGCWpkM1ssJllFlgGF3+GQlKBVkAP4FzgETOrub+xx1uC1AXmm9lnwPZdG919wP6euLzl5jlXP/ox4//Rj5SI8fT7i1mwfCP/OKcDny9Zy4TM5UyenUXv9g2ZNXIguXnODU/PZP2P2+nVrgG3XdQF9+hTmfeNn8u87/fZFVRhpaSkcvbgqxh189Xk5eVyXO/TaNCkOW/85xEOa3k47bqexH+fGM32bdt49I6/A1CrbgaX//0OANat/oENa7NpdVSHRF5GoKWmpvJ/N/yDIZf9nrzcPAYMPIMWLVvx0Kj7ObLtUXTv2Yt5c+fwt79cweYtm/lg2hQefnAUL7/2JgCXXPg7ln27lG0//UT/3t25cfg/Of6EkxJ8VaWoBH3I7j4WGLuX3VlA4wLrjWLbCloBfOruO4Fvzexrogk6i2iSLnjs1OLisXh6Icxsj2Wuu08r7tiqZzweVzeH7L83RoTm38XQ6ta8TqJDqBAOqXzgQ5jqXjQu7pyz9slz9no+M0sFvgZ6E02wM4Hz3H1egTb9gXPd/UIzqwt8AbQndiMPODbW9HOgo7uv31c88VbILYEP3H1xnO1FRBKitMYhu3uOmV0BTCLaP/y4u88zs+FApruPj+07xczmA7nANe6+LhbHrUSTOMDw4pIxxJ+QmwBjzKwp0az/ATDd3WfHfXUiIuWgNB+ddveJwMQi224s8NqBq2NL0WMfB3Z/Mmcf4krI7n4TgJkdTHSYxzXASKL/aoiIBEaYn9SLKyGb2d+BE4BDifaRDAWm7/MgEZEESPqEDPwWyAEmANOAj919+74PEREpf2FOyHGNQ3b3Y4E+wGdEHyOcY2YzyjIwEZH9UZpzWZS3eLssjgJOAroTfWpvOeqyEJEgCl6ejVu8XRb/JpqA7wdmxgZBi4gETik+Ol3u4h1lcZqZVSb63HYbM1ukpCwiQRTEroh4xdtl0R14GlhG9AtBYzO70N0/KMPYRERKLrz5OO4ui3uAU9x9EYCZtQaeBzqWVWAiIvsj6StkoNKuZAzg7l+bWaUyiklEZL9VhIScaWaPAs/G1n8HZJZNSCIi+68iJOTLgSHAn2Pr04EHyyQiEZEDUJpzWZS3eEdZbI/9jNMz7r77L1KKiAREmCvkfQ7Ys6ibzWwtsAhYZGZrzOzGfR0nIpIoYX5Sr7gR1FcRnVSos7vXdvfaRH9T6gQzu6rMoxMRKSGz+JegKS4h/y/R2fC/3bXB3ZcC5wMXlGVgIiL7I8wVcnF9yJXcfW3Rje6+RsPeRCSIIkl8U2/Hfu4TEUmIABa+cSsuIR9jZpv3sN2AKmUQj4jIAUnaCtnd9RNNIhIqyVwhi4iEShBv1sVLCVlEkkqI87ESsogkl6SfoF5EJCxUIYuIBIT6kEVEAiLE+VgJWUSSiypkEZGACHE+VkIWkeSStE/qlYbvntSkcGWtSe/rEh1C0lsw8Z+JDqFCOKTugc/IoC4LEZGACHE+V
kIWkeSiCllEJCBCnI+VkEUkueimnohIQKjLQkQkIJSQRUQCIsT5WAlZRJKLKmQRkYAIcT5WQhaR5KJRFiIiAREJcYmshCwiSSXE+VgJWUSSi27qiYgERIi7kJWQRSS5hPmmXnh/L1tEZA+sBP8r9r3M+pvZIjNbYmZ7nXjczM4wMzezTrH1pma2zcxmx5aH44ldFbKIJJXSKpDNLAUYDfQFVgAzzWy8u88v0q4a8Bfg0yJv8Y27ty/JOVUhi0hSMbO4l2J0AZa4+1J33wGMA07fQ7tbgduBnw80diVkEUkqZiVZbLCZZRZYBhd4q4bA8gLrK2LbCpzLjgUau/uEPYTSzMy+MLNpZnZSPLGry0JEkkpJHgxx97HA2P05j5lFgHuAi/aw+wegibuvM7OOwGtm1tbdN+/rPZWQRSSplOIoiyygcYH1RrFtu1QDjgKmxro/6gHjzWyAu2cC2wHcfZaZfQO0BjL3GXtpRS4iEgQl6bIoxkyglZk1M7PKwDnA+F073X2Tu9d196bu3hT4BBjg7plmlha7KYiZNQdaAUuLO6EqZBFJKqU1l4W755jZFcAkIAV43N3nmdlwINPdx+/j8JOB4Wa2E8gDLnP39cWdUwlZRJJKaT4W4u4TgYlFtt24l7Y9Crx+BXilpOdTQhaRpKK5LEREAiLET04rIYtIcgnzXBZKyCKSVNRlISISECEukJWQRSS5qEIWEQmI8KbjCvak3icfTefc357K2b/pzzNPPrLb/tmfZ3LJ7wbRvWs7prw7abf9W3/8kYG/7sU9t/+zPMINrb7d2vDlS9cy95XrGHpBzz22OaPPMXw+7hpmjRvKk7eel7/9n1ecSubzQ8l8fiiD+hxTXiGHzsxPPuT35wzgorNO44VnHttt/5zZsxhy8dn86uRjmT5lcv722bM+4/ILz8pfTuvZmY8+eL88Qy9zKRGLewmaClMh5+bmcs/tI7h39COkZ2TwhwvO5sSTe9Ksecv8Nhn16nPDzSN4/pkn9/gejzz8AMd06FhOEYdTJGKMvHYgp14xlqzsTcx46i+8OX0+C79dnd+mReO6DL2wF70uHcXGLdtIq3UoAP1POIL2bRrS9fx7OKhSKu88fDmTPl7Ilq3bE3U5gZSbm8vou//FbSPHUDc9gyv/cB7dTuzBYc1a5LdJy6jH34bdysvPP1Xo2PYdu/DQUy8CsHnzJi4+6zSO7XJcucZf1sLcZVFhKuQF8+bQqHFjGjZqTKVKlelzyq+ZMW1KoTb1GzSkZas2exw2s3DBPDasW0eXbseXV8ih1LltE75ZsY5lK9ezMyeXl96ZzWknty3U5pLfdGXMyx+yccs2ANZs+BGAI5plMOOLpeTm5vHTzzuYs2Qlpxx3eLlfQ9AtWjCXBo0aU79hIypVqkSP3v35ePrUQm3q1W9I85atidje/y8+Y8pkOnc7kSpVDi7jiMtXKc5lUe4qTEJek72a9Iz6+etp6RmsyV69jyN+kZeXx6h772TIX4eWVXhJo0FaDVas3pi/npW9kYZpNQq1adUkjVZN0nj/kSFMe+xK+nZrA8BXi1dyynFtOPigStSpUZXuHVvSKL1mucYfBuvWZJOWXi9/vW56OmvXxPd3uaCp775Nj779SzO0QIiYxb0ETVxdFmbWGngIyHD3o8ysHdFZjSpEZ+qrLz3PcSecRHpGveIbS7FSUiK0bFyXUy57iIYZNXl3zJ/odO5dvPfp13Q8sjFTHruCtRu28umc78jNy0t0uElp3do1LFu6hE5dk+8bXwDzbNzirZAfAa4HdgK4+1dEp6Lbo4Kz8D/9xO43zxIhLT2D7NU/5K+vyV5NWnpGXMfOnfMlr7z4HIP+py+jR97F2xPH89AD95RVqKG2cs0mGmX8UtU2TK9J1ppNhdpkZW/izQ/mk5Obx3cr17P4+zW0bJwGwB1PvEe38+/ltCvHYgaLv19TrvGHQZ20dNZkr8pfX5udTd20+P4u7/LB++9w/Mm9SE2tV
NrhJVwp/oRTuYs3IVd198+KbMvZW2N3H+vundy90wUXX7r/0ZWiw488iuXLv2dl1gp27tzBu+9M5IST9zwCoKib/nkH/53wHi+/MZkhfx1K/18P4PIrry7jiMMpc/5yWjauy2ENalMpNYUzT2nPhOnzCrV5Y+pcTu4YvQFVp0ZVWjVJ49uV64hEjNo1qgJwVMv6HNWyAe9++nW5X0PQtTm8LVkrvmfVyhXs3LmTqe+9TbcTu5foPaZOfosefZKvuwIgxSzuJWjiHWWx1sxaAA5gZoOI/kRJaKSmpnL1NcO4+srB5OXmceqAgTRv0ZJHH36Aw49oy4nde7Fg3hxuuOYvbNm8mQ+nT+WxsaN59sV9TXkqReXm5nHVna/yxv2XkhIxnnpjJguWruYfg/vx+YLlTJg+n8mfLKJPt9Z8Pu4acvPyuOH+N1m/6ScOqpzKu2OGALBl689ccuNz5Oaqy6KolNRUhlx1PTdcfTl5uXmcctpvaNq8JU89MprWh7fluJN6sGjBXIZffxVbtmzmkw+n8fSjD/LIf14FYNUPWazJXkW7Dp0SfCVlI4Cj2eJm7l58o+iM92OB44ENwLfA+e6+rLhj12zJKf4EckCa9L4u0SEkvQUTK8TtkoRrWrfKAafTq8cvjDvn3DPg8ECl77gqZHdfCvQxs0OAiLtvKduwRET2TxD7huO1z4RsZnvsKN11we6uO1siEihh7rIorkKuVi5RiIiUkhAXyPtOyO5+S3kFIiJSGlJDnJHjGvZmZs3N7A0zW2Nm2Wb2euxGn4hIoFSER6efA14E6gMNgJeA58sqKBGR/RXmR6dL8mDIM+6eE1ueBaqUZWAiIvsjzBVycaMsasdevmVm1wHjiD4ccjYwsYxjExEpsWQeZTGLaALedYl/LLDPic5vISISGEGceD5exY2yaFZegYiIlIYQ5+P4fzHEzI4CjqRA37G7P10WQYmI7C8L8a/qxTsf8k1AD6IJeSLwK2AGoIQsIoES5go53lEWg4DewCp3vxg4Bqix70NERMpfxOJfgibeLott7p5nZjlmVh3IBhqXYVwiIvslaScXKiDTzGoS/eWQWcCPwMdlFpWIyH5KCfEvhcY7/eafYi8fNrO3geqxn3ESEQmUID6BF69457J4b9drd1/m7l8V3CYiEhRJ24dsZlWAqkBdM6vFLw+IVAcalnFsIiIlFuICudguiz8CfyU6odCsAtu3AKPKKigRkf0VCfE45OK6LD4i+jt6Q929OXALMBeYRnQGOBGRQAnz5ELFJeQxwHZ3f8DMTgZuA54CNhH90VMRkUBJjVjcS9AU12WR4u7rY6/PBsa6+yvAK2Y2u2xDExEpuSBWvvEqrkJOMbNdSbs38H6BfXHPgyEiUl7CPEF9cUn1eWCama0FtgHTAcysJdFuCxGRQAlgno1bcdNvjoiNN64PvOPuHtsVAa4s6+BEREoqxA/qFd/t4O6f7GHb12UTjojIgQliV0S81A8sIklFCVlEJCDCm47D3d0iIrKb0nwwxMz6m9kiM1sS+6HnovsvM7M5ZjbbzGaY2ZEF9l0fO26RmfWLJ3ZVyCKSVEprPmQzSwFGA32BFcBMMxvv7vMLNHvO3R+OtR8A3AP0jyXmc4C2RKeeeNfMWrt77r7OqQpZRJJKpARLMboAS9x9qbvvAMYBpxds4O6bC6weAuwaiXY6MM7dt7v7t8CS2PvtkypkEUkqpXhTryGwvMD6CqBr0UZmNgS4GqgM9CpwbMERaiuIY4bMMk/I1Q5Wzi9rn792S6JDSHpH9B2a6BAqhG1fHPgkkiXpsjCzwcDgApvGunuJ5ulx99HAaDM7D/g7cGFJji9I2VJEkkpJ+mFjyXdvCTiLwr8d2ii2bW/GAQ/t57GA+pBFJMmYWdxLMWYCrcysmZlVJnqTbnyRc7UqsHoqsDj2ejxwjpkdZGbNgFbAZ8WdUBWyiCSV0upBdvccM7sCmASkAI+7+zwzGw5kuvt44Aoz6wPsB
DYQ666ItXsRmA/kAEOKG2EBSsgikmRSSvFJPXefCEwssu3GAq//so9jRwAjSnI+JWQRSSohfnJaCVlEkouF+OFpJWQRSSqqkEVEAiLMvzqthCwiSUUVsohIQGg+ZBGRgIiENx8rIYtIctEoCxGRgAhxj4USsogkF1XIIiIBoT5kEZGA0CgLEZGACG86VkIWkSSjCllEJCDCm46VkEUk2YQ4Iyshi0hSUZeFiEhAhDcdKyGLSLIJcUZWQhaRpKIn9UREAiLEXchKyCKSXEKcj5WQRSS5WIhLZCVkEUkqIc7HSsgiklxCnI+VkEUkyYQ4Iyshi0hS0bC3APtw+gfc/u8R5OXmMfCMM/n9pYML7d+xYwfDrr+WBfPmUaNmTe64+14aNmzEzh07GH7LTcyfN5eIGddeP4zOXboCsHPHDm4bcSszZ35GJGJc+eer6HNKv0RcXuB9/tmHPDrqLvJyc+l76kDOOO/iQvtff/FZJk98lZSUFKrXqMWV195Eer0GCYo2PPoefwR3XTOIlEiEJ1/7iLuemFxo/x1/+y0nd24NQNUqlUmrfSj1T74WgNdH/Yku7Zry0RdLOeMvD5d77GVNfcgBlZuby79GDGfMI0+QkZHBeWcPokfPXrRo2TK/zauvvET16tV58+3JvDVxAiPvuYs77x7JKy+/BMArr73BunXrGHLZpTz3wstEIhEeGfswtWvX5o2Jk8jLy2PTpo2JusRAy83NZcx9t3PLnQ9SJy2Day47ny7Hd6dx0+b5bZq3asPdDz/LQVUO5q3XX+KpMfdxzU23JzDq4ItEjJHXncWpl48ia/VGZvznGt6cNoeFS1flt7n27v/mv778nO4c06ZR/vq9T79L1SqV+f0ZJ5Zr3OUlzAk5kugAytLcOV/RuPFhNGrcmEqVK9P/16cydcp7hdpMef99Bpw+EIC+p/Tjs08+xt1Z+s0SunSNVsR16tShWrVqzJs7F4DXXn2FSy79IwCRSIRatWqX41WFx+KFc6nfoBH1GjSiUqVKnNirH59+OLVQm6M7dOagKgcD0ObIo1m3JjsBkYZL56Oa8s3ytSzLWsfOnFxemvQ5p/Vot9f2Z/XvyItvz8pfn/rZ12zZur08Qk0IK8H/gqZECdnMqptZtbIKprRlr15Nvfr18tfTMzJYvXp14TbZq6lXrz4AqampHFqtGhs3bqB1m8OZNuV9cnJyWLFiOQvmz2P1qh/YvHkzAKMfuI+zBw1k6FV/Zt3ateV3USGyfu0a6qb/8vnXSUtn/dq9J9x3J77GsV1PKI/QQq1Beg1WrN6Qv561egMN02rssW2T+rU4rEEdps5cVF7hJZxZ/EvQxJWQzayzmc0BvgLmmtmXZtaxbENLrN/89gwyMupx3llncOe//8Ux7TsQSUkhNzeH1atW0b59B154+VXaHdOBu+/SV+wDNXXyBJYsms/Asy9IdChJ5cx+HXntvdnk5XmiQyk3VoIlaOKtkB8D/uTuTd39MGAI8MTeGpvZYDPLNLPMxx4ZWxpx7pf0jAxW/fBLv1r26tVkZGQUbpOewapVPwCQk5PDj1u2ULNmLVJTU7nmuht48b+vc9+oh9iyZQuHHdaUmjVrUeXgg+nd9xQATunXnwXz55ffRYVI7bpprM3+5fNftyab2nXTd2v35axPefnZx7hhxEgqVa5cniGG0srsTTTKqJW/3jCjFllrNu2x7aB+HXnx7czyCi0YQpyR403Iue4+fdeKu88AcvbW2N3Hunsnd+9UdFRDeWp71NF8//0yVqxYzs4dO3h74gS69+xVqE2Pnr0Y//qrAEx+ZxJdunbDzNi2bRs//fQTAB9/9CEpKSm0aNkSM6N7j57M/OxTAD795GNatGhRvhcWEq0Ob8sPWctZ/UMWO3fuZMb7k+hyfPdCbZYuXsiD94zghhEjqam++LhkzvuOlk3SOKxBHSqlpnBmv2OZMPWr3dq1bppBrepV+eTLbxMQZeJEzOJegsbci/8qY2Yjg
YOB5wEHzgZ+Bp4FcPfP93bszzkk9LvS9A+mcce//0VeXi6/GXgGl/7xckY/cB9t2x5Fj1692b59O8Ouu4aFCxZQvUYN7rjrXho1bkxW1gouH/x7IpEI6ekZ3HzrCBo0aAjAypVZDLvuWrZs2UytWrUZ/s/bqN8gcUO1vs3emrBzFyfzkxk8PvoucvPy6POrAZx5/h947vGHaNnmSLqc0J0b/3YZ3327hFq16wKQllGPYSNGJjjq3R176v8lOoRC+p14JHcOHURKxHjq9U+447FJ/OPyU/l8/vdMmDYHgGF//DVVDkrlH/ePL3Tsu4/9ldbNMjj04INYv2krl93yHO9+vCARl7GbbV+MOuAs+fWqn+LOOa3rVQ1UVo43IU+JvdzVuOBFuLv3Yi8SnZArgiAn5GQRtIScrEolIa8uQULOCFZC3uc4ZDO7Ovbyzdh/HVgDzHD3ivU9SERCIYjD2eJVXB9ytdhyaGypBnQC3jKzc8o4NhGREgvzsLd9VsjufsuetptZbeBdYFxZBCUisr8CmGfjtl+PTrv7egvzLNAikrTCnJr2KyGbWU9gQ7ENRUTKWYjzcbE39ebAbqMkagMrAT1SJSKBE+J8XGyFfFqRdQfWubvGWYlIMIU4Ixd3U++78gpERKQ0hHnYW1LPhywiFU+Y+5CTej5kEal4Ihb/Uhwz629mi8xsiZldt4f9J5vZ52aWY2aDiuzLNbPZsWV80WP3RBWyiCSZ0imRzSwFGA30BVYAM81svLsXnN7xe+AiYOge3mKbu7cvyTmVkEUkqZRil0UXYIm7L42+r40DTgfyE7K7L4vtyyuNE6rLQkSSSkmmQy44d3tsKThfcENgeYH1FbFt8aoSe89PzOw38RygCllEkkpJKmR3HwuU1a9oHObuWWbWHHjfzOa4+zf7OkAVsogkFTOLeylGFtC4wHqj2La4uHtW7L9LgalAh+KOUUIWkaRSir/gNBNoZWbNzKwycA4Q12gJM6tlZgfFXtcFTqBA3/PeKCGLSFIprek33T0HuAKYBCwAXnT3eWY23MwGRM9lnc1sBXAmMMbM5sUOPwLINLMvgSnAv4uMztgj9SGLSFIpzSf13H0iMLHIthsLvJ5JtCuj6HEfAUeX9HxKyCKSXEL8pJ4SsogklRDnYyVkEUkukRBPZqGELCJJJcT5WKMsRESCQhWyiCSVMFdkrKY2AAAEEElEQVTISsgiklQ0Qb2ISECoQhYRCQglZBGRgFCXhYhIQKhCFhEJiBDnYyVkEUkyIc7ISsgiklTC/Oi0uXuiYwgcMxsc+2kXKSP6jMuePuPw0aPTeza4+CZygPQZlz19xiGjhCwiEhBKyCIiAaGEvGfqdyt7+ozLnj7jkNFNPRGRgFCFLCISEEmfkM0s18xmm9k8M/vSzP5mZkl/3eXFzIbFPtuvYp9zVzP7q5lVjePYZWZWdw/bbzazoWUTcfIxs6ZmNrfINn2GIVQRHgzZ5u7tAcwsHXgOqA7clNCokoCZHQecBhzr7ttjybUy8ALwLPBTIuMTCZsKVSm6ezbRsZlXWFQVM3vCzOaY2Rdm1hPAzCaYWbvY6y/M7MbY6+FmdqmZ9TCzqWb2spktNLP/mIX48aD9Vx9Y6+7bAdx9LTAIaABMMbMpAGb2kJllxirpW4q8x7Wxz/8zM2tZ9ARm1sLM3jazWWY23cwOL+NrSiqxv6f3xb69zDWzLomOSfauQiVkAHdfCqQA6cCQ6CY/GjgXeMrMqgDTgZPMrAaQA5wQO/wk4IPY6w7AX4EjgeYF2lQk7wCNzexrM3vQzLq7+/3ASqCnu/eMtRvm7p2AdkD3Xf/YxWyKff6jgJF7OMdY4Ep37wgMBR4ss6tJXlVj3xL/BDye6GBk7ypcQi7iRKJfrXH3hcB3QGuiCflkokl2AnBorE+0mbsvih37mbuvcPc8YDbQtJxjTzh3/xHoSPRbxxrgBTO7aA9NzzKzz4EvgLZE/xHb5fkC/z2u4EFmdihwP
PCSmc0GxhCtyqWwvQ2V2rX9eQB3/wCobmY1yyUqKbGK0IdciJk1B3KB7H00mwl0ApYCk4G6wKXArAJtthd4nUsF/CwB3D0XmApMNbM5wIUF95tZM6KVbWd332BmTwJVCr7FXl5DtGDYuOsegOzVOqBWkW21gW9jr4t+rhrrGlAVqkI2szTgYWCURwdgTwd+F9vXGmgCLHL3HcBy4Ezg41i7ofzSXSGAmbUxs1YFNrUn+i1jC1Attq06sBXYZGYZwK+KvM3ZBf77ccEd7r4Z+NbMzoydz8zsmNK9ivCLfVP5wcx6AZhZbaA/MCPW5OzY9hOJdhFtSkigUqyKUNUdHPu6W4lof/AzwD2xfQ8CD8Uquxzgol03qIgm4d7uvs3MpgONYtvkF4cCD8S+AucAS4h2X5wLvG1mK929p5l9ASwk+o/ch0Xeo5aZfUX0G8e5ezjH74j+Gf2d6J/hOODLMrmacLsAGG1mu/5u3+Lu38TuNf8c+zOoBFySqACleHpSTySJmdlUYKi7ZyY6FileheqyEBEJMlXIIiIBoQpZRCQglJBFRAJCCVlEJCCUkEVEAkIJWUQkIJSQRUQC4v8BvTJQJ8HquMAAAAAASUVORK5CYII=\n", 872 | "text/plain": [ 873 | "
" 874 | ] 875 | }, 876 | "metadata": { 877 | "needs_background": "light" 878 | }, 879 | "output_type": "display_data" 880 | } 881 | ], 882 | "source": [ 883 | "from sklearn.metrics import confusion_matrix\n", 884 | "\n", 885 | "cm = confusion_matrix(y_pred=y_pred, y_true=y_test)\n", 886 | " \n", 887 | "cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", 888 | "\n", 889 | "ax=sns.heatmap(cm, annot=True, xticklabels=['Down','Stable','Up'], yticklabels=['Down','Stable','Up'], cmap='Blues')\n", 890 | "ax.set_ylim(3.0, 0)\n", 891 | "plt.savefig('image')" 892 | ] 893 | }, 894 | { 895 | "cell_type": "code", 896 | "execution_count": 27, 897 | "metadata": {}, 898 | "outputs": [ 899 | { 900 | "name": "stdout", 901 | "output_type": "stream", 902 | "text": [ 903 | " precision recall f1-score support\n", 904 | "\n", 905 | " 0 0.42 0.63 0.50 61142\n", 906 | " 1 0.90 0.69 0.78 350499\n", 907 | " 2 0.36 0.71 0.48 53759\n", 908 | "\n", 909 | "avg / total 0.77 0.69 0.71 465400\n", 910 | "\n" 911 | ] 912 | } 913 | ], 914 | "source": [ 915 | "from sklearn.metrics import classification_report\n", 916 | "print(classification_report(y_test,y_pred))" 917 | ] 918 | }, 919 | { 920 | "cell_type": "code", 921 | "execution_count": 19, 922 | "metadata": {}, 923 | "outputs": [ 924 | { 925 | "data": { 926 | "text/plain": [ 927 | "array([1, 1, 1, ..., 1, 1, 1])" 928 | ] 929 | }, 930 | "execution_count": 19, 931 | "metadata": {}, 932 | "output_type": "execute_result" 933 | } 934 | ], 935 | "source": [ 936 | "y_pred" 937 | ] 938 | }, 939 | { 940 | "cell_type": "code", 941 | "execution_count": null, 942 | "metadata": {}, 943 | "outputs": [], 944 | "source": [] 945 | } 946 | ], 947 | "metadata": { 948 | "kernelspec": { 949 | "display_name": "Python 3", 950 | "language": "python", 951 | "name": "python3" 952 | }, 953 | "language_info": { 954 | "codemirror_mode": { 955 | "name": "ipython", 956 | "version": 3 957 | }, 958 | "file_extension": ".py", 959 | "mimetype": "text/x-python", 960 | "name": 
"python", 961 | "nbconvert_exporter": "python", 962 | "pygments_lexer": "ipython3", 963 | "version": "3.5.3" 964 | } 965 | }, 966 | "nbformat": 4, 967 | "nbformat_minor": 4 968 | } 969 | -------------------------------------------------------------------------------- /paper/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass[11pt,a4paper,english]{article} 2 | \usepackage[T1]{fontenc} 3 | \usepackage[utf8]{inputenc} 4 | \usepackage{babel} 5 | \usepackage{blindtext} 6 | 7 | \title{Deep learning orderbook models for digital assets} 8 | \author{% 9 | Mattijs de-Paepe \\ 10 | \small Cambridge University\\ 11 | \and 12 | Vivek \\ 13 | \small Cambridge University\\ 14 | \and 15 | Rakshit Jha \\ 16 | \small Cambridge University\\ 17 | \and 18 | Ruizhou \\ 19 | \small Cambridge University\\ 20 | \and 21 | Samuel Holt \\ 22 | \small Fifthrow Technologies\\ 23 | \and 24 | James West\\ 25 | \small Globe Research\\ 26 | } 27 | 28 | \begin{document} 29 | \maketitle 30 | 31 | \begin{abstract} 32 | Digital assets (cryptocurrencies) have a far more relaxed regulatory oversight than traditional financial instruments, which means that their orderbooks have a variety of complex phenomena that don’t occur in traditional markets. This is an investigation on how to build a deep learning model to forecast price action based on order books for cryptocurrencies. 33 | \end{abstract} 34 | \newpage 35 | 36 | \tableofcontents\newpage 37 | \section{Introduction} 38 | Digital assets (cryptocurrencies) have a far more relaxed regulatory oversight than traditional financial instruments, which means that their orderbooks have a variety of complex phenomena that don’t occur in traditional markets. 39 | This is an investigation on how to build a deep learning model to forecast price action based on order books for cryptocurrencies. 40 | Firstly implementing prior work, and investigating how to improve such models. 
41 | 42 | 43 | As an extension, we determine which features are relevant and which are the most important influencing factors for this model. 44 | \section{Data} 45 | \section{Methodology} 46 | \section{Empirical Study} 47 | \section{Conclusion} 48 | \end{document} 49 | --------------------------------------------------------------------------------