├── .gitignore ├── .whitesource ├── LICENSE ├── README.md ├── notebook ├── [Experiment] Long Short Term Memory - Multi-dimensional - Validation.ipynb ├── [Experiment] Long Short Term Memory - Sanity Check - 1 feature - Absolute Return.ipynb ├── [Experiment] Long Short Term Memory - Sanity Check - 1 feature.ipynb ├── [Experiment] Long Short Term Memory - Sanity Check - Absolute Return.ipynb ├── [Experiment] Long Short Term Memory - Sanity Check.ipynb ├── [Experiment] Long Short Term Memory - Stateful vs Stateless - Multi-dimensional.ipynb ├── [Experiment] Long Short Term Memory - Stateful vs Stateless.ipynb ├── [Experiment] Long Short Term Memory - Training - 1 feature - Absolute Return.ipynb ├── [Experiment] Long Short Term Memory - Training - 1 feature.ipynb ├── [Experiment] Long Short Term Memory - Training - 1 ticker - Absolute Return.ipynb ├── [Experiment] Long Short Term Memory - Training - Absolute Return.ipynb ├── [Experiment] Long Short Term Memory - Training.ipynb ├── [Official] Backtesting - Upper bound.ipynb ├── [Official] Backtesting LSTM - 1 feature - Absolute Return.ipynb ├── [Official] Backtesting LSTM - 1 feature - Dropout.ipynb ├── [Official] Backtesting LSTM - 1 feature.ipynb ├── [Official] Backtesting LSTM - Absolute Return.ipynb ├── [Official] Backtesting LSTM.ipynb ├── [Official] Backtesting.ipynb ├── [Official] Benchmark Machine Learning.ipynb ├── [Official] Data Preparation - Frankfurt Stock Exchange.ipynb ├── [Official] Data Preparation and Visualization.ipynb ├── [Official] Long Short Term Memory - 1 feature.ipynb ├── [Official] Long Short Term Memory - Price.ipynb ├── [Official] Long Short Term Memory.ipynb ├── [Official] Trading Algorithm.ipynb ├── [Official] [Trading Algorithm - Baseline 1] Equally weighted portfolio.ipynb ├── [Trading Algorithm - Baseline 1] Random strategy.ipynb ├── [Trading Algorithm - Baseline 1] Statistics.ipynb └── best_model.h5 └── src ├── calculate_returns.py ├── divide_period.py ├── make_dataframe.py ├── 
make_dataset.py ├── random_forest.py ├── random_strategy.py ├── train.py ├── train_one_ticker.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | data/* 107 | # !data/dowjones_calculated 108 | # !data/dowjones_calculated/* 109 | 
model/* 110 | -------------------------------------------------------------------------------- /.whitesource: -------------------------------------------------------------------------------- 1 | { 2 | "scanSettings": { 3 | "baseBranches": [] 4 | }, 5 | "checkRunSettings": { 6 | "vulnerableCheckRunConclusionLevel": "failure", 7 | "displayMode": "diff" 8 | }, 9 | "issueSettings": { 10 | "minSeverityLevel": "LOW" 11 | } 12 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 tqa236 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Algorithmic trading using LSTM 2 | Reproduce the result of the paper "Deep Learning with Long Short-Term Memory Networks for Financial Market Prediction" 3 | -------------------------------------------------------------------------------- /notebook/[Experiment] Long Short Term Memory - Multi-dimensional - Validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-03-10T15:15:02.021024Z", 9 | "start_time": "2019-03-10T15:15:01.029972Z" 10 | }, 11 | "scrolled": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "# List all device\n", 16 | "from tensorflow.python.client import device_lib\n", 17 | "# print(device_lib.list_local_devices())" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": { 24 | "ExecuteTime": { 25 | "end_time": "2019-03-10T15:15:02.340559Z", 26 | "start_time": "2019-03-10T15:15:02.022700Z" 27 | }, 28 | "scrolled": true 29 | }, 30 | "outputs": [ 31 | { 32 | "name": "stderr", 33 | "output_type": "stream", 34 | "text": [ 35 | "Using TensorFlow backend.\n" 36 | ] 37 | }, 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "['/job:localhost/replica:0/task:0/device:GPU:0']" 42 | ] 43 | }, 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "# Check available GPU\n", 51 | "from keras import backend as K\n", 52 | "K.tensorflow_backend._get_available_gpus()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "metadata": { 59 | "ExecuteTime": { 60 | "end_time": "2019-03-10T15:15:02.345722Z", 61 | "start_time": "2019-03-10T15:15:02.342072Z" 62 | } 63 | }, 64 | 
"outputs": [], 65 | "source": [ 66 | "import os\n", 67 | "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\";\n", 68 | "# The GPU id to use, usually either \"0\" or \"1\";\n", 69 | "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\"; " 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": { 76 | "ExecuteTime": { 77 | "end_time": "2019-03-10T15:15:02.747170Z", 78 | "start_time": "2019-03-10T15:15:02.347906Z" 79 | } 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "# Importing the libraries\n", 84 | "import numpy as np\n", 85 | "import pandas as pd\n", 86 | "from keras.models import Sequential\n", 87 | "from keras.layers import Dense, LSTM, Dropout, Reshape, Lambda, GRU, BatchNormalization, Bidirectional\n", 88 | "from keras.preprocessing.sequence import TimeseriesGenerator\n", 89 | "from keras.callbacks import EarlyStopping, ModelCheckpoint\n", 90 | "from keras.activations import softmax\n", 91 | "from keras.optimizers import SGD, RMSprop\n", 92 | "import math\n", 93 | "import pickle\n", 94 | "import matplotlib.pyplot as plt\n", 95 | "from keras.utils import to_categorical\n", 96 | "from sklearn.preprocessing import StandardScaler" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": { 103 | "ExecuteTime": { 104 | "end_time": "2019-03-10T15:15:02.753249Z", 105 | "start_time": "2019-03-10T15:15:02.749539Z" 106 | } 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "def calculate_class(returns):\n", 111 | " \"\"\"Find the class for each LSTM sequence based on the median returns.\"\"\"\n", 112 | " median_returns = returns.median(axis=1)\n", 113 | " labels = returns.iloc[:, :].apply(lambda x: np.where\n", 114 | " (x >= median_returns, 1, 0), axis=0)\n", 115 | " return labels" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 6, 121 | "metadata": { 122 | "ExecuteTime": { 123 | "end_time": "2019-03-10T15:15:02.840157Z", 124 | "start_time": "2019-03-10T15:15:02.754637Z" 125 
| } 126 | }, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "(750, 31)" 132 | ] 133 | }, 134 | "execution_count": 6, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "np.random.seed(2)\n", 141 | "feature = 31\n", 142 | "x_train = np.random.rand(750, feature)\n", 143 | "x_train.shape" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 7, 149 | "metadata": { 150 | "ExecuteTime": { 151 | "end_time": "2019-03-10T15:15:02.913412Z", 152 | "start_time": "2019-03-10T15:15:02.845965Z" 153 | } 154 | }, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "(750, 31, 2)" 160 | ] 161 | }, 162 | "execution_count": 7, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "y_train = to_categorical(calculate_class(pd.DataFrame(x_train)).values, 2)\n", 169 | "y_train.shape" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 8, 175 | "metadata": { 176 | "ExecuteTime": { 177 | "end_time": "2019-03-10T15:15:02.977431Z", 178 | "start_time": "2019-03-10T15:15:02.914749Z" 179 | } 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "x_test = np.random.rand(750, feature)\n", 184 | "y_test = to_categorical(calculate_class(pd.DataFrame(x_test)).values, 2)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 9, 190 | "metadata": { 191 | "ExecuteTime": { 192 | "end_time": "2019-03-10T15:15:03.040801Z", 193 | "start_time": "2019-03-10T15:15:02.978941Z" 194 | } 195 | }, 196 | "outputs": [ 197 | { 198 | "data": { 199 | "text/plain": [ 200 | "array([0.4359949 , 0.02592623, 0.54966248, 0.43532239, 0.4203678 ,\n", 201 | " 0.33033482, 0.20464863, 0.61927097, 0.29965467, 0.26682728,\n", 202 | " 0.62113383, 0.52914209, 0.13457995, 0.51357812, 0.18443987,\n", 203 | " 0.78533515, 0.85397529, 0.49423684, 0.84656149, 0.07964548,\n", 204 | " 0.50524609, 0.0652865 , 0.42812233, 0.09653092, 
0.12715997,\n", 205 | " 0.59674531, 0.226012 , 0.10694568, 0.22030621, 0.34982629,\n", 206 | " 0.46778748])" 207 | ] 208 | }, 209 | "execution_count": 9, 210 | "metadata": {}, 211 | "output_type": "execute_result" 212 | } 213 | ], 214 | "source": [ 215 | "x_train[0]" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 10, 221 | "metadata": { 222 | "ExecuteTime": { 223 | "end_time": "2019-03-10T15:15:03.110679Z", 224 | "start_time": "2019-03-10T15:15:03.044348Z" 225 | } 226 | }, 227 | "outputs": [ 228 | { 229 | "data": { 230 | "text/plain": [ 231 | "array([0.14867022, 0.13899823, 0.10494235, 0.87939913, 0.18732867,\n", 232 | " 0.22262717, 0.7317139 , 0.77066332, 0.10318812, 0.38814662,\n", 233 | " 0.56174004, 0.35915058, 0.41018272, 0.8014471 , 0.81498221,\n", 234 | " 0.87985186, 0.85469715, 0.81734218, 0.66587059, 0.85641202,\n", 235 | " 0.54491559, 0.67412301, 0.34791387, 0.87840982, 0.60886867,\n", 236 | " 0.4042137 , 0.12943719, 0.66850456, 0.93534669, 0.88344742,\n", 237 | " 0.57987801])" 238 | ] 239 | }, 240 | "execution_count": 10, 241 | "metadata": {}, 242 | "output_type": "execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "x_test[0]" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 11, 252 | "metadata": { 253 | "ExecuteTime": { 254 | "end_time": "2019-03-10T15:15:03.176886Z", 255 | "start_time": "2019-03-10T15:15:03.112562Z" 256 | } 257 | }, 258 | "outputs": [], 259 | "source": [ 260 | "timestep = 240" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 12, 266 | "metadata": { 267 | "ExecuteTime": { 268 | "end_time": "2019-03-10T15:15:03.290749Z", 269 | "start_time": "2019-03-10T15:15:03.179221Z" 270 | } 271 | }, 272 | "outputs": [ 273 | { 274 | "name": "stdout", 275 | "output_type": "stream", 276 | "text": [ 277 | "x shape: (509, 240, 31)\n", 278 | "y shape: (509, 31, 2)\n" 279 | ] 280 | } 281 | ], 282 | "source": [ 283 | "x_series = [x_train[i:i+timestep, :] for i in 
range(1, x_train.shape[0] - timestep)]\n", 284 | "y_series = [y_train[i+timestep] for i in range(y_train.shape[0] - timestep - 1)]\n", 285 | "x = np.array(x_series)\n", 286 | "y = np.array(y_series)\n", 287 | "print(f\"x shape: {x.shape}\")\n", 288 | "print(f\"y shape: {y.shape}\")" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 28, 294 | "metadata": { 295 | "ExecuteTime": { 296 | "end_time": "2019-03-10T15:25:47.817681Z", 297 | "start_time": "2019-03-10T15:25:47.779716Z" 298 | } 299 | }, 300 | "outputs": [ 301 | { 302 | "name": "stdout", 303 | "output_type": "stream", 304 | "text": [ 305 | "x1 shape: (509, 240, 31)\n", 306 | "y1 shape: (509, 31, 2)\n" 307 | ] 308 | } 309 | ], 310 | "source": [ 311 | "x_series1 = [x_test[i:i+timestep, :] for i in range(1, x_test.shape[0] - timestep)]\n", 312 | "y_series1 = [y_test[i+timestep] for i in range(y_test.shape[0] - timestep - 1)]\n", 313 | "x1 = np.array(x_series1)\n", 314 | "y1 = np.array(y_series1)\n", 315 | "print(f\"x1 shape: {x1.shape}\")\n", 316 | "print(f\"y1 shape: {y1.shape}\")" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 24, 322 | "metadata": { 323 | "ExecuteTime": { 324 | "end_time": "2019-03-10T15:16:57.614508Z", 325 | "start_time": "2019-03-10T15:16:57.365758Z" 326 | } 327 | }, 328 | "outputs": [ 329 | { 330 | "name": "stdout", 331 | "output_type": "stream", 332 | "text": [ 333 | "_________________________________________________________________\n", 334 | "Layer (type) Output Shape Param # \n", 335 | "=================================================================\n", 336 | "lstm_2 (LSTM) (None, 25) 5700 \n", 337 | "_________________________________________________________________\n", 338 | "dense_3 (Dense) (None, 62) 1612 \n", 339 | "_________________________________________________________________\n", 340 | "reshape_2 (Reshape) (None, 31, 2) 0 \n", 341 | "_________________________________________________________________\n", 342 | "dense_4 
(Dense) (None, 31, 2) 6 \n", 343 | "=================================================================\n", 344 | "Total params: 7,318\n", 345 | "Trainable params: 7,318\n", 346 | "Non-trainable params: 0\n", 347 | "_________________________________________________________________\n" 348 | ] 349 | } 350 | ], 351 | "source": [ 352 | "regressor = Sequential()\n", 353 | "regressor.add(LSTM(units=25, input_shape=(timestep, feature)))\n", 354 | "regressor.add(Dense(feature * 2, activation='relu'))\n", 355 | "regressor.add(Reshape((feature, 2)))\n", 356 | "# regressor.add(Lambda(lambda x: softmax(x, axis=-1)))\n", 357 | "regressor.add(Dense(2, activation='softmax'))\n", 358 | "regressor.compile(loss='binary_crossentropy',\n", 359 | " optimizer='rmsprop',\n", 360 | " metrics=['accuracy'])\n", 361 | "regressor.summary()" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 27, 367 | "metadata": { 368 | "ExecuteTime": { 369 | "end_time": "2019-03-10T15:21:12.670058Z", 370 | "start_time": "2019-03-10T15:20:59.345962Z" 371 | } 372 | }, 373 | "outputs": [ 374 | { 375 | "name": "stdout", 376 | "output_type": "stream", 377 | "text": [ 378 | "Train on 509 samples, validate on 509 samples\n", 379 | "Epoch 1/100\n", 380 | "509/509 [==============================] - 4s 7ms/step - loss: 0.4044 - acc: 0.8138 - val_loss: 0.4569 - val_acc: 0.7725\n", 381 | "Epoch 2/100\n", 382 | "509/509 [==============================] - 4s 7ms/step - loss: 0.4009 - acc: 0.8156 - val_loss: 0.4536 - val_acc: 0.7772\n", 383 | "Epoch 3/100\n", 384 | "509/509 [==============================] - 4s 7ms/step - loss: 0.3970 - acc: 0.8186 - val_loss: 0.4505 - val_acc: 0.7782\n", 385 | "Epoch 4/100\n", 386 | "384/509 [=====================>........] 
- ETA: 0s - loss: 0.3954 - acc: 0.8165" 387 | ] 388 | }, 389 | { 390 | "ename": "KeyboardInterrupt", 391 | "evalue": "", 392 | "output_type": "error", 393 | "traceback": [ 394 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 395 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 396 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# result = regressor.fit_generator(train_gen, steps_per_epoch=len(train_gen), epochs=1000)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m result = regressor.fit(x, y, epochs=100, validation_data=(x1, y1), callbacks = [EarlyStopping(monitor='val_loss', mode='min', patience=10),\n\u001b[0;32m----> 3\u001b[0;31m ModelCheckpoint(filepath='best_model.h5', monitor='val_acc', save_best_only=True)])\n\u001b[0m", 397 | "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)\u001b[0m\n\u001b[1;32m 1037\u001b[0m \u001b[0minitial_epoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minitial_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1038\u001b[0m \u001b[0msteps_per_epoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msteps_per_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1039\u001b[0;31m validation_steps=validation_steps)\n\u001b[0m\u001b[1;32m 1040\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1041\u001b[0m def evaluate(self, x=None, y=None,\n", 398 | "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/engine/training_arrays.py\u001b[0m in \u001b[0;36mfit_loop\u001b[0;34m(model, 
f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[0mins_batch\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mins_batch\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 199\u001b[0;31m \u001b[0mouts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mins_batch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 200\u001b[0m \u001b[0mouts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mto_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mouts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0ml\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mo\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout_labels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mouts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 399 | "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 2713\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_legacy_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2714\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2715\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2716\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2717\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mpy_any\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mis_tensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 400 | "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 2673\u001b[0m \u001b[0mfetched\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_callable_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0marray_vals\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_metadata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2674\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2675\u001b[0;31m \u001b[0mfetched\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_callable_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0marray_vals\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2676\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mfetched\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2677\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 401 | "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1437\u001b[0m ret = tf_session.TF_SessionRunCallable(\n\u001b[1;32m 1438\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_session\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_handle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1439\u001b[0;31m run_metadata_ptr)\n\u001b[0m\u001b[1;32m 1440\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1441\u001b[0m \u001b[0mproto_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 402 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 403 | ] 404 | } 405 | ], 406 | "source": [ 407 | "# result = regressor.fit_generator(train_gen, steps_per_epoch=len(train_gen), epochs=1000)\n", 408 | "result = regressor.fit(x, y, epochs=100, validation_data=(x1, y1), callbacks = [EarlyStopping(monitor='val_loss', mode='min', patience=10),\n", 409 | " ModelCheckpoint(filepath='best_model.h5', monitor='val_acc', save_best_only=True)])" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 
414 | "execution_count": 26, 415 | "metadata": { 416 | "ExecuteTime": { 417 | "end_time": "2019-03-10T15:20:39.149785Z", 418 | "start_time": "2019-03-10T15:20:39.137993Z" 419 | } 420 | }, 421 | "outputs": [ 422 | { 423 | "ename": "NameError", 424 | "evalue": "name 'result' is not defined", 425 | "output_type": "error", 426 | "traceback": [ 427 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 428 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 429 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhistory\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"acc\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhistory\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"val_acc\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 430 | "\u001b[0;31mNameError\u001b[0m: name 'result' is not defined" 431 | ] 432 | } 433 | ], 434 | "source": [ 435 | "plt.plot(result.history[\"acc\"])\n", 436 | "plt.plot(result.history[\"val_acc\"])" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": null, 442 | "metadata": { 443 | "ExecuteTime": { 444 | "end_time": "2019-03-10T15:15:05.223053Z", 445 | "start_time": "2019-03-10T15:15:01.084Z" 446 | } 447 | }, 448 | "outputs": [], 449 | "source": [ 450 | "plt.plot(result.history[\"loss\"])\n", 451 | "plt.plot(result.history[\"val_loss\"])" 452 | ] 453 | } 454 | ], 455 | "metadata": { 456 | "kernelspec": { 457 | "display_name": "projet_S5", 458 | "language": "python", 459 | "name": 
"projet_s5" 460 | }, 461 | "language_info": { 462 | "codemirror_mode": { 463 | "name": "ipython", 464 | "version": 3 465 | }, 466 | "file_extension": ".py", 467 | "mimetype": "text/x-python", 468 | "name": "python", 469 | "nbconvert_exporter": "python", 470 | "pygments_lexer": "ipython3", 471 | "version": "3.6.8" 472 | }, 473 | "toc": { 474 | "base_numbering": 1, 475 | "nav_menu": {}, 476 | "number_sections": true, 477 | "sideBar": true, 478 | "skip_h1_title": false, 479 | "title_cell": "Table of Contents", 480 | "title_sidebar": "Contents", 481 | "toc_cell": false, 482 | "toc_position": {}, 483 | "toc_section_display": true, 484 | "toc_window_display": false 485 | } 486 | }, 487 | "nbformat": 4, 488 | "nbformat_minor": 2 489 | } 490 | -------------------------------------------------------------------------------- /notebook/[Official] Backtesting - Upper bound.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-03-04T22:20:00.623019Z", 9 | "start_time": "2019-03-04T22:20:00.432075Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import pandas as pd" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": { 21 | "ExecuteTime": { 22 | "end_time": "2019-03-04T22:20:01.712331Z", 23 | "start_time": "2019-03-04T22:20:00.624514Z" 24 | } 25 | }, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 
201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | "
NameAABAAAPLAMZNAXPBACATCSCOCVXDISGE...MSFTNKEPFEPGTRVUNHUTXVZWMTXOM
Date
2006-01-0340.9110.6847.5852.5870.4457.8017.4559.0824.4035.37...26.8410.7423.7858.7845.9961.7356.5330.3846.2358.47
2006-01-0440.9710.7147.2551.9571.1759.2717.8558.9123.9935.32...26.9710.6924.5558.8946.5061.8856.1931.2746.3258.57
2006-01-0541.5310.6347.6552.5070.3359.2718.3558.1924.4135.23...26.9910.7624.5858.7046.9561.6955.9831.6345.6958.28
2006-01-0643.2110.9047.8752.6869.3560.4518.7759.2524.7435.47...26.9110.7224.8558.6447.2162.9056.1631.3545.8859.43
2006-01-0943.4210.8647.0853.9968.7761.5519.0658.9525.0035.38...26.8610.8824.8559.0847.2361.4056.8031.4845.7159.40
\n", 218 | "

5 rows × 31 columns

\n", 219 | "
" 220 | ], 221 | "text/plain": [ 222 | "Name AABA AAPL AMZN AXP BA CAT CSCO CVX DIS \\\n", 223 | "Date \n", 224 | "2006-01-03 40.91 10.68 47.58 52.58 70.44 57.80 17.45 59.08 24.40 \n", 225 | "2006-01-04 40.97 10.71 47.25 51.95 71.17 59.27 17.85 58.91 23.99 \n", 226 | "2006-01-05 41.53 10.63 47.65 52.50 70.33 59.27 18.35 58.19 24.41 \n", 227 | "2006-01-06 43.21 10.90 47.87 52.68 69.35 60.45 18.77 59.25 24.74 \n", 228 | "2006-01-09 43.42 10.86 47.08 53.99 68.77 61.55 19.06 58.95 25.00 \n", 229 | "\n", 230 | "Name GE ... MSFT NKE PFE PG TRV UNH UTX \\\n", 231 | "Date ... \n", 232 | "2006-01-03 35.37 ... 26.84 10.74 23.78 58.78 45.99 61.73 56.53 \n", 233 | "2006-01-04 35.32 ... 26.97 10.69 24.55 58.89 46.50 61.88 56.19 \n", 234 | "2006-01-05 35.23 ... 26.99 10.76 24.58 58.70 46.95 61.69 55.98 \n", 235 | "2006-01-06 35.47 ... 26.91 10.72 24.85 58.64 47.21 62.90 56.16 \n", 236 | "2006-01-09 35.38 ... 26.86 10.88 24.85 59.08 47.23 61.40 56.80 \n", 237 | "\n", 238 | "Name VZ WMT XOM \n", 239 | "Date \n", 240 | "2006-01-03 30.38 46.23 58.47 \n", 241 | "2006-01-04 31.27 46.32 58.57 \n", 242 | "2006-01-05 31.63 45.69 58.28 \n", 243 | "2006-01-06 31.35 45.88 59.43 \n", 244 | "2006-01-09 31.48 45.71 59.40 \n", 245 | "\n", 246 | "[5 rows x 31 columns]" 247 | ] 248 | }, 249 | "execution_count": 2, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "stocks = pd.read_csv(\"../data/dowjones/all_stocks_2006-01-01_to_2018-01-01.csv\", index_col='Date',parse_dates=['Date'])\n", 256 | "stocks = stocks[[\"Close\", \"Name\"]]\n", 257 | "stocks = stocks.pivot_table(values='Close', index=stocks.index, columns='Name', aggfunc='first')\n", 258 | "stocks.head()" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 3, 264 | "metadata": { 265 | "ExecuteTime": { 266 | "end_time": "2019-03-04T22:20:01.755068Z", 267 | "start_time": "2019-03-04T22:20:01.713442Z" 268 | } 269 | }, 270 | "outputs": [ 271 | { 272 | "data": { 273 | 
"text/html": [ 274 | "
\n", 275 | "\n", 288 | "\n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " 
\n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | "
NameAABAAAPLAMZNAXPBACATCSCOCVXDISGE...MSFTNKEPFEPGTRVUNHUTXVZWMTXOM
Date
2006-01-040.0014670.002809-0.006936-0.0119820.0103630.0254330.022923-0.002877-0.016803-0.001414...0.004844-0.0046550.0323800.0018710.0110890.002430-0.0060150.0292960.0019470.001710
2006-01-050.013669-0.0074700.0084660.010587-0.0118030.0000000.028011-0.0122220.017507-0.002548...0.0007420.0065480.001222-0.0032260.009677-0.003070-0.0037370.011513-0.013601-0.004951
2006-01-060.0404530.0254000.0046170.003429-0.0139340.0199090.0228880.0182160.0135190.006812...-0.002964-0.0037170.010985-0.0010220.0055380.0196140.003215-0.0088520.0041580.019732
2006-01-090.004860-0.003670-0.0165030.024867-0.0083630.0181970.015450-0.0050630.010509-0.002537...-0.0018580.0149250.0000000.0075030.000424-0.0238470.0113960.004147-0.003705-0.000505
2006-01-10-0.0101340.063536-0.030374-0.0029640.004799-0.004062-0.0047220.0049190.012800-0.005370...0.0052120.001838-0.016499-0.003893-0.0071990.0197070.0007040.0041300.0032820.007744
\n", 462 | "

5 rows × 31 columns

\n", 463 | "
" 464 | ], 465 | "text/plain": [ 466 | "Name AABA AAPL AMZN AXP BA CAT \\\n", 467 | "Date \n", 468 | "2006-01-04 0.001467 0.002809 -0.006936 -0.011982 0.010363 0.025433 \n", 469 | "2006-01-05 0.013669 -0.007470 0.008466 0.010587 -0.011803 0.000000 \n", 470 | "2006-01-06 0.040453 0.025400 0.004617 0.003429 -0.013934 0.019909 \n", 471 | "2006-01-09 0.004860 -0.003670 -0.016503 0.024867 -0.008363 0.018197 \n", 472 | "2006-01-10 -0.010134 0.063536 -0.030374 -0.002964 0.004799 -0.004062 \n", 473 | "\n", 474 | "Name CSCO CVX DIS GE ... MSFT \\\n", 475 | "Date ... \n", 476 | "2006-01-04 0.022923 -0.002877 -0.016803 -0.001414 ... 0.004844 \n", 477 | "2006-01-05 0.028011 -0.012222 0.017507 -0.002548 ... 0.000742 \n", 478 | "2006-01-06 0.022888 0.018216 0.013519 0.006812 ... -0.002964 \n", 479 | "2006-01-09 0.015450 -0.005063 0.010509 -0.002537 ... -0.001858 \n", 480 | "2006-01-10 -0.004722 0.004919 0.012800 -0.005370 ... 0.005212 \n", 481 | "\n", 482 | "Name NKE PFE PG TRV UNH UTX \\\n", 483 | "Date \n", 484 | "2006-01-04 -0.004655 0.032380 0.001871 0.011089 0.002430 -0.006015 \n", 485 | "2006-01-05 0.006548 0.001222 -0.003226 0.009677 -0.003070 -0.003737 \n", 486 | "2006-01-06 -0.003717 0.010985 -0.001022 0.005538 0.019614 0.003215 \n", 487 | "2006-01-09 0.014925 0.000000 0.007503 0.000424 -0.023847 0.011396 \n", 488 | "2006-01-10 0.001838 -0.016499 -0.003893 -0.007199 0.019707 0.000704 \n", 489 | "\n", 490 | "Name VZ WMT XOM \n", 491 | "Date \n", 492 | "2006-01-04 0.029296 0.001947 0.001710 \n", 493 | "2006-01-05 0.011513 -0.013601 -0.004951 \n", 494 | "2006-01-06 -0.008852 0.004158 0.019732 \n", 495 | "2006-01-09 0.004147 -0.003705 -0.000505 \n", 496 | "2006-01-10 0.004130 0.003282 0.007744 \n", 497 | "\n", 498 | "[5 rows x 31 columns]" 499 | ] 500 | }, 501 | "execution_count": 3, 502 | "metadata": {}, 503 | "output_type": "execute_result" 504 | } 505 | ], 506 | "source": [ 507 | "real_returns = (stocks - stocks.shift(1)) / stocks.shift(1)\n", 508 | "real_returns = 
real_returns.dropna()\n", 509 | "real_returns.head()" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": 4, 515 | "metadata": { 516 | "ExecuteTime": { 517 | "end_time": "2019-03-04T22:20:01.793446Z", 518 | "start_time": "2019-03-04T22:20:01.757070Z" 519 | } 520 | }, 521 | "outputs": [], 522 | "source": [ 523 | "labels = pd.read_csv(\"../data/dowjones_calculated/labels.csv\", index_col='Date', parse_dates=['Date'])" 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": 5, 529 | "metadata": { 530 | "ExecuteTime": { 531 | "end_time": "2019-03-04T22:20:01.883028Z", 532 | "start_time": "2019-03-04T22:20:01.795329Z" 533 | } 534 | }, 535 | "outputs": [ 536 | { 537 | "data": { 538 | "text/plain": [ 539 | "(3015, 31)" 540 | ] 541 | }, 542 | "execution_count": 5, 543 | "metadata": {}, 544 | "output_type": "execute_result" 545 | } 546 | ], 547 | "source": [ 548 | "labels.shape" 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": 6, 554 | "metadata": { 555 | "ExecuteTime": { 556 | "end_time": "2019-03-04T22:20:01.951594Z", 557 | "start_time": "2019-03-04T22:20:01.884435Z" 558 | } 559 | }, 560 | "outputs": [], 561 | "source": [ 562 | "k = 10" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": 7, 568 | "metadata": { 569 | "ExecuteTime": { 570 | "end_time": "2019-03-04T22:20:02.021016Z", 571 | "start_time": "2019-03-04T22:20:01.955397Z" 572 | } 573 | }, 574 | "outputs": [], 575 | "source": [ 576 | "final_returns = real_returns[750:3000].mul(labels[750:3000])" 577 | ] 578 | }, 579 | { 580 | "cell_type": "code", 581 | "execution_count": 8, 582 | "metadata": { 583 | "ExecuteTime": { 584 | "end_time": "2019-03-04T22:20:02.090299Z", 585 | "start_time": "2019-03-04T22:20:02.024960Z" 586 | } 587 | }, 588 | "outputs": [ 589 | { 590 | "data": { 591 | "text/plain": [ 592 | "42347.751327396145" 593 | ] 594 | }, 595 | "execution_count": 8, 596 | "metadata": {}, 597 | "output_type": 
"execute_result" 598 | } 599 | ], 600 | "source": [ 601 | "(final_returns + 1).product().sum()/(2 * k)" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "execution_count": 9, 607 | "metadata": { 608 | "ExecuteTime": { 609 | "end_time": "2019-03-04T22:20:02.156823Z", 610 | "start_time": "2019-03-04T22:20:02.092813Z" 611 | } 612 | }, 613 | "outputs": [ 614 | { 615 | "data": { 616 | "text/plain": [ 617 | "930673.1112343696" 618 | ] 619 | }, 620 | "execution_count": 9, 621 | "metadata": {}, 622 | "output_type": "execute_result" 623 | } 624 | ], 625 | "source": [ 626 | "(1 + final_returns.sum(axis = 1)/(2 * k)).product()" 627 | ] 628 | } 629 | ], 630 | "metadata": { 631 | "kernelspec": { 632 | "display_name": "projet_S5", 633 | "language": "python", 634 | "name": "projet_s5" 635 | }, 636 | "language_info": { 637 | "codemirror_mode": { 638 | "name": "ipython", 639 | "version": 3 640 | }, 641 | "file_extension": ".py", 642 | "mimetype": "text/x-python", 643 | "name": "python", 644 | "nbconvert_exporter": "python", 645 | "pygments_lexer": "ipython3", 646 | "version": "3.6.8" 647 | }, 648 | "toc": { 649 | "base_numbering": 1, 650 | "nav_menu": {}, 651 | "number_sections": true, 652 | "sideBar": true, 653 | "skip_h1_title": false, 654 | "title_cell": "Table of Contents", 655 | "title_sidebar": "Contents", 656 | "toc_cell": false, 657 | "toc_position": {}, 658 | "toc_section_display": true, 659 | "toc_window_display": false 660 | } 661 | }, 662 | "nbformat": 4, 663 | "nbformat_minor": 2 664 | } 665 | -------------------------------------------------------------------------------- /notebook/[Official] Backtesting LSTM - 1 feature - Absolute Return.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-03-10T19:03:30.208741Z", 9 | "start_time": "2019-03-10T19:03:29.088794Z" 10 | }, 11 | "scrolled": 
true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "# List all device\n", 16 | "from tensorflow.python.client import device_lib\n", 17 | "# print(device_lib.list_local_devices())" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": { 24 | "ExecuteTime": { 25 | "end_time": "2019-03-10T19:03:30.254928Z", 26 | "start_time": "2019-03-10T19:03:30.210234Z" 27 | }, 28 | "scrolled": true 29 | }, 30 | "outputs": [ 31 | { 32 | "name": "stderr", 33 | "output_type": "stream", 34 | "text": [ 35 | "Using TensorFlow backend.\n" 36 | ] 37 | }, 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "[]" 42 | ] 43 | }, 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "# Check available GPU\n", 51 | "from keras import backend as K\n", 52 | "K.tensorflow_backend._get_available_gpus()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "metadata": { 59 | "ExecuteTime": { 60 | "end_time": "2019-03-10T19:03:30.317634Z", 61 | "start_time": "2019-03-10T19:03:30.257471Z" 62 | } 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "import os\n", 67 | "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\";\n", 68 | "# The GPU id to use, usually either \"0\" or \"1\";\n", 69 | "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\"; " 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": { 76 | "ExecuteTime": { 77 | "end_time": "2019-03-10T19:03:30.598527Z", 78 | "start_time": "2019-03-10T19:03:30.319606Z" 79 | } 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "# Importing the libraries\n", 84 | "import numpy as np\n", 85 | "import pandas as pd\n", 86 | "from keras.models import Sequential\n", 87 | "from keras.layers import Dense, LSTM, Dropout, Reshape, Lambda\n", 88 | "from keras.preprocessing.sequence import TimeseriesGenerator\n", 89 | "from keras.callbacks import EarlyStopping, ModelCheckpoint\n", 90 | "from keras.activations import 
softmax\n", 91 | "from keras.optimizers import SGD\n", 92 | "from keras.models import load_model\n", 93 | "from keras.utils import to_categorical\n", 94 | "import math\n", 95 | "import pickle\n", 96 | "from sklearn.preprocessing import StandardScaler\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": { 103 | "ExecuteTime": { 104 | "end_time": "2019-03-10T19:03:30.665916Z", 105 | "start_time": "2019-03-10T19:03:30.600628Z" 106 | } 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "index = \"dowjones\"\n", 111 | "# index = \"frankfurt\"\n", 112 | "with open(f\"../data/{index}_calculated/absolute_periods750_250_240.txt\", \"rb\") as fp: # Unpickling\n", 113 | " dataset = pickle.load(fp)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": { 120 | "ExecuteTime": { 121 | "end_time": "2019-03-10T19:03:30.674429Z", 122 | "start_time": "2019-03-10T19:03:30.667818Z" 123 | } 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "timestep = 240\n", 128 | "feature = 31" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 7, 134 | "metadata": { 135 | "ExecuteTime": { 136 | "end_time": "2019-03-10T19:03:30.747804Z", 137 | "start_time": "2019-03-10T19:03:30.675703Z" 138 | } 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "def long_short_postion(returns, k):\n", 143 | " position_ = np.copy(returns)\n", 144 | " short = np.argpartition(position_, k)[:k]\n", 145 | " neutral = np.argpartition(position_, len(position_) - k)[:(len(position_) - k)]\n", 146 | " position_[:] = 1\n", 147 | " position_[neutral] = 0\n", 148 | " position_[short] = -1\n", 149 | " return position_" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 8, 155 | "metadata": { 156 | "ExecuteTime": { 157 | "end_time": "2019-03-10T19:04:14.940189Z", 158 | "start_time": "2019-03-10T19:03:30.750019Z" 159 | } 160 | }, 161 | "outputs": [ 162 | { 163 | "name": "stdout", 164 
| "output_type": "stream", 165 | "text": [ 166 | "0.5104516129032258\n", 167 | "0.48812903225806453\n", 168 | "0.5107096774193548\n", 169 | "0.5192258064516129\n", 170 | "0.5370322580645162\n", 171 | "0.5296774193548387\n", 172 | "0.5021935483870967\n", 173 | "0.5174193548387097\n", 174 | "0.4967741935483871\n" 175 | ] 176 | } 177 | ], 178 | "source": [ 179 | "positions = []\n", 180 | "for i in range(len(dataset[0])):\n", 181 | " model_period = f\"../model/LSTM/{index}2_absolute__1feature_period{i}.h5\"\n", 182 | " regressor = load_model(model_period, custom_objects={\"softmax\": softmax})\n", 183 | " x_train = dataset[0][i][0].values\n", 184 | " scaler = StandardScaler().fit(x_train)\n", 185 | " \n", 186 | " x_test = scaler.transform(dataset[1][i][0])\n", 187 | " y_test = to_categorical(dataset[1][i][1].values, 2)\n", 188 | "# print(f\"Period {i}\")\n", 189 | "# print(f\"x test shape: {x_test.shape}\")\n", 190 | "# print(f\"y test shape: {y_test.shape}\")\n", 191 | "# print(f\"x_final shape: {x_final.shape}\")\n", 192 | "# print(f\"y_final shape: {y_final.shape}\")\n", 193 | " x_series = [x_test[i:i + timestep, j]\n", 194 | " for i in range(x_test.shape[0] - timestep) for j in range(feature)]\n", 195 | " y_series = [y_test[i + timestep, j]\n", 196 | " for i in range(y_test.shape[0] - timestep) for j in range(feature)]\n", 197 | " x_final = np.array(x_series)\n", 198 | " y_final = np.array(y_series)\n", 199 | " x_final = np.reshape(x_final, (x_final.shape[0], x_final.shape[1], 1))\n", 200 | "\n", 201 | " predicted = regressor.predict(x_final)\n", 202 | "# predicted = np.reshape\n", 203 | "# print(predicted.shape)\n", 204 | " predicted = np.reshape(predicted[:, 1], (250, 31))\n", 205 | " label = predicted > 0.5\n", 206 | " label = label * 1 # Convert boolean to int\n", 207 | " print(sum(y_test[-250:, :, 1] == label).sum()/label.size)\n", 208 | " positions.append(predicted)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 9, 214 | 
"metadata": { 215 | "ExecuteTime": { 216 | "end_time": "2019-03-10T19:04:15.164991Z", 217 | "start_time": "2019-03-10T19:04:14.942215Z" 218 | } 219 | }, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/html": [ 224 | "
\n", 225 | "\n", 238 | "\n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " 
\n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | "
NameAABAAAPLAMZNAXPBACATCSCOCVXDISGE...MSFTNKEPFEPGTRVUNHUTXVZWMTXOM
Date
2006-01-0340.9110.6847.5852.5870.4457.8017.4559.0824.4035.37...26.8410.7423.7858.7845.9961.7356.5330.3846.2358.47
2006-01-0440.9710.7147.2551.9571.1759.2717.8558.9123.9935.32...26.9710.6924.5558.8946.5061.8856.1931.2746.3258.57
2006-01-0541.5310.6347.6552.5070.3359.2718.3558.1924.4135.23...26.9910.7624.5858.7046.9561.6955.9831.6345.6958.28
2006-01-0643.2110.9047.8752.6869.3560.4518.7759.2524.7435.47...26.9110.7224.8558.6447.2162.9056.1631.3545.8859.43
2006-01-0943.4210.8647.0853.9968.7761.5519.0658.9525.0035.38...26.8610.8824.8559.0847.2361.4056.8031.4845.7159.40
\n", 412 | "

5 rows × 31 columns

\n", 413 | "
" 414 | ], 415 | "text/plain": [ 416 | "Name AABA AAPL AMZN AXP BA CAT CSCO CVX DIS \\\n", 417 | "Date \n", 418 | "2006-01-03 40.91 10.68 47.58 52.58 70.44 57.80 17.45 59.08 24.40 \n", 419 | "2006-01-04 40.97 10.71 47.25 51.95 71.17 59.27 17.85 58.91 23.99 \n", 420 | "2006-01-05 41.53 10.63 47.65 52.50 70.33 59.27 18.35 58.19 24.41 \n", 421 | "2006-01-06 43.21 10.90 47.87 52.68 69.35 60.45 18.77 59.25 24.74 \n", 422 | "2006-01-09 43.42 10.86 47.08 53.99 68.77 61.55 19.06 58.95 25.00 \n", 423 | "\n", 424 | "Name GE ... MSFT NKE PFE PG TRV UNH UTX \\\n", 425 | "Date ... \n", 426 | "2006-01-03 35.37 ... 26.84 10.74 23.78 58.78 45.99 61.73 56.53 \n", 427 | "2006-01-04 35.32 ... 26.97 10.69 24.55 58.89 46.50 61.88 56.19 \n", 428 | "2006-01-05 35.23 ... 26.99 10.76 24.58 58.70 46.95 61.69 55.98 \n", 429 | "2006-01-06 35.47 ... 26.91 10.72 24.85 58.64 47.21 62.90 56.16 \n", 430 | "2006-01-09 35.38 ... 26.86 10.88 24.85 59.08 47.23 61.40 56.80 \n", 431 | "\n", 432 | "Name VZ WMT XOM \n", 433 | "Date \n", 434 | "2006-01-03 30.38 46.23 58.47 \n", 435 | "2006-01-04 31.27 46.32 58.57 \n", 436 | "2006-01-05 31.63 45.69 58.28 \n", 437 | "2006-01-06 31.35 45.88 59.43 \n", 438 | "2006-01-09 31.48 45.71 59.40 \n", 439 | "\n", 440 | "[5 rows x 31 columns]" 441 | ] 442 | }, 443 | "execution_count": 9, 444 | "metadata": {}, 445 | "output_type": "execute_result" 446 | } 447 | ], 448 | "source": [ 449 | "stocks = pd.read_csv(\"../data/dowjones/all_stocks_2006-01-01_to_2018-01-01.csv\", index_col='Date',parse_dates=['Date'])\n", 450 | "stocks = stocks[[\"Close\", \"Name\"]]\n", 451 | "stocks = stocks.pivot_table(values='Close', index=stocks.index, columns='Name', aggfunc='first')\n", 452 | "stocks.head()" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 10, 458 | "metadata": { 459 | "ExecuteTime": { 460 | "end_time": "2019-03-10T19:04:15.779895Z", 461 | "start_time": "2019-03-10T19:04:15.166478Z" 462 | } 463 | }, 464 | "outputs": [ 465 | { 466 | "data": { 467 
| "text/html": [ 468 | "
\n", 469 | "\n", 482 | "\n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " 
\n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | "
NameAABAAAPLAMZNAXPBACATCSCOCVXDISGE...MSFTNKEPFEPGTRVUNHUTXVZWMTXOM
Date
2008-12-260.0016230.0090530.006610-0.0033390.0102190.019327-0.0048930.0104850.008640-0.008690...-0.0020870.0130290.0052910.0023180.0194430.013097-0.0003910.010350-0.0016230.018606
2008-12-29-0.0372770.008972-0.045964-0.011725-0.013323-0.008895-0.0159800.017058-0.032011-0.019411...-0.008887-0.0160770.011111-0.0054520.026136-0.0277570.003912-0.001506-0.0043360.010753
2008-12-300.007576-0.0032340.0275300.0169490.0315080.0311760.0137410.0255770.0470420.010217...0.0200420.0147060.0266050.0152820.0309780.0574890.0333140.002716-0.0010890.007306
2008-12-310.019215-0.0113540.0102440.0305560.0344240.0231330.0043130.0080400.0093420.024020...0.0051710.026570-0.0022540.0114530.006009-0.0162720.0105580.0201630.0183470.015778
2009-01-020.0532790.0631670.0600620.0420490.0604640.0501460.0404910.0344730.0542090.053704...0.0457820.0400000.0316210.0158520.0000000.0372180.0251870.0218290.0199790.022673
\n", 656 | "

5 rows × 31 columns

\n", 657 | "
" 658 | ], 659 | "text/plain": [ 660 | "Name AABA AAPL AMZN AXP BA CAT \\\n", 661 | "Date \n", 662 | "2008-12-26 0.001623 0.009053 0.006610 -0.003339 0.010219 0.019327 \n", 663 | "2008-12-29 -0.037277 0.008972 -0.045964 -0.011725 -0.013323 -0.008895 \n", 664 | "2008-12-30 0.007576 -0.003234 0.027530 0.016949 0.031508 0.031176 \n", 665 | "2008-12-31 0.019215 -0.011354 0.010244 0.030556 0.034424 0.023133 \n", 666 | "2009-01-02 0.053279 0.063167 0.060062 0.042049 0.060464 0.050146 \n", 667 | "\n", 668 | "Name CSCO CVX DIS GE ... MSFT NKE \\\n", 669 | "Date ... \n", 670 | "2008-12-26 -0.004893 0.010485 0.008640 -0.008690 ... -0.002087 0.013029 \n", 671 | "2008-12-29 -0.015980 0.017058 -0.032011 -0.019411 ... -0.008887 -0.016077 \n", 672 | "2008-12-30 0.013741 0.025577 0.047042 0.010217 ... 0.020042 0.014706 \n", 673 | "2008-12-31 0.004313 0.008040 0.009342 0.024020 ... 0.005171 0.026570 \n", 674 | "2009-01-02 0.040491 0.034473 0.054209 0.053704 ... 0.045782 0.040000 \n", 675 | "\n", 676 | "Name PFE PG TRV UNH UTX VZ \\\n", 677 | "Date \n", 678 | "2008-12-26 0.005291 0.002318 0.019443 0.013097 -0.000391 0.010350 \n", 679 | "2008-12-29 0.011111 -0.005452 0.026136 -0.027757 0.003912 -0.001506 \n", 680 | "2008-12-30 0.026605 0.015282 0.030978 0.057489 0.033314 0.002716 \n", 681 | "2008-12-31 -0.002254 0.011453 0.006009 -0.016272 0.010558 0.020163 \n", 682 | "2009-01-02 0.031621 0.015852 0.000000 0.037218 0.025187 0.021829 \n", 683 | "\n", 684 | "Name WMT XOM \n", 685 | "Date \n", 686 | "2008-12-26 -0.001623 0.018606 \n", 687 | "2008-12-29 -0.004336 0.010753 \n", 688 | "2008-12-30 -0.001089 0.007306 \n", 689 | "2008-12-31 0.018347 0.015778 \n", 690 | "2009-01-02 0.019979 0.022673 \n", 691 | "\n", 692 | "[5 rows x 31 columns]" 693 | ] 694 | }, 695 | "execution_count": 10, 696 | "metadata": {}, 697 | "output_type": "execute_result" 698 | } 699 | ], 700 | "source": [ 701 | "real_returns = (stocks - stocks.shift(1)) / stocks.shift(1)\n", 702 | "real_returns = 
real_returns.dropna()\n", 703 | "real_returns[750:3000].head()" 704 | ] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "execution_count": 11, 709 | "metadata": { 710 | "ExecuteTime": { 711 | "end_time": "2019-03-10T19:04:15.827807Z", 712 | "start_time": "2019-03-10T19:04:15.781310Z" 713 | } 714 | }, 715 | "outputs": [ 716 | { 717 | "data": { 718 | "text/plain": [ 719 | "(2250, 31)" 720 | ] 721 | }, 722 | "execution_count": 11, 723 | "metadata": {}, 724 | "output_type": "execute_result" 725 | } 726 | ], 727 | "source": [ 728 | "all_positions = np.concatenate(positions,axis=0)\n", 729 | "all_positions.shape" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": 12, 735 | "metadata": { 736 | "ExecuteTime": { 737 | "end_time": "2019-03-10T19:04:19.673728Z", 738 | "start_time": "2019-03-10T19:04:15.829485Z" 739 | } 740 | }, 741 | "outputs": [ 742 | { 743 | "name": "stdout", 744 | "output_type": "stream", 745 | "text": [ 746 | "rebalance = 0.416847796589599\n", 747 | "rebalance1 = 0.7287858977877059\n", 748 | "rebalance = 0.988530751320795\n", 749 | "rebalance1 = 1.058599060576066\n", 750 | "rebalance = 2.2003725835255565\n", 751 | "rebalance1 = 1.5484746988471223\n", 752 | "rebalance = 1.0373938949183026\n", 753 | "rebalance1 = 1.0532690812815328\n", 754 | "rebalance = 1.2411430431165373\n", 755 | "rebalance1 = 1.144256999889448\n", 756 | "rebalance = 1.1235350642658526\n", 757 | "rebalance1 = 1.0831576083988692\n", 758 | "rebalance = 0.9623747025919739\n", 759 | "rebalance1 = 0.9998904990603397\n", 760 | "rebalance = 0.9177365598653195\n", 761 | "rebalance1 = 0.9738202513502956\n", 762 | "rebalance = 0.9114912909403768\n", 763 | "rebalance1 = 0.9685717723658719\n", 764 | "rebalance = 0.9048209816604091\n", 765 | "rebalance1 = 0.963766259664476\n", 766 | "rebalance = 0.8799231762933988\n", 767 | "rebalance1 = 0.9489119605846448\n", 768 | "rebalance = 0.8327474649310889\n", 769 | "rebalance1 = 0.9221604112598187\n", 770 | "rebalance = 
0.8459709771506109\n", 771 | "rebalance1 = 0.9285075023608181\n", 772 | "rebalance = 0.8591237981217743\n", 773 | "rebalance1 = 0.9348027933880991\n", 774 | "rebalance = 0.8302393252717782\n", 775 | "rebalance1 = 0.9183123644921956\n" 776 | ] 777 | } 778 | ], 779 | "source": [ 780 | "for k in range(1, 16):\n", 781 | " probabilities = pd.DataFrame(data=all_positions, index=real_returns[750:3000].index, columns=real_returns.columns)\n", 782 | " position = probabilities.apply(lambda x: long_short_postion(x, k), axis=1, result_type='broadcast')\n", 783 | " final_returns = real_returns[750:3000].mul(position)\n", 784 | "# no_rebalance = (final_returns + 1).product().sum()/(2 * k)\n", 785 | " rebalance = (1 + final_returns.sum(axis = 1)/k).product()\n", 786 | " rebalance1 = (1 + final_returns.sum(axis = 1)/(2 * k)).product()\n", 787 | "# print(f\"no rebalance = {no_rebalance}\")\n", 788 | " print(f\"rebalance = {rebalance}\")\n", 789 | " print(f\"rebalance1 = {rebalance1}\")" 790 | ] 791 | } 792 | ], 793 | "metadata": { 794 | "kernelspec": { 795 | "display_name": "projet_S5", 796 | "language": "python", 797 | "name": "projet_s5" 798 | }, 799 | "language_info": { 800 | "codemirror_mode": { 801 | "name": "ipython", 802 | "version": 3 803 | }, 804 | "file_extension": ".py", 805 | "mimetype": "text/x-python", 806 | "name": "python", 807 | "nbconvert_exporter": "python", 808 | "pygments_lexer": "ipython3", 809 | "version": "3.6.7" 810 | }, 811 | "toc": { 812 | "base_numbering": 1, 813 | "nav_menu": {}, 814 | "number_sections": true, 815 | "sideBar": true, 816 | "skip_h1_title": false, 817 | "title_cell": "Table of Contents", 818 | "title_sidebar": "Contents", 819 | "toc_cell": false, 820 | "toc_position": {}, 821 | "toc_section_display": true, 822 | "toc_window_display": false 823 | } 824 | }, 825 | "nbformat": 4, 826 | "nbformat_minor": 2 827 | } 828 | -------------------------------------------------------------------------------- /notebook/[Official] Backtesting LSTM - 
Absolute Return.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-03-10T18:50:02.855975Z", 9 | "start_time": "2019-03-10T18:50:01.880636Z" 10 | }, 11 | "scrolled": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "# List all device\n", 16 | "from tensorflow.python.client import device_lib\n", 17 | "# print(device_lib.list_local_devices())" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": { 24 | "ExecuteTime": { 25 | "end_time": "2019-03-10T18:50:02.896741Z", 26 | "start_time": "2019-03-10T18:50:02.857968Z" 27 | }, 28 | "scrolled": true 29 | }, 30 | "outputs": [ 31 | { 32 | "name": "stderr", 33 | "output_type": "stream", 34 | "text": [ 35 | "Using TensorFlow backend.\n" 36 | ] 37 | }, 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "[]" 42 | ] 43 | }, 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "# Check available GPU\n", 51 | "from keras import backend as K\n", 52 | "K.tensorflow_backend._get_available_gpus()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "metadata": { 59 | "ExecuteTime": { 60 | "end_time": "2019-03-10T18:50:02.929988Z", 61 | "start_time": "2019-03-10T18:50:02.898478Z" 62 | } 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "import os\n", 67 | "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\";\n", 68 | "# The GPU id to use, usually either \"0\" or \"1\";\n", 69 | "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\"; " 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": { 76 | "ExecuteTime": { 77 | "end_time": "2019-03-10T18:50:03.195356Z", 78 | "start_time": "2019-03-10T18:50:02.933022Z" 79 | } 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "# Importing the libraries\n", 84 | "import 
numpy as np\n", 85 | "import pandas as pd\n", 86 | "from keras.models import Sequential\n", 87 | "from keras.layers import Dense, LSTM, Dropout, Reshape, Lambda\n", 88 | "from keras.preprocessing.sequence import TimeseriesGenerator\n", 89 | "from keras.callbacks import EarlyStopping, ModelCheckpoint\n", 90 | "from keras.activations import softmax\n", 91 | "from keras.optimizers import SGD\n", 92 | "from keras.models import load_model\n", 93 | "from keras.utils import to_categorical\n", 94 | "import math\n", 95 | "import pickle\n", 96 | "from sklearn.preprocessing import StandardScaler\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": { 103 | "ExecuteTime": { 104 | "end_time": "2019-03-10T18:50:03.262046Z", 105 | "start_time": "2019-03-10T18:50:03.196828Z" 106 | } 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "index = \"dowjones\"\n", 111 | "# index = \"frankfurt\"\n", 112 | "with open(f\"../data/{index}_calculated/absolute_periods750_250_240.txt\", \"rb\") as fp: # Unpickling\n", 113 | " dataset = pickle.load(fp)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": { 120 | "ExecuteTime": { 121 | "end_time": "2019-03-10T18:50:03.293144Z", 122 | "start_time": "2019-03-10T18:50:03.263450Z" 123 | } 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "timestep = 240" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 7, 133 | "metadata": { 134 | "ExecuteTime": { 135 | "end_time": "2019-03-10T18:50:03.377283Z", 136 | "start_time": "2019-03-10T18:50:03.294391Z" 137 | } 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "def long_short_postion(returns, k):\n", 142 | " position_ = np.copy(returns)\n", 143 | " short = np.argpartition(position_, k)[:k]\n", 144 | " neutral = np.argpartition(position_, len(position_) - k)[:(len(position_) - k)]\n", 145 | " position_[:] = 1\n", 146 | " position_[neutral] = 0\n", 147 | " position_[short] = -1\n", 
148 | " return position_" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 8, 154 | "metadata": { 155 | "ExecuteTime": { 156 | "end_time": "2019-03-10T18:50:21.861882Z", 157 | "start_time": "2019-03-10T18:50:03.379109Z" 158 | } 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "positions = []\n", 163 | "for i in range(len(dataset[0])):\n", 164 | " model_period = f\"../model/LSTM/{index}2_absolute__period{i}.h5\"\n", 165 | " regressor = load_model(model_period, custom_objects={\"softmax\": softmax})\n", 166 | " x_train = dataset[0][i][0].values\n", 167 | " scaler = StandardScaler().fit(x_train)\n", 168 | " \n", 169 | " x_test = scaler.transform(dataset[1][i][0])\n", 170 | " y_test = to_categorical(dataset[1][i][1].values, 2)\n", 171 | "# print(f\"Period {i}\")\n", 172 | "# print(f\"x test shape: {x_test.shape}\")\n", 173 | "# print(f\"y test shape: {y_test.shape}\")\n", 174 | "# print(f\"x_final shape: {x_final.shape}\")\n", 175 | "# print(f\"y_final shape: {y_final.shape}\")\n", 176 | " x_series = [x_test[i:i + timestep, :]\n", 177 | " for i in range(x_test.shape[0] - timestep)]\n", 178 | " y_series = [y_test[i + timestep]\n", 179 | " for i in range(y_test.shape[0] - timestep)]\n", 180 | " x_final = np.array(x_series)\n", 181 | " y_final = np.array(y_series)\n", 182 | "\n", 183 | "\n", 184 | "\n", 185 | " predicted = regressor.predict(x_final)\n", 186 | " label = predicted > 0.5\n", 187 | " label = label * 1 # Convert boolean to int\n", 188 | "# print((sum(y_test[:, :, 1] == label[:, :, 1])/(y_test.size/2)).sum())\n", 189 | " positions.append(predicted[:, :, 1])" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 9, 195 | "metadata": { 196 | "ExecuteTime": { 197 | "end_time": "2019-03-10T18:50:22.146192Z", 198 | "start_time": "2019-03-10T18:50:21.864031Z" 199 | }, 200 | "scrolled": true 201 | }, 202 | "outputs": [ 203 | { 204 | "data": { 205 | "text/html": [ 206 | "
\n", 207 | "\n", 220 | "\n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " 
\n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | "
NameAABAAAPLAMZNAXPBACATCSCOCVXDISGE...MSFTNKEPFEPGTRVUNHUTXVZWMTXOM
Date
2006-01-0340.9110.6847.5852.5870.4457.8017.4559.0824.4035.37...26.8410.7423.7858.7845.9961.7356.5330.3846.2358.47
2006-01-0440.9710.7147.2551.9571.1759.2717.8558.9123.9935.32...26.9710.6924.5558.8946.5061.8856.1931.2746.3258.57
2006-01-0541.5310.6347.6552.5070.3359.2718.3558.1924.4135.23...26.9910.7624.5858.7046.9561.6955.9831.6345.6958.28
2006-01-0643.2110.9047.8752.6869.3560.4518.7759.2524.7435.47...26.9110.7224.8558.6447.2162.9056.1631.3545.8859.43
2006-01-0943.4210.8647.0853.9968.7761.5519.0658.9525.0035.38...26.8610.8824.8559.0847.2361.4056.8031.4845.7159.40
\n", 394 | "

5 rows × 31 columns

\n", 395 | "
" 396 | ], 397 | "text/plain": [ 398 | "Name AABA AAPL AMZN AXP BA CAT CSCO CVX DIS \\\n", 399 | "Date \n", 400 | "2006-01-03 40.91 10.68 47.58 52.58 70.44 57.80 17.45 59.08 24.40 \n", 401 | "2006-01-04 40.97 10.71 47.25 51.95 71.17 59.27 17.85 58.91 23.99 \n", 402 | "2006-01-05 41.53 10.63 47.65 52.50 70.33 59.27 18.35 58.19 24.41 \n", 403 | "2006-01-06 43.21 10.90 47.87 52.68 69.35 60.45 18.77 59.25 24.74 \n", 404 | "2006-01-09 43.42 10.86 47.08 53.99 68.77 61.55 19.06 58.95 25.00 \n", 405 | "\n", 406 | "Name GE ... MSFT NKE PFE PG TRV UNH UTX \\\n", 407 | "Date ... \n", 408 | "2006-01-03 35.37 ... 26.84 10.74 23.78 58.78 45.99 61.73 56.53 \n", 409 | "2006-01-04 35.32 ... 26.97 10.69 24.55 58.89 46.50 61.88 56.19 \n", 410 | "2006-01-05 35.23 ... 26.99 10.76 24.58 58.70 46.95 61.69 55.98 \n", 411 | "2006-01-06 35.47 ... 26.91 10.72 24.85 58.64 47.21 62.90 56.16 \n", 412 | "2006-01-09 35.38 ... 26.86 10.88 24.85 59.08 47.23 61.40 56.80 \n", 413 | "\n", 414 | "Name VZ WMT XOM \n", 415 | "Date \n", 416 | "2006-01-03 30.38 46.23 58.47 \n", 417 | "2006-01-04 31.27 46.32 58.57 \n", 418 | "2006-01-05 31.63 45.69 58.28 \n", 419 | "2006-01-06 31.35 45.88 59.43 \n", 420 | "2006-01-09 31.48 45.71 59.40 \n", 421 | "\n", 422 | "[5 rows x 31 columns]" 423 | ] 424 | }, 425 | "execution_count": 9, 426 | "metadata": {}, 427 | "output_type": "execute_result" 428 | } 429 | ], 430 | "source": [ 431 | "stocks = pd.read_csv(\"../data/dowjones/all_stocks_2006-01-01_to_2018-01-01.csv\", index_col='Date',parse_dates=['Date'])\n", 432 | "stocks = stocks[[\"Close\", \"Name\"]]\n", 433 | "stocks = stocks.pivot_table(values='Close', index=stocks.index, columns='Name', aggfunc='first')\n", 434 | "stocks.head()" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": 10, 440 | "metadata": { 441 | "ExecuteTime": { 442 | "end_time": "2019-03-10T18:50:22.196696Z", 443 | "start_time": "2019-03-10T18:50:22.149149Z" 444 | } 445 | }, 446 | "outputs": [ 447 | { 448 | "data": { 449 
| "text/html": [ 450 | "
\n", 451 | "\n", 464 | "\n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " 
\n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | "
NameAABAAAPLAMZNAXPBACATCSCOCVXDISGE...MSFTNKEPFEPGTRVUNHUTXVZWMTXOM
Date
2008-12-260.0016230.0090530.006610-0.0033390.0102190.019327-0.0048930.0104850.008640-0.008690...-0.0020870.0130290.0052910.0023180.0194430.013097-0.0003910.010350-0.0016230.018606
2008-12-29-0.0372770.008972-0.045964-0.011725-0.013323-0.008895-0.0159800.017058-0.032011-0.019411...-0.008887-0.0160770.011111-0.0054520.026136-0.0277570.003912-0.001506-0.0043360.010753
2008-12-300.007576-0.0032340.0275300.0169490.0315080.0311760.0137410.0255770.0470420.010217...0.0200420.0147060.0266050.0152820.0309780.0574890.0333140.002716-0.0010890.007306
2008-12-310.019215-0.0113540.0102440.0305560.0344240.0231330.0043130.0080400.0093420.024020...0.0051710.026570-0.0022540.0114530.006009-0.0162720.0105580.0201630.0183470.015778
2009-01-020.0532790.0631670.0600620.0420490.0604640.0501460.0404910.0344730.0542090.053704...0.0457820.0400000.0316210.0158520.0000000.0372180.0251870.0218290.0199790.022673
\n", 638 | "

5 rows × 31 columns

\n", 639 | "
" 640 | ], 641 | "text/plain": [ 642 | "Name AABA AAPL AMZN AXP BA CAT \\\n", 643 | "Date \n", 644 | "2008-12-26 0.001623 0.009053 0.006610 -0.003339 0.010219 0.019327 \n", 645 | "2008-12-29 -0.037277 0.008972 -0.045964 -0.011725 -0.013323 -0.008895 \n", 646 | "2008-12-30 0.007576 -0.003234 0.027530 0.016949 0.031508 0.031176 \n", 647 | "2008-12-31 0.019215 -0.011354 0.010244 0.030556 0.034424 0.023133 \n", 648 | "2009-01-02 0.053279 0.063167 0.060062 0.042049 0.060464 0.050146 \n", 649 | "\n", 650 | "Name CSCO CVX DIS GE ... MSFT NKE \\\n", 651 | "Date ... \n", 652 | "2008-12-26 -0.004893 0.010485 0.008640 -0.008690 ... -0.002087 0.013029 \n", 653 | "2008-12-29 -0.015980 0.017058 -0.032011 -0.019411 ... -0.008887 -0.016077 \n", 654 | "2008-12-30 0.013741 0.025577 0.047042 0.010217 ... 0.020042 0.014706 \n", 655 | "2008-12-31 0.004313 0.008040 0.009342 0.024020 ... 0.005171 0.026570 \n", 656 | "2009-01-02 0.040491 0.034473 0.054209 0.053704 ... 0.045782 0.040000 \n", 657 | "\n", 658 | "Name PFE PG TRV UNH UTX VZ \\\n", 659 | "Date \n", 660 | "2008-12-26 0.005291 0.002318 0.019443 0.013097 -0.000391 0.010350 \n", 661 | "2008-12-29 0.011111 -0.005452 0.026136 -0.027757 0.003912 -0.001506 \n", 662 | "2008-12-30 0.026605 0.015282 0.030978 0.057489 0.033314 0.002716 \n", 663 | "2008-12-31 -0.002254 0.011453 0.006009 -0.016272 0.010558 0.020163 \n", 664 | "2009-01-02 0.031621 0.015852 0.000000 0.037218 0.025187 0.021829 \n", 665 | "\n", 666 | "Name WMT XOM \n", 667 | "Date \n", 668 | "2008-12-26 -0.001623 0.018606 \n", 669 | "2008-12-29 -0.004336 0.010753 \n", 670 | "2008-12-30 -0.001089 0.007306 \n", 671 | "2008-12-31 0.018347 0.015778 \n", 672 | "2009-01-02 0.019979 0.022673 \n", 673 | "\n", 674 | "[5 rows x 31 columns]" 675 | ] 676 | }, 677 | "execution_count": 10, 678 | "metadata": {}, 679 | "output_type": "execute_result" 680 | } 681 | ], 682 | "source": [ 683 | "real_returns = (stocks - stocks.shift(1)) / stocks.shift(1)\n", 684 | "real_returns = 
real_returns.dropna()\n", 685 | "real_returns[750:3000].head()" 686 | ] 687 | }, 688 | { 689 | "cell_type": "code", 690 | "execution_count": 11, 691 | "metadata": { 692 | "ExecuteTime": { 693 | "end_time": "2019-03-10T18:50:22.232861Z", 694 | "start_time": "2019-03-10T18:50:22.199243Z" 695 | } 696 | }, 697 | "outputs": [ 698 | { 699 | "data": { 700 | "text/plain": [ 701 | "(2250, 31)" 702 | ] 703 | }, 704 | "execution_count": 11, 705 | "metadata": {}, 706 | "output_type": "execute_result" 707 | } 708 | ], 709 | "source": [ 710 | "all_positions = np.concatenate(positions,axis=0)\n", 711 | "all_positions.shape" 712 | ] 713 | }, 714 | { 715 | "cell_type": "code", 716 | "execution_count": 12, 717 | "metadata": { 718 | "ExecuteTime": { 719 | "end_time": "2019-03-10T18:50:26.373525Z", 720 | "start_time": "2019-03-10T18:50:22.234334Z" 721 | } 722 | }, 723 | "outputs": [ 724 | { 725 | "name": "stdout", 726 | "output_type": "stream", 727 | "text": [ 728 | "rebalance = 0.36855354986190514\n", 729 | "rebalance1 = 0.6621851939596246\n", 730 | "rebalance = 0.4080995445308407\n", 731 | "rebalance1 = 0.6678730157955822\n", 732 | "rebalance = 0.5921330183114122\n", 733 | "rebalance1 = 0.7894692748857888\n", 734 | "rebalance = 0.5970559403035915\n", 735 | "rebalance1 = 0.7879871997916339\n", 736 | "rebalance = 0.7232641469124205\n", 737 | "rebalance1 = 0.8639078907198249\n", 738 | "rebalance = 0.8128304975543116\n", 739 | "rebalance1 = 0.9135528001160027\n", 740 | "rebalance = 0.785759152130262\n", 741 | "rebalance1 = 0.8967744833065788\n", 742 | "rebalance = 0.7776428519233565\n", 743 | "rebalance1 = 0.8909891094888095\n", 744 | "rebalance = 0.7535956852175906\n", 745 | "rebalance1 = 0.8763090744146368\n", 746 | "rebalance = 0.8567053574970215\n", 747 | "rebalance1 = 0.9335921800003164\n", 748 | "rebalance = 0.8181464485682514\n", 749 | "rebalance1 = 0.9115270819091531\n", 750 | "rebalance = 0.8744274366140588\n", 751 | "rebalance1 = 0.9416789642005597\n", 752 | "rebalance = 
0.9681492772013753\n", 753 | "rebalance1 = 0.9901472574490726\n", 754 | "rebalance = 0.9247589275299702\n", 755 | "rebalance1 = 0.9670643327244407\n", 756 | "rebalance = 0.9597354807771837\n", 757 | "rebalance1 = 0.9848117044643548\n" 758 | ] 759 | } 760 | ], 761 | "source": [ 762 | "for k in range(1, 16):\n", 763 | " probabilities = pd.DataFrame(data=all_positions, index=real_returns[750:3000].index, columns=real_returns.columns)\n", 764 | " position = probabilities.apply(lambda x: long_short_postion(x, k), axis=1, result_type='broadcast')\n", 765 | " final_returns = real_returns[750:3000].mul(position)\n", 766 | "# no_rebalance = (final_returns + 1).product().sum()/(2 * k)\n", 767 | " rebalance = (1 + final_returns.sum(axis = 1)/k).product()\n", 768 | " rebalance1 = (1 + final_returns.sum(axis = 1)/(2 * k)).product()\n", 769 | "# print(f\"no rebalance = {no_rebalance}\")\n", 770 | " print(f\"rebalance = {rebalance}\")\n", 771 | " print(f\"rebalance1 = {rebalance1}\")" 772 | ] 773 | } 774 | ], 775 | "metadata": { 776 | "kernelspec": { 777 | "display_name": "projet_S5", 778 | "language": "python", 779 | "name": "projet_s5" 780 | }, 781 | "language_info": { 782 | "codemirror_mode": { 783 | "name": "ipython", 784 | "version": 3 785 | }, 786 | "file_extension": ".py", 787 | "mimetype": "text/x-python", 788 | "name": "python", 789 | "nbconvert_exporter": "python", 790 | "pygments_lexer": "ipython3", 791 | "version": "3.6.7" 792 | }, 793 | "toc": { 794 | "base_numbering": 1, 795 | "nav_menu": {}, 796 | "number_sections": true, 797 | "sideBar": true, 798 | "skip_h1_title": false, 799 | "title_cell": "Table of Contents", 800 | "title_sidebar": "Contents", 801 | "toc_cell": false, 802 | "toc_position": {}, 803 | "toc_section_display": true, 804 | "toc_window_display": false 805 | } 806 | }, 807 | "nbformat": 4, 808 | "nbformat_minor": 2 809 | } 810 | -------------------------------------------------------------------------------- /notebook/[Official] Backtesting 
LSTM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-03-13T22:04:54.889499Z", 9 | "start_time": "2019-03-13T22:04:53.977846Z" 10 | }, 11 | "scrolled": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "# List all device\n", 16 | "from tensorflow.python.client import device_lib\n", 17 | "# print(device_lib.list_local_devices())" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": { 24 | "ExecuteTime": { 25 | "end_time": "2019-03-13T22:04:54.930443Z", 26 | "start_time": "2019-03-13T22:04:54.890979Z" 27 | }, 28 | "scrolled": true 29 | }, 30 | "outputs": [ 31 | { 32 | "name": "stderr", 33 | "output_type": "stream", 34 | "text": [ 35 | "Using TensorFlow backend.\n" 36 | ] 37 | }, 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "[]" 42 | ] 43 | }, 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "# Check available GPU\n", 51 | "from keras import backend as K\n", 52 | "K.tensorflow_backend._get_available_gpus()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "metadata": { 59 | "ExecuteTime": { 60 | "end_time": "2019-03-13T22:04:54.962585Z", 61 | "start_time": "2019-03-13T22:04:54.931895Z" 62 | } 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "import os\n", 67 | "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\";\n", 68 | "# The GPU id to use, usually either \"0\" or \"1\";\n", 69 | "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\"; " 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": { 76 | "ExecuteTime": { 77 | "end_time": "2019-03-13T22:04:55.245739Z", 78 | "start_time": "2019-03-13T22:04:54.964457Z" 79 | } 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "# Importing the libraries\n", 84 | "import numpy as 
np\n", 85 | "import pandas as pd\n", 86 | "from keras.models import Sequential\n", 87 | "from keras.layers import Dense, LSTM, Dropout, Reshape, Lambda\n", 88 | "from keras.preprocessing.sequence import TimeseriesGenerator\n", 89 | "from keras.callbacks import EarlyStopping, ModelCheckpoint\n", 90 | "from keras.activations import softmax\n", 91 | "from keras.optimizers import SGD\n", 92 | "from keras.models import load_model\n", 93 | "from keras.utils import to_categorical\n", 94 | "import math\n", 95 | "import pickle\n", 96 | "from sklearn.preprocessing import StandardScaler\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": { 103 | "ExecuteTime": { 104 | "end_time": "2019-03-13T22:04:55.298260Z", 105 | "start_time": "2019-03-13T22:04:55.247176Z" 106 | } 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "index = \"dowjones\"\n", 111 | "# index = \"frankfurt\"\n", 112 | "with open(f\"../data/{index}_calculated/periods750_250_240.txt\", \"rb\") as fp: # Unpickling\n", 113 | " dataset = pickle.load(fp)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": { 120 | "ExecuteTime": { 121 | "end_time": "2019-03-13T22:04:55.337362Z", 122 | "start_time": "2019-03-13T22:04:55.299503Z" 123 | } 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "timestep = 240" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 7, 133 | "metadata": { 134 | "ExecuteTime": { 135 | "end_time": "2019-03-13T22:04:55.406816Z", 136 | "start_time": "2019-03-13T22:04:55.340316Z" 137 | } 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "def long_short_postion(returns, k):\n", 142 | " position_ = np.copy(returns)\n", 143 | " short = np.argpartition(position_, k)[:k]\n", 144 | " neutral = np.argpartition(position_, len(position_) - k)[:(len(position_) - k)]\n", 145 | " position_[:] = 1\n", 146 | " position_[neutral] = 0\n", 147 | " position_[short] = -1\n", 148 | " return 
position_" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 8, 154 | "metadata": { 155 | "ExecuteTime": { 156 | "end_time": "2019-03-13T22:04:56.521521Z", 157 | "start_time": "2019-03-13T22:04:55.411702Z" 158 | } 159 | }, 160 | "outputs": [ 161 | { 162 | "ename": "ValueError", 163 | "evalue": "Error when checking input: expected lstm_10_input to have shape (240, 1) but got array with shape (240, 31)", 164 | "output_type": "error", 165 | "traceback": [ 166 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 167 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 168 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 24\u001b[0;31m \u001b[0mpredicted\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mregressor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_final\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 25\u001b[0m \u001b[0mlabel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpredicted\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0.5\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0mlabel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlabel\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0;31m# Convert boolean to int\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 169 | "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, x, batch_size, verbose, steps)\u001b[0m\n\u001b[1;32m 1145\u001b[0m 'argument.')\n\u001b[1;32m 1146\u001b[0m \u001b[0;31m# Validate user data.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1147\u001b[0;31m 
\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_standardize_user_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1148\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstateful\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1149\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 170 | "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36m_standardize_user_data\u001b[0;34m(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)\u001b[0m\n\u001b[1;32m 747\u001b[0m \u001b[0mfeed_input_shapes\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 748\u001b[0m \u001b[0mcheck_batch_axis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m# Don't enforce the batch size.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 749\u001b[0;31m exception_prefix='input')\n\u001b[0m\u001b[1;32m 750\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 751\u001b[0m 
\u001b[0;32mif\u001b[0m \u001b[0my\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 171 | "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/engine/training_utils.py\u001b[0m in \u001b[0;36mstandardize_input_data\u001b[0;34m(data, names, shapes, check_batch_axis, exception_prefix)\u001b[0m\n\u001b[1;32m 135\u001b[0m \u001b[0;34m': expected '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' to have shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' but got array with shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 137\u001b[0;31m str(data_shape))\n\u001b[0m\u001b[1;32m 138\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 172 | "\u001b[0;31mValueError\u001b[0m: Error when checking input: expected lstm_10_input to have shape (240, 1) but got array with shape (240, 31)" 173 | ] 174 | } 175 | ], 176 | "source": [ 177 | "positions = []\n", 178 | "for i in range(len(dataset[0])):\n", 179 | " model_period = f\"../model/LSTM/{index}2_period{i}.h5\"\n", 180 | " regressor = load_model(model_period, custom_objects={\"softmax\": softmax})\n", 181 | " x_train = dataset[0][i][0].values\n", 182 | " scaler = StandardScaler().fit(x_train)\n", 183 | " \n", 184 | " x_test = scaler.transform(dataset[1][i][0])\n", 185 | " y_test = to_categorical(dataset[1][i][1].values, 2)\n", 186 | "# print(f\"Period {i}\")\n", 187 | "# print(f\"x test shape: {x_test.shape}\")\n", 188 | "# 
print(f\"y test shape: {y_test.shape}\")\n", 189 | "# print(f\"x_final shape: {x_final.shape}\")\n", 190 | "# print(f\"y_final shape: {y_final.shape}\")\n", 191 | " x_series = [x_test[i:i + timestep, :]\n", 192 | " for i in range(x_test.shape[0] - timestep)]\n", 193 | " y_series = [y_test[i + timestep]\n", 194 | " for i in range(y_test.shape[0] - timestep)]\n", 195 | " x_final = np.array(x_series)\n", 196 | " y_final = np.array(y_series)\n", 197 | "\n", 198 | "\n", 199 | "\n", 200 | " predicted = regressor.predict(x_final)\n", 201 | " label = predicted > 0.5\n", 202 | " label = label * 1 # Convert boolean to int\n", 203 | "# print((sum(y_test[:, :, 1] == label[:, :, 1])/(y_test.size/2)).sum())\n", 204 | " positions.append(predicted[:, :, 1])" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "ExecuteTime": { 212 | "end_time": "2019-03-13T22:04:56.524199Z", 213 | "start_time": "2019-03-13T22:04:53.997Z" 214 | } 215 | }, 216 | "outputs": [], 217 | "source": [ 218 | "stocks = pd.read_csv(\"../data/dowjones/all_stocks_2006-01-01_to_2018-01-01.csv\", index_col='Date',parse_dates=['Date'])\n", 219 | "stocks = stocks[[\"Close\", \"Name\"]]\n", 220 | "stocks = stocks.pivot_table(values='Close', index=stocks.index, columns='Name', aggfunc='first')\n", 221 | "stocks.head()" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": { 228 | "ExecuteTime": { 229 | "end_time": "2019-03-13T22:04:56.525496Z", 230 | "start_time": "2019-03-13T22:04:53.999Z" 231 | } 232 | }, 233 | "outputs": [], 234 | "source": [ 235 | "real_returns = (stocks - stocks.shift(1)) / stocks.shift(1)\n", 236 | "real_returns = real_returns.dropna()\n", 237 | "real_returns[750:3000].head()" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": { 244 | "ExecuteTime": { 245 | "end_time": "2019-03-13T22:04:56.526851Z", 246 | "start_time": 
"2019-03-13T22:04:54.000Z" 247 | } 248 | }, 249 | "outputs": [], 250 | "source": [ 251 | "all_positions = np.concatenate(positions,axis=0)\n", 252 | "all_positions.shape" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": { 259 | "ExecuteTime": { 260 | "end_time": "2019-03-13T22:04:56.528322Z", 261 | "start_time": "2019-03-13T22:04:54.002Z" 262 | } 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "for k in range(1, 16):\n", 267 | " probabilities = pd.DataFrame(data=all_positions, index=real_returns[750:3000].index, columns=real_returns.columns)\n", 268 | " position = probabilities.apply(lambda x: long_short_postion(x, k), axis=1, result_type='broadcast')\n", 269 | " final_returns = real_returns[750:3000].mul(position)\n", 270 | "# no_rebalance = (final_returns + 1).product().sum()/(2 * k)\n", 271 | "# rebalance = (1 + final_returns.sum(axis = 1)/k).product()\n", 272 | " rebalance1 = (1 + final_returns.sum(axis = 1)/(2 * k)).product()\n", 273 | "# print(f\"no rebalance = {no_rebalance}\")\n", 274 | "# print(f\"rebalance = {rebalance}\")\n", 275 | " print(f\"rebalance = {rebalance1}\")" 276 | ] 277 | } 278 | ], 279 | "metadata": { 280 | "kernelspec": { 281 | "display_name": "projet_S5", 282 | "language": "python", 283 | "name": "projet_s5" 284 | }, 285 | "language_info": { 286 | "codemirror_mode": { 287 | "name": "ipython", 288 | "version": 3 289 | }, 290 | "file_extension": ".py", 291 | "mimetype": "text/x-python", 292 | "name": "python", 293 | "nbconvert_exporter": "python", 294 | "pygments_lexer": "ipython3", 295 | "version": "3.6.7" 296 | }, 297 | "toc": { 298 | "base_numbering": 1, 299 | "nav_menu": {}, 300 | "number_sections": true, 301 | "sideBar": true, 302 | "skip_h1_title": false, 303 | "title_cell": "Table of Contents", 304 | "title_sidebar": "Contents", 305 | "toc_cell": false, 306 | "toc_position": {}, 307 | "toc_section_display": true, 308 | "toc_window_display": false 309 | } 310 | }, 311 | 
"nbformat": 4, 312 | "nbformat_minor": 2 313 | } 314 | -------------------------------------------------------------------------------- /notebook/[Official] Benchmark Machine Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-03-07T12:08:14.069749Z", 9 | "start_time": "2019-03-07T12:08:13.085083Z" 10 | } 11 | }, 12 | "outputs": [ 13 | { 14 | "name": "stderr", 15 | "output_type": "stream", 16 | "text": [ 17 | "Using TensorFlow backend.\n" 18 | ] 19 | } 20 | ], 21 | "source": [ 22 | "# Importing the libraries\n", 23 | "import numpy as np\n", 24 | "import pandas as pd\n", 25 | "from keras.models import Sequential\n", 26 | "from keras.layers import Dense, LSTM, Dropout, Reshape, Lambda\n", 27 | "from keras.preprocessing.sequence import TimeseriesGenerator\n", 28 | "from keras.callbacks import EarlyStopping, ModelCheckpoint\n", 29 | "from keras.activations import softmax\n", 30 | "from keras.optimizers import SGD\n", 31 | "import math\n", 32 | "import pickle" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": { 39 | "ExecuteTime": { 40 | "end_time": "2019-03-07T12:08:15.158729Z", 41 | "start_time": "2019-03-07T12:08:14.071377Z" 42 | } 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "with open(\"../data/dowjones_calculated/periods.txt\", \"rb\") as fp: # Unpickling\n", 47 | " dataset = pickle.load(fp)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": { 54 | "ExecuteTime": { 55 | "end_time": "2019-03-07T12:08:15.660764Z", 56 | "start_time": "2019-03-07T12:08:15.165391Z" 57 | } 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "X_train = dataset[0][0][0]\n", 62 | "meanX = X_train.mean(axis=1)\n", 63 | "stdX = X_train.std(axis = 1)\n", 64 | "X_train = X_train.sub(meanX, axis=0)\n", 65 | "X_train = 
X_train.div(stdX, axis = 0)\n", 66 | "X_train = X_train.values\n", 67 | "\n", 68 | "y_train = dataset[0][0][1].values\n", 69 | "\n", 70 | "X_test = dataset[1][0][0]\n", 71 | "meanX = X_test.mean(axis=1)\n", 72 | "stdX = X_test.std(axis = 1)\n", 73 | "X_test = X_test.sub(meanX, axis=0)\n", 74 | "X_test = X_test.div(stdX, axis = 0)\n", 75 | "X_test = X_test.values\n", 76 | "\n", 77 | "y_test = dataset[1][0][1].values" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 10, 83 | "metadata": { 84 | "ExecuteTime": { 85 | "end_time": "2019-03-07T12:26:28.894255Z", 86 | "start_time": "2019-03-07T12:26:28.887726Z" 87 | } 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "y_train = y_train * 1.0\n", 92 | "y_test = y_test * 1.0" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 4, 98 | "metadata": { 99 | "ExecuteTime": { 100 | "end_time": "2019-03-07T10:26:07.602938Z", 101 | "start_time": "2019-03-07T10:26:07.565938Z" 102 | } 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "def get_one_hot(targets, nb_classes):\n", 107 | " res = np.eye(nb_classes)[np.array(targets).reshape(-1)]\n", 108 | " return res.reshape(list(targets.shape)+[nb_classes])\n", 109 | "# y_train = get_one_hot(y_train, 2)\n", 110 | "# y_test = get_one_hot(y_test, 2)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 5, 116 | "metadata": { 117 | "ExecuteTime": { 118 | "end_time": "2019-03-07T10:26:07.693204Z", 119 | "start_time": "2019-03-07T10:26:07.605116Z" 120 | } 121 | }, 122 | "outputs": [ 123 | { 124 | "name": "stdout", 125 | "output_type": "stream", 126 | "text": [ 127 | "x train shape: (750, 31)\n", 128 | "y train shape: (750, 31)\n", 129 | "x test shape: (490, 31)\n", 130 | "y test shape: (490, 31)\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "print(f\"x train shape: {X_train.shape}\")\n", 136 | "print(f\"y train shape: {y_train.shape}\")\n", 137 | "print(f\"x test shape: {X_test.shape}\")\n", 138 | "print(f\"y test 
shape: {y_test.shape}\")\n", 139 | "# print(f\"predicted shape: {predicted.shape}\")" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 6, 145 | "metadata": { 146 | "ExecuteTime": { 147 | "end_time": "2019-03-07T10:26:07.755153Z", 148 | "start_time": "2019-03-07T10:26:07.697677Z" 149 | } 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "timestep = 10" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 7, 159 | "metadata": { 160 | "ExecuteTime": { 161 | "end_time": "2019-03-07T10:26:07.825818Z", 162 | "start_time": "2019-03-07T10:26:07.756645Z" 163 | } 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "data = X_train\n", 168 | "targets = y_train\n", 169 | "\n", 170 | "train_gen = TimeseriesGenerator(data, targets,\n", 171 | " length=timestep, sampling_rate=1,\n", 172 | " batch_size=(X_train.shape[0] - timestep))" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 8, 178 | "metadata": { 179 | "ExecuteTime": { 180 | "end_time": "2019-03-07T10:26:07.892629Z", 181 | "start_time": "2019-03-07T10:26:07.831831Z" 182 | } 183 | }, 184 | "outputs": [], 185 | "source": [ 186 | "data = X_test\n", 187 | "targets = y_test\n", 188 | "\n", 189 | "test_gen = TimeseriesGenerator(data, targets,\n", 190 | " length=timestep, sampling_rate=1,\n", 191 | " batch_size=(X_test.shape[0] - timestep))" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 9, 197 | "metadata": { 198 | "ExecuteTime": { 199 | "end_time": "2019-03-07T10:26:08.001932Z", 200 | "start_time": "2019-03-07T10:26:07.897006Z" 201 | } 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "X_train = train_gen[0][0]\n", 206 | "y_train = train_gen[0][1]\n", 207 | "X_test = test_gen[0][0]\n", 208 | "y_test = test_gen[0][1]" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 10, 214 | "metadata": { 215 | "ExecuteTime": { 216 | "end_time": "2019-03-07T10:26:08.029448Z", 217 | "start_time": 
"2019-03-07T10:26:08.003458Z" 218 | } 219 | }, 220 | "outputs": [ 221 | { 222 | "name": "stdout", 223 | "output_type": "stream", 224 | "text": [ 225 | "x train shape: (740, 10, 31)\n", 226 | "y train shape: (740, 31)\n", 227 | "x test shape: (480, 10, 31)\n", 228 | "y test shape: (480, 31)\n" 229 | ] 230 | } 231 | ], 232 | "source": [ 233 | "print(f\"x train shape: {X_train.shape}\")\n", 234 | "print(f\"y train shape: {y_train.shape}\")\n", 235 | "print(f\"x test shape: {X_test.shape}\")\n", 236 | "print(f\"y test shape: {y_test.shape}\")" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 11, 242 | "metadata": { 243 | "ExecuteTime": { 244 | "end_time": "2019-03-07T10:26:08.094488Z", 245 | "start_time": "2019-03-07T10:26:08.033868Z" 246 | } 247 | }, 248 | "outputs": [], 249 | "source": [ 250 | "X_train = X_train.transpose((0,2,1))\n", 251 | "X_train = np.reshape(X_train, (X_train.shape[0] * X_train.shape[1], timestep))\n", 252 | "y_train = np.reshape(y_train, (y_train.shape[0] * y_train.shape[1], 1))\n", 253 | "\n", 254 | "X_test = X_test.transpose((0,2,1))\n", 255 | "X_test = np.reshape(X_test, (X_test.shape[0] * X_test.shape[1], timestep))\n", 256 | "y_test = np.reshape(y_test, (y_test.shape[0] * y_test.shape[1], 1))" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 12, 262 | "metadata": { 263 | "ExecuteTime": { 264 | "end_time": "2019-03-07T10:26:08.175919Z", 265 | "start_time": "2019-03-07T10:26:08.095912Z" 266 | } 267 | }, 268 | "outputs": [ 269 | { 270 | "name": "stdout", 271 | "output_type": "stream", 272 | "text": [ 273 | "x train shape: (22940, 10)\n", 274 | "y train shape: (22940, 1)\n", 275 | "x test shape: (14880, 10)\n", 276 | "y test shape: (14880, 1)\n" 277 | ] 278 | } 279 | ], 280 | "source": [ 281 | "print(f\"x train shape: {X_train.shape}\")\n", 282 | "print(f\"y train shape: {y_train.shape}\")\n", 283 | "print(f\"x test shape: {X_test.shape}\")\n", 284 | "print(f\"y test shape: 
{y_test.shape}\")" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 12, 290 | "metadata": { 291 | "ExecuteTime": { 292 | "end_time": "2019-03-07T12:32:10.282439Z", 293 | "start_time": "2019-03-07T12:32:06.645476Z" 294 | } 295 | }, 296 | "outputs": [], 297 | "source": [ 298 | "from sklearn.ensemble import RandomForestClassifier\n", 299 | "from sklearn.ensemble import RandomForestRegressor" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 14, 305 | "metadata": { 306 | "ExecuteTime": { 307 | "end_time": "2019-03-07T10:26:09.762268Z", 308 | "start_time": "2019-03-07T10:26:08.516032Z" 309 | } 310 | }, 311 | "outputs": [ 312 | { 313 | "name": "stderr", 314 | "output_type": "stream", 315 | "text": [ 316 | "/home/tqa/anaconda3/envs/projet_S5/lib/python3.6/site-packages/sklearn/ensemble/forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", 317 | " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n", 318 | "/home/tqa/anaconda3/envs/projet_S5/lib/python3.6/site-packages/ipykernel_launcher.py:6: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", 319 | " \n" 320 | ] 321 | }, 322 | { 323 | "data": { 324 | "text/plain": [ 325 | "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", 326 | " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", 327 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 328 | " min_samples_leaf=1, min_samples_split=2,\n", 329 | " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=2,\n", 330 | " oob_score=False, random_state=0, verbose=0, warm_start=False)" 331 | ] 332 | }, 333 | "execution_count": 14, 334 | "metadata": {}, 335 | "output_type": "execute_result" 336 | } 337 | ], 338 | "source": [ 339 | "# Create a random forest Classifier. 
By convention, clf means 'Classifier'\n", 340 | "clf = RandomForestClassifier(n_jobs=2, random_state=0)\n", 341 | "\n", 342 | "# Train the Classifier to take the training features and learn how they relate\n", 343 | "# to the training y (the class labels)\n", 344 | "clf.fit(X_train, y_train)" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 13, 350 | "metadata": { 351 | "ExecuteTime": { 352 | "end_time": "2019-03-07T12:32:25.714297Z", 353 | "start_time": "2019-03-07T12:32:12.218136Z" 354 | } 355 | }, 356 | "outputs": [ 357 | { 358 | "data": { 359 | "text/plain": [ 360 | "RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n", 361 | " max_features='auto', max_leaf_nodes=None,\n", 362 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 363 | " min_samples_leaf=1, min_samples_split=2,\n", 364 | " min_weight_fraction_leaf=0.0, n_estimators=1000, n_jobs=None,\n", 365 | " oob_score=False, random_state=42, verbose=0, warm_start=False)" 366 | ] 367 | }, 368 | "execution_count": 13, 369 | "metadata": {}, 370 | "output_type": "execute_result" 371 | } 372 | ], 373 | "source": [ 374 | "rf = RandomForestRegressor(n_estimators = 1000, random_state = 42)\n", 375 | "\n", 376 | "rf.fit(X_train, y_train)" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 15, 382 | "metadata": { 383 | "ExecuteTime": { 384 | "end_time": "2019-03-07T10:26:10.749119Z", 385 | "start_time": "2019-03-07T10:26:09.764213Z" 386 | } 387 | }, 388 | "outputs": [], 389 | "source": [ 390 | "from xgboost import XGBClassifier" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": 16, 396 | "metadata": { 397 | "ExecuteTime": { 398 | "end_time": "2019-03-07T10:26:12.714370Z", 399 | "start_time": "2019-03-07T10:26:10.751271Z" 400 | }, 401 | "scrolled": true 402 | }, 403 | "outputs": [ 404 | { 405 | "name": "stderr", 406 | "output_type": "stream", 407 | "text": [ 408 | 
"/home/tqa/anaconda3/envs/projet_S5/lib/python3.6/site-packages/sklearn/preprocessing/label.py:219: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", 409 | " y = column_or_1d(y, warn=True)\n", 410 | "/home/tqa/anaconda3/envs/projet_S5/lib/python3.6/site-packages/sklearn/preprocessing/label.py:252: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", 411 | " y = column_or_1d(y, warn=True)\n" 412 | ] 413 | }, 414 | { 415 | "data": { 416 | "text/plain": [ 417 | "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n", 418 | " colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,\n", 419 | " max_depth=3, min_child_weight=1, missing=None, n_estimators=100,\n", 420 | " n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,\n", 421 | " reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n", 422 | " silent=True, subsample=1)" 423 | ] 424 | }, 425 | "execution_count": 16, 426 | "metadata": {}, 427 | "output_type": "execute_result" 428 | } 429 | ], 430 | "source": [ 431 | "# fit model on training data\n", 432 | "model = XGBClassifier()\n", 433 | "model.fit(X_train, y_train)" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 17, 439 | "metadata": { 440 | "ExecuteTime": { 441 | "end_time": "2019-03-07T10:26:12.821883Z", 442 | "start_time": "2019-03-07T10:26:12.715817Z" 443 | } 444 | }, 445 | "outputs": [], 446 | "source": [ 447 | "predict = clf.predict(X_test)" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": 18, 453 | "metadata": { 454 | "ExecuteTime": { 455 | "end_time": "2019-03-07T10:26:12.894092Z", 456 | "start_time": "2019-03-07T10:26:12.834796Z" 457 | } 458 | }, 459 | "outputs": [], 460 | "source": [ 461 | "predict = model.predict(X_test)" 462 | ] 463 | },
464 | { 465 | "cell_type": "code", 466 | "execution_count": 15, 467 | "metadata": { 468 | "ExecuteTime": { 469 | "end_time": "2019-03-07T12:35:55.590386Z", 470 | "start_time": "2019-03-07T12:35:55.456304Z" 471 | } 472 | }, 473 | "outputs": [], 474 | "source": [ 475 | "predict = rf.predict(X_test)" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": 19, 481 | "metadata": { 482 | "ExecuteTime": { 483 | "end_time": "2019-03-07T12:38:03.267099Z", 484 | "start_time": "2019-03-07T12:38:03.258752Z" 485 | } 486 | }, 487 | "outputs": [], 488 | "source": [ 489 | "b = predict.tolist()" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": 20, 495 | "metadata": { 496 | "ExecuteTime": { 497 | "end_time": "2019-03-07T10:26:13.150956Z", 498 | "start_time": "2019-03-07T10:26:12.914204Z" 499 | } 500 | }, 501 | "outputs": [], 502 | "source": [ 503 | "a = y_test.flatten().tolist()" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": 21, 509 | "metadata": { 510 | "ExecuteTime": { 511 | "end_time": "2019-03-07T10:26:13.808747Z", 512 | "start_time": "2019-03-07T10:26:13.158687Z" 513 | } 514 | }, 515 | "outputs": [ 516 | { 517 | "data": { 518 | "text/plain": [ 519 | "0.5420698924731183" 520 | ] 521 | }, 522 | "execution_count": 21, 523 | "metadata": {}, 524 | "output_type": "execute_result" 525 | } 526 | ], 527 | "source": [ 528 | "sum(i == j for i,j in zip(a,b))/len(a)" 529 | ] 530 | }, 531 | { 532 | "cell_type": "code", 533 | "execution_count": 21, 534 | "metadata": { 535 | "ExecuteTime": { 536 | "end_time": "2019-03-07T12:38:19.654303Z", 537 | "start_time": "2019-03-07T12:38:19.650975Z" 538 | } 539 | }, 540 | "outputs": [], 541 | "source": [ 542 | "c = np.array(b)" 543 | ] 544 | }, 545 | { 546 | "cell_type": "code", 547 | "execution_count": 23, 548 | "metadata": { 549 | "ExecuteTime": { 550 | "end_time": "2019-03-07T12:38:24.506514Z", 551 | "start_time": "2019-03-07T12:38:24.486118Z" 552 | } 553 | }, 554 | 
"outputs": [ 555 | { 556 | "ename": "ValueError", 557 | "evalue": "cannot reshape array of size 15190 into shape (15,31)", 558 | "output_type": "error", 559 | "traceback": [ 560 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 561 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 562 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0md\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m//\u001b[0m\u001b[0;36m31\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m31\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 563 | "\u001b[0;31mValueError\u001b[0m: cannot reshape array of size 15190 into shape (15,31)" 564 | ] 565 | } 566 | ], 567 | "source": [ 568 | "d = c.reshape(c.shape[0]//31, 31)" 569 | ] 570 | }, 571 | { 572 | "cell_type": "code", 573 | "execution_count": 25, 574 | "metadata": { 575 | "ExecuteTime": { 576 | "end_time": "2019-03-07T10:26:16.351638Z", 577 | "start_time": "2019-03-07T10:26:15.767304Z" 578 | } 579 | }, 580 | "outputs": [], 581 | "source": [ 582 | "e = y_test.reshape(y_test.shape[0]//31, 31)" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": 29, 588 | "metadata": { 589 | "ExecuteTime": { 590 | "end_time": "2019-03-07T10:26:17.631733Z", 591 | "start_time": "2019-03-07T10:26:17.554547Z" 592 | } 593 | }, 594 | "outputs": [ 595 | { 596 | "data": { 597 | "text/plain": [ 598 | "0.5420698924731183" 599 | ] 600 | }, 601 | "execution_count": 29, 602 | "metadata": {}, 603 | "output_type": "execute_result" 604 | } 605 | ], 606 | "source": [ 607 | "sum(sum(d == e))/d.size" 608 | ] 609 | } 610 | ], 611 | "metadata": { 612 | "kernelspec": { 613 | "display_name": 
"projet_S5", 614 | "language": "python", 615 | "name": "projet_s5" 616 | }, 617 | "language_info": { 618 | "codemirror_mode": { 619 | "name": "ipython", 620 | "version": 3 621 | }, 622 | "file_extension": ".py", 623 | "mimetype": "text/x-python", 624 | "name": "python", 625 | "nbconvert_exporter": "python", 626 | "pygments_lexer": "ipython3", 627 | "version": "3.6.7" 628 | }, 629 | "toc": { 630 | "base_numbering": 1, 631 | "nav_menu": {}, 632 | "number_sections": true, 633 | "sideBar": true, 634 | "skip_h1_title": false, 635 | "title_cell": "Table of Contents", 636 | "title_sidebar": "Contents", 637 | "toc_cell": false, 638 | "toc_position": {}, 639 | "toc_section_display": true, 640 | "toc_window_display": false 641 | } 642 | }, 643 | "nbformat": 4, 644 | "nbformat_minor": 2 645 | } 646 | -------------------------------------------------------------------------------- /notebook/[Official] Long Short Term Memory.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-03-10T11:29:30.263351Z", 9 | "start_time": "2019-03-10T11:29:29.331896Z" 10 | }, 11 | "scrolled": true 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "# List all device\n", 16 | "from tensorflow.python.client import device_lib\n", 17 | "# print(device_lib.list_local_devices())" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": { 24 | "ExecuteTime": { 25 | "end_time": "2019-03-10T11:29:30.600350Z", 26 | "start_time": "2019-03-10T11:29:30.265493Z" 27 | }, 28 | "scrolled": true 29 | }, 30 | "outputs": [ 31 | { 32 | "name": "stderr", 33 | "output_type": "stream", 34 | "text": [ 35 | "Using TensorFlow backend.\n" 36 | ] 37 | }, 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "['/job:localhost/replica:0/task:0/device:GPU:0']" 42 | ] 43 | }, 44 | "execution_count": 2, 45 | "metadata": {}, 46 | 
"output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "# Check available GPU\n", 51 | "from keras import backend as K\n", 52 | "K.tensorflow_backend._get_available_gpus()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "metadata": { 59 | "ExecuteTime": { 60 | "end_time": "2019-03-10T11:29:30.625708Z", 61 | "start_time": "2019-03-10T11:29:30.601959Z" 62 | } 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "import os\n", 67 | "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\";\n", 68 | "# The GPU id to use, usually either \"0\" or \"1\";\n", 69 | "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\"; " 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": { 76 | "ExecuteTime": { 77 | "end_time": "2019-03-10T11:29:31.251738Z", 78 | "start_time": "2019-03-10T11:29:30.628052Z" 79 | } 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "# Importing the libraries\n", 84 | "import numpy as np\n", 85 | "import pandas as pd\n", 86 | "from keras.models import Sequential\n", 87 | "from keras.layers import Dense, LSTM, Dropout, Reshape, Lambda, GRU, BatchNormalization, Bidirectional\n", 88 | "from keras.preprocessing.sequence import TimeseriesGenerator\n", 89 | "from keras.callbacks import EarlyStopping, ModelCheckpoint\n", 90 | "from keras.activations import softmax\n", 91 | "from keras.optimizers import SGD, RMSprop\n", 92 | "import math\n", 93 | "import pickle\n", 94 | "import matplotlib.pyplot as plt\n", 95 | "from keras.utils import to_categorical\n", 96 | "from sklearn.preprocessing import StandardScaler" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": { 103 | "ExecuteTime": { 104 | "end_time": "2019-03-10T11:29:31.271692Z", 105 | "start_time": "2019-03-10T11:29:31.253924Z" 106 | } 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "index = \"dowjones\"\n", 111 | "index = \"frankfurt\"\n", 112 | "with 
open(f\"../data/{index}_calculated/periods750_250_240.txt\", \"rb\") as fp: # Unpickling\n", 113 | " dataset = pickle.load(fp)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": { 120 | "ExecuteTime": { 121 | "end_time": "2019-03-10T11:29:31.326427Z", 122 | "start_time": "2019-03-10T11:29:31.273077Z" 123 | } 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "def normalize_data(df):\n", 128 | " \"\"\"normalize a dataframe.\"\"\"\n", 129 | " mean = df.mean(axis=1)\n", 130 | " std = df.std(axis=1)\n", 131 | " df = df.sub(mean, axis=0)\n", 132 | " df = df.div(std, axis=0)\n", 133 | " df = df.values\n", 134 | " return df\n", 135 | "def get_one_hot(targets, nb_classes):\n", 136 | " res = np.eye(nb_classes)[np.array(targets).reshape(-1)]\n", 137 | " return res.reshape(list(targets.shape)+[nb_classes])" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 7, 143 | "metadata": { 144 | "ExecuteTime": { 145 | "end_time": "2019-03-10T11:29:31.390751Z", 146 | "start_time": "2019-03-10T11:29:31.332033Z" 147 | } 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "i = 7\n", 152 | "timestep = 240" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 8, 158 | "metadata": { 159 | "ExecuteTime": { 160 | "end_time": "2019-03-10T11:29:31.491879Z", 161 | "start_time": "2019-03-10T11:29:31.392404Z" 162 | } 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "# x_train = dataset[0][i][0]['AMZN'].values * 1000\n", 167 | "# y_train = dataset[0][i][1]['AMZN'].values * 1.0\n", 168 | "# x_test = dataset[1][i][0]['AMZN'].values * 1000\n", 169 | "# y_test = dataset[1][i][1]['AMZN'].values * 1.0\n", 170 | "\n", 171 | "# x_train = dataset[0][i][0].values\n", 172 | "# x_train = (x_train - x_train.mean())/x_train.std()\n", 173 | "# y_train = dataset[0][i][1].values * 1.0\n", 174 | "# x_test = dataset[1][i][0].values\n", 175 | "# x_test = (x_test - x_test.mean())/x_test.std()\n", 176 | "# y_test = 
dataset[1][i][1].values * 1.0\n", 177 | "\n", 178 | "# x_train = dataset[0][i][0].values * 1000\n", 179 | "# x_test = dataset[1][i][0].values * 1000\n", 180 | "\n", 181 | "x_train = dataset[0][i][0].values\n", 182 | "x_test = dataset[1][i][0].values\n", 183 | "\n", 184 | "scaler = StandardScaler().fit(x_train)\n", 185 | "\n", 186 | "x_train = scaler.transform(x_train)\n", 187 | "x_test = scaler.transform(x_test)\n", 188 | "\n", 189 | "# x_train = normalize_data(dataset[0][i][0])\n", 190 | "# x_test = normalize_data(dataset[1][i][0])\n", 191 | "\n", 192 | "# y_train = get_one_hot(dataset[0][i][1].values, 2) * 1.0\n", 193 | "# y_test = get_one_hot(dataset[1][i][1].values, 2) * 1.0\n", 194 | "y_train = to_categorical(dataset[0][i][1].values, 2)\n", 195 | "y_test = to_categorical(dataset[1][i][1].values, 2)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 9, 201 | "metadata": { 202 | "ExecuteTime": { 203 | "end_time": "2019-03-10T11:29:31.548816Z", 204 | "start_time": "2019-03-10T11:29:31.494231Z" 205 | } 206 | }, 207 | "outputs": [ 208 | { 209 | "name": "stdout", 210 | "output_type": "stream", 211 | "text": [ 212 | "x train shape: (750, 62)\n", 213 | "y train shape: (750, 62, 2)\n", 214 | "x test shape: (490, 62)\n", 215 | "y test shape: (490, 62, 2)\n" 216 | ] 217 | } 218 | ], 219 | "source": [ 220 | "print(f\"x train shape: {x_train.shape}\")\n", 221 | "print(f\"y train shape: {y_train.shape}\")\n", 222 | "print(f\"x test shape: {x_test.shape}\")\n", 223 | "print(f\"y test shape: {y_test.shape}\")" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 10, 229 | "metadata": { 230 | "ExecuteTime": { 231 | "end_time": "2019-03-10T11:29:31.710273Z", 232 | "start_time": "2019-03-10T11:29:31.553791Z" 233 | } 234 | }, 235 | "outputs": [ 236 | { 237 | "name": "stdout", 238 | "output_type": "stream", 239 | "text": [ 240 | "x shape: (31620, 240)\n", 241 | "y shape: (31620, 2)\n" 242 | ] 243 | } 244 | ], 245 | "source": [ 246 
| "# The second range will be looped first\n", 247 | "# x_series = [x_train[i:i+240] for i in range(750 - 240)]\n", 248 | "# y_series = [y_train[i+240] for i in range(750 - 240)]\n", 249 | "x_series = [x_train[i:i+timestep, j] for i in range(x_train.shape[0] - timestep) for j in range(x_train.shape[1])]\n", 250 | "y_series = [y_train[i+timestep, j] for i in range(y_train.shape[0] - timestep) for j in range(y_train.shape[1])]\n", 251 | "x = np.array(x_series)\n", 252 | "y = np.array(y_series)\n", 253 | "print(f\"x shape: {x.shape}\")\n", 254 | "print(f\"y shape: {y.shape}\")" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 11, 260 | "metadata": { 261 | "ExecuteTime": { 262 | "end_time": "2019-03-10T11:29:31.714291Z", 263 | "start_time": "2019-03-10T11:29:31.711668Z" 264 | } 265 | }, 266 | "outputs": [ 267 | { 268 | "name": "stdout", 269 | "output_type": "stream", 270 | "text": [ 271 | "x shape: (31620, 240, 1)\n" 272 | ] 273 | } 274 | ], 275 | "source": [ 276 | "x = np.reshape(x, (x.shape[0], x.shape[1], 1))\n", 277 | "print(f\"x shape: {x.shape}\")\n" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 12, 283 | "metadata": { 284 | "ExecuteTime": { 285 | "end_time": "2019-03-10T11:29:32.910595Z", 286 | "start_time": "2019-03-10T11:29:31.715759Z" 287 | } 288 | }, 289 | "outputs": [ 290 | { 291 | "name": "stdout", 292 | "output_type": "stream", 293 | "text": [ 294 | "_________________________________________________________________\n", 295 | "Layer (type) Output Shape Param # \n", 296 | "=================================================================\n", 297 | "lstm_1 (LSTM) (None, 240, 25) 2700 \n", 298 | "_________________________________________________________________\n", 299 | "lstm_2 (LSTM) (None, 240, 100) 50400 \n", 300 | "_________________________________________________________________\n", 301 | "lstm_3 (LSTM) (None, 240, 100) 80400 \n", 302 | 
"_________________________________________________________________\n", 303 | "lstm_4 (LSTM) (None, 100) 80400 \n", 304 | "_________________________________________________________________\n", 305 | "dense_1 (Dense) (None, 2) 202 \n", 306 | "=================================================================\n", 307 | "Total params: 214,102\n", 308 | "Trainable params: 214,102\n", 309 | "Non-trainable params: 0\n", 310 | "_________________________________________________________________\n" 311 | ] 312 | } 313 | ], 314 | "source": [ 315 | "dropout_rate = 0.1\n", 316 | "# expected input data shape: (batch_size, timesteps, data_dim)\n", 317 | "regressor = Sequential()\n", 318 | "\n", 319 | "# regressor.add(Bidirectional(LSTM(units=25, input_shape=(timestep, 1), dropout=dropout_rate)))\n", 320 | "regressor.add(LSTM(units=25, input_shape=(timestep, 1), return_sequences = True,dropout=dropout_rate))\n", 321 | "regressor.add(LSTM(units=100, return_sequences = True,dropout=dropout_rate))\n", 322 | "regressor.add(LSTM(units=100, return_sequences = True,dropout=dropout_rate))\n", 323 | "regressor.add(LSTM(units=100, input_shape=(timestep, 1), dropout=dropout_rate))\n", 324 | "# regressor.add(LSTM(units=25, batch_input_shape=(527, timestep, 1), dropout=dropout_rate, stateful=False))\n", 325 | "# regressor.add(LSTM(units=25, batch_input_shape=(527, timestep, 1), dropout=dropout_rate))\n", 326 | "# regressor.add(LSTM(units=25, return_sequences = True,dropout=dropout_rate, stateful=False))\n", 327 | "# regressor.add(LSTM(units=25, return_sequences = True,dropout=dropout_rate, stateful=False))\n", 328 | "# regressor.add(LSTM(units=25, dropout=dropout_rate, stateful=False))\n", 329 | "# regressor.add(LSTM(units=25, input_shape=(timestep, 1), dropout=dropout_rate))\n", 330 | "# regressor.add(GRU(units=25, input_shape=(timestep, 1), dropout=dropout_rate))\n", 331 | "# regressor.add(Dense(100, input_shape=(timestep, ), activation='relu'))\n", 332 | "# regressor.add(Dense(100, 
activation='relu'))\n", 333 | "regressor.add(Dense(2, activation='softmax'))\n", 334 | "regressor.compile(loss='binary_crossentropy',\n", 335 | " optimizer='rmsprop',\n", 336 | " metrics=['accuracy'])\n", 337 | "regressor.summary()" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 13, 343 | "metadata": { 344 | "ExecuteTime": { 345 | "end_time": "2019-03-10T11:36:27.480440Z", 346 | "start_time": "2019-03-10T11:29:32.912147Z" 347 | }, 348 | "scrolled": true 349 | }, 350 | "outputs": [ 351 | { 352 | "name": "stdout", 353 | "output_type": "stream", 354 | "text": [ 355 | "Train on 25296 samples, validate on 6324 samples\n", 356 | "Epoch 1/1000\n", 357 | "25296/25296 [==============================] - 52s 2ms/step - loss: 0.6930 - acc: 0.5104 - val_loss: 0.6935 - val_acc: 0.5016\n", 358 | "Epoch 2/1000\n", 359 | "25296/25296 [==============================] - 51s 2ms/step - loss: 0.6928 - acc: 0.5145 - val_loss: 0.6933 - val_acc: 0.5016\n", 360 | "Epoch 3/1000\n", 361 | "25296/25296 [==============================] - 51s 2ms/step - loss: 0.6929 - acc: 0.5144 - val_loss: 0.6934 - val_acc: 0.5016\n", 362 | "Epoch 4/1000\n", 363 | "25296/25296 [==============================] - 51s 2ms/step - loss: 0.6927 - acc: 0.5142 - val_loss: 0.6932 - val_acc: 0.5022\n", 364 | "Epoch 5/1000\n", 365 | "25296/25296 [==============================] - 51s 2ms/step - loss: 0.6928 - acc: 0.5147 - val_loss: 0.6932 - val_acc: 0.5019\n", 366 | "Epoch 6/1000\n", 367 | "25296/25296 [==============================] - 51s 2ms/step - loss: 0.6928 - acc: 0.5146 - val_loss: 0.6937 - val_acc: 0.5016\n", 368 | "Epoch 7/1000\n", 369 | "25296/25296 [==============================] - 51s 2ms/step - loss: 0.6928 - acc: 0.5140 - val_loss: 0.6936 - val_acc: 0.5016\n", 370 | "Epoch 8/1000\n", 371 | "25296/25296 [==============================] - 51s 2ms/step - loss: 0.6928 - acc: 0.5136 - val_loss: 0.6932 - val_acc: 0.5016\n", 372 | "Epoch 9/1000\n", 373 | " 1581/25296 
[>.............................] - ETA: 47s - loss: 0.6928 - acc: 0.5155" 374 | ] 375 | }, 376 | { 377 | "ename": "KeyboardInterrupt", 378 | "evalue": "", 379 | "output_type": "error", 380 | "traceback": [ 381 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 382 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 383 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m# ModelCheckpoint(filepath='../model/LSTM/best_model.h5', monitor='val_acc', save_best_only=True)])\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m result = regressor.fit(x, y, epochs=1000,batch_size=527, validation_split=0.2, callbacks = [EarlyStopping(monitor='val_loss', mode='min', patience=200),\n\u001b[0;32m----> 4\u001b[0;31m ModelCheckpoint(filepath='../model/LSTM/best_model.h5', monitor='val_acc', save_best_only=True)])\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;31m# regressor.fit(x, y, epochs=1000,batch_size=500, validation_split=0.2, callbacks = [EarlyStopping(monitor='val_loss', mode='min', patience=20),\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;31m# ModelCheckpoint(filepath='../model/LSTM/best_model.h5', monitor='val_acc', save_best_only=True)])\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 384 | "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)\u001b[0m\n\u001b[1;32m 1037\u001b[0m 
\u001b[0minitial_epoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minitial_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1038\u001b[0m \u001b[0msteps_per_epoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msteps_per_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1039\u001b[0;31m validation_steps=validation_steps)\n\u001b[0m\u001b[1;32m 1040\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1041\u001b[0m def evaluate(self, x=None, y=None,\n", 385 | "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/engine/training_arrays.py\u001b[0m in \u001b[0;36mfit_loop\u001b[0;34m(model, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[0mins_batch\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mins_batch\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 199\u001b[0;31m \u001b[0mouts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mins_batch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 200\u001b[0m \u001b[0mouts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mto_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mouts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0ml\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mo\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout_labels\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mouts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 386 | "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 2713\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_legacy_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2714\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2715\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2716\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2717\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mpy_any\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mis_tensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 387 | "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 2673\u001b[0m \u001b[0mfetched\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_callable_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0marray_vals\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mrun_metadata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_metadata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2674\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2675\u001b[0;31m \u001b[0mfetched\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_callable_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0marray_vals\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2676\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mfetched\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2677\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 388 | "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1437\u001b[0m ret = tf_session.TF_SessionRunCallable(\n\u001b[1;32m 1438\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_session\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_handle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1439\u001b[0;31m run_metadata_ptr)\n\u001b[0m\u001b[1;32m 1440\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1441\u001b[0m \u001b[0mproto_data\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mtf_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 389 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 390 | ] 391 | } 392 | ], 393 | "source": [ 394 | "# result = regressor.fit(x, y, epochs=1000,batch_size=527, validation_split=0.2, shuffle=False, callbacks = [EarlyStopping(monitor='val_loss', mode='min', patience=200),\n", 395 | "# ModelCheckpoint(filepath='../model/LSTM/best_model.h5', monitor='val_acc', save_best_only=True)])\n", 396 | "result = regressor.fit(x, y, epochs=1000,batch_size=527, validation_split=0.2, callbacks = [EarlyStopping(monitor='val_loss', mode='min', patience=200),\n", 397 | " ModelCheckpoint(filepath='../model/LSTM/best_model.h5', monitor='val_acc', save_best_only=True)])\n", 398 | "# regressor.fit(x, y, epochs=1000,batch_size=500, validation_split=0.2, callbacks = [EarlyStopping(monitor='val_loss', mode='min', patience=20),\n", 399 | "# ModelCheckpoint(filepath='../model/LSTM/best_model.h5', monitor='val_acc', save_best_only=True)])\n" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "metadata": { 406 | "ExecuteTime": { 407 | "end_time": "2019-03-10T11:36:27.483294Z", 408 | "start_time": "2019-03-10T11:29:29.366Z" 409 | } 410 | }, 411 | "outputs": [], 412 | "source": [ 413 | "plt.plot(result.history[\"val_acc\"])\n", 414 | "plt.plot(result.history[\"acc\"])" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": null, 420 | "metadata": { 421 | "ExecuteTime": { 422 | "end_time": "2019-03-10T11:36:27.484346Z", 423 | "start_time": "2019-03-10T11:29:29.368Z" 424 | } 425 | }, 426 | "outputs": [], 427 | "source": [ 428 | "plt.plot(result.history[\"val_loss\"])\n", 429 | "plt.plot(result.history[\"loss\"])" 430 | ] 431 | } 432 | ], 433 | "metadata": { 434 | "kernelspec": { 435 | "display_name": "projet_S5", 436 | "language": 
"python", 437 | "name": "projet_s5" 438 | }, 439 | "language_info": { 440 | "codemirror_mode": { 441 | "name": "ipython", 442 | "version": 3 443 | }, 444 | "file_extension": ".py", 445 | "mimetype": "text/x-python", 446 | "name": "python", 447 | "nbconvert_exporter": "python", 448 | "pygments_lexer": "ipython3", 449 | "version": "3.6.8" 450 | }, 451 | "toc": { 452 | "base_numbering": 1, 453 | "nav_menu": {}, 454 | "number_sections": true, 455 | "sideBar": true, 456 | "skip_h1_title": false, 457 | "title_cell": "Table of Contents", 458 | "title_sidebar": "Contents", 459 | "toc_cell": false, 460 | "toc_position": {}, 461 | "toc_section_display": true, 462 | "toc_window_display": false 463 | } 464 | }, 465 | "nbformat": 4, 466 | "nbformat_minor": 2 467 | } 468 | -------------------------------------------------------------------------------- /notebook/[Official] [Trading Algorithm - Baseline 1] Equally weighted portfolio.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-03-07T10:37:49.257364Z", 9 | "start_time": "2019-03-07T10:37:49.057116Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import numpy as np\n", 15 | "import pandas as pd\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": { 22 | "ExecuteTime": { 23 | "end_time": "2019-03-07T10:37:49.847010Z", 24 | "start_time": "2019-03-07T10:37:49.259067Z" 25 | } 26 | }, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/html": [ 31 | "
\n", 32 | "\n", 45 | "\n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " 
\n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | "
NameAABAAAPLAMZNAXPBACATCSCOCVXDISGE...MSFTNKEPFEPGTRVUNHUTXVZWMTXOM
Date
2006-01-0340.9110.6847.5852.5870.4457.8017.4559.0824.4035.37...26.8410.7423.7858.7845.9961.7356.5330.3846.2358.47
2006-01-0440.9710.7147.2551.9571.1759.2717.8558.9123.9935.32...26.9710.6924.5558.8946.5061.8856.1931.2746.3258.57
2006-01-0541.5310.6347.6552.5070.3359.2718.3558.1924.4135.23...26.9910.7624.5858.7046.9561.6955.9831.6345.6958.28
2006-01-0643.2110.9047.8752.6869.3560.4518.7759.2524.7435.47...26.9110.7224.8558.6447.2162.9056.1631.3545.8859.43
2006-01-0943.4210.8647.0853.9968.7761.5519.0658.9525.0035.38...26.8610.8824.8559.0847.2361.4056.8031.4845.7159.40
\n", 219 | "

5 rows × 31 columns

\n", 220 | "
" 221 | ], 222 | "text/plain": [ 223 | "Name AABA AAPL AMZN AXP BA CAT CSCO CVX DIS \\\n", 224 | "Date \n", 225 | "2006-01-03 40.91 10.68 47.58 52.58 70.44 57.80 17.45 59.08 24.40 \n", 226 | "2006-01-04 40.97 10.71 47.25 51.95 71.17 59.27 17.85 58.91 23.99 \n", 227 | "2006-01-05 41.53 10.63 47.65 52.50 70.33 59.27 18.35 58.19 24.41 \n", 228 | "2006-01-06 43.21 10.90 47.87 52.68 69.35 60.45 18.77 59.25 24.74 \n", 229 | "2006-01-09 43.42 10.86 47.08 53.99 68.77 61.55 19.06 58.95 25.00 \n", 230 | "\n", 231 | "Name GE ... MSFT NKE PFE PG TRV UNH UTX \\\n", 232 | "Date ... \n", 233 | "2006-01-03 35.37 ... 26.84 10.74 23.78 58.78 45.99 61.73 56.53 \n", 234 | "2006-01-04 35.32 ... 26.97 10.69 24.55 58.89 46.50 61.88 56.19 \n", 235 | "2006-01-05 35.23 ... 26.99 10.76 24.58 58.70 46.95 61.69 55.98 \n", 236 | "2006-01-06 35.47 ... 26.91 10.72 24.85 58.64 47.21 62.90 56.16 \n", 237 | "2006-01-09 35.38 ... 26.86 10.88 24.85 59.08 47.23 61.40 56.80 \n", 238 | "\n", 239 | "Name VZ WMT XOM \n", 240 | "Date \n", 241 | "2006-01-03 30.38 46.23 58.47 \n", 242 | "2006-01-04 31.27 46.32 58.57 \n", 243 | "2006-01-05 31.63 45.69 58.28 \n", 244 | "2006-01-06 31.35 45.88 59.43 \n", 245 | "2006-01-09 31.48 45.71 59.40 \n", 246 | "\n", 247 | "[5 rows x 31 columns]" 248 | ] 249 | }, 250 | "execution_count": 2, 251 | "metadata": {}, 252 | "output_type": "execute_result" 253 | } 254 | ], 255 | "source": [ 256 | "stocks = pd.read_csv(\"../data/dowjones/all_stocks_2006-01-01_to_2018-01-01.csv\", index_col='Date',parse_dates=['Date'])\n", 257 | "stocks = stocks[[\"Close\", \"Name\"]]\n", 258 | "stocks = stocks.pivot_table(values='Close', index=stocks.index, columns='Name', aggfunc='first')\n", 259 | "stocks.head()" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 3, 265 | "metadata": { 266 | "ExecuteTime": { 267 | "end_time": "2019-03-07T10:37:49.909612Z", 268 | "start_time": "2019-03-07T10:37:49.848989Z" 269 | }, 270 | "scrolled": true 271 | }, 272 | "outputs": [ 273 | 
{ 274 | "data": { 275 | "text/html": [ 276 | "
\n", 277 | "\n", 290 | "\n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " 
\n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | "
NameAABAAAPLAMZNAXPBACATCSCOCVXDISGE...MSFTNKEPFEPGTRVUNHUTXVZWMTXOM
Date
2006-01-041.0014671.0028090.9930640.9880181.0103631.0254331.0229230.9971230.9831970.998586...1.0048440.9953451.0323801.0018711.0110891.0024300.9939851.0292961.0019471.001710
2006-01-051.0136690.9925301.0084661.0105870.9881971.0000001.0280110.9877781.0175070.997452...1.0007421.0065481.0012220.9967741.0096770.9969300.9962631.0115130.9863990.995049
2006-01-061.0404531.0254001.0046171.0034290.9860661.0199091.0228881.0182161.0135191.006812...0.9970360.9962831.0109850.9989781.0055381.0196141.0032150.9911481.0041581.019732
2006-01-091.0048600.9963300.9834971.0248670.9916371.0181971.0154500.9949371.0105090.997463...0.9981421.0149251.0000001.0075031.0004240.9761531.0113961.0041470.9962950.999495
2006-01-100.9898661.0635360.9696260.9970361.0047990.9959380.9952781.0049191.0128000.994630...1.0052121.0018380.9835010.9961070.9928011.0197071.0007041.0041301.0032821.007744
\n", 464 | "

5 rows × 31 columns

\n", 465 | "
" 466 | ], 467 | "text/plain": [ 468 | "Name AABA AAPL AMZN AXP BA CAT \\\n", 469 | "Date \n", 470 | "2006-01-04 1.001467 1.002809 0.993064 0.988018 1.010363 1.025433 \n", 471 | "2006-01-05 1.013669 0.992530 1.008466 1.010587 0.988197 1.000000 \n", 472 | "2006-01-06 1.040453 1.025400 1.004617 1.003429 0.986066 1.019909 \n", 473 | "2006-01-09 1.004860 0.996330 0.983497 1.024867 0.991637 1.018197 \n", 474 | "2006-01-10 0.989866 1.063536 0.969626 0.997036 1.004799 0.995938 \n", 475 | "\n", 476 | "Name CSCO CVX DIS GE ... MSFT \\\n", 477 | "Date ... \n", 478 | "2006-01-04 1.022923 0.997123 0.983197 0.998586 ... 1.004844 \n", 479 | "2006-01-05 1.028011 0.987778 1.017507 0.997452 ... 1.000742 \n", 480 | "2006-01-06 1.022888 1.018216 1.013519 1.006812 ... 0.997036 \n", 481 | "2006-01-09 1.015450 0.994937 1.010509 0.997463 ... 0.998142 \n", 482 | "2006-01-10 0.995278 1.004919 1.012800 0.994630 ... 1.005212 \n", 483 | "\n", 484 | "Name NKE PFE PG TRV UNH UTX \\\n", 485 | "Date \n", 486 | "2006-01-04 0.995345 1.032380 1.001871 1.011089 1.002430 0.993985 \n", 487 | "2006-01-05 1.006548 1.001222 0.996774 1.009677 0.996930 0.996263 \n", 488 | "2006-01-06 0.996283 1.010985 0.998978 1.005538 1.019614 1.003215 \n", 489 | "2006-01-09 1.014925 1.000000 1.007503 1.000424 0.976153 1.011396 \n", 490 | "2006-01-10 1.001838 0.983501 0.996107 0.992801 1.019707 1.000704 \n", 491 | "\n", 492 | "Name VZ WMT XOM \n", 493 | "Date \n", 494 | "2006-01-04 1.029296 1.001947 1.001710 \n", 495 | "2006-01-05 1.011513 0.986399 0.995049 \n", 496 | "2006-01-06 0.991148 1.004158 1.019732 \n", 497 | "2006-01-09 1.004147 0.996295 0.999495 \n", 498 | "2006-01-10 1.004130 1.003282 1.007744 \n", 499 | "\n", 500 | "[5 rows x 31 columns]" 501 | ] 502 | }, 503 | "execution_count": 3, 504 | "metadata": {}, 505 | "output_type": "execute_result" 506 | } 507 | ], 508 | "source": [ 509 | "real_returns = stocks / stocks.shift(1)\n", 510 | "real_returns = real_returns.dropna()\n", 511 | "real_returns.head()" 512 | ] 
513 | }, 514 | { 515 | "cell_type": "markdown", 516 | "metadata": {}, 517 | "source": [ 518 | "Construct an equally weighted portfolio at the beginning, no modification" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": 6, 524 | "metadata": { 525 | "ExecuteTime": { 526 | "end_time": "2019-03-07T10:38:05.942847Z", 527 | "start_time": "2019-03-07T10:38:05.923858Z" 528 | } 529 | }, 530 | "outputs": [ 531 | { 532 | "data": { 533 | "text/plain": [ 534 | "Name\n", 535 | "AABA 1.703652\n", 536 | "AAPL 15.553472\n", 537 | "AMZN 24.930303\n", 538 | "AXP 1.857342\n", 539 | "BA 4.167905\n", 540 | "CAT 2.612698\n", 541 | "CSCO 2.204494\n", 542 | "CVX 2.084414\n", 543 | "DIS 4.404010\n", 544 | "GE 0.499659\n", 545 | "GOOGL 4.782169\n", 546 | "GS 1.907548\n", 547 | "HD 4.535423\n", 548 | "IBM 1.844540\n", 549 | "INTC 1.776532\n", 550 | "JNJ 2.264994\n", 551 | "JPM 2.656100\n", 552 | "KO 2.294732\n", 553 | "MCD 5.036862\n", 554 | "MMM 2.895334\n", 555 | "MRK 1.708513\n", 556 | "MSFT 3.174303\n", 557 | "NKE 5.736704\n", 558 | "PFE 1.550533\n", 559 | "PG 1.553785\n", 560 | "TRV 2.982429\n", 561 | "UNH 3.429493\n", 562 | "UTX 2.132306\n", 563 | "VZ 1.701935\n", 564 | "WMT 2.106818\n", 565 | "XOM 1.406517\n", 566 | "dtype: float64" 567 | ] 568 | }, 569 | "execution_count": 6, 570 | "metadata": {}, 571 | "output_type": "execute_result" 572 | } 573 | ], 574 | "source": [ 575 | "real_returns.product()" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": 4, 581 | "metadata": { 582 | "ExecuteTime": { 583 | "end_time": "2019-03-07T10:37:49.946281Z", 584 | "start_time": "2019-03-07T10:37:49.911687Z" 585 | } 586 | }, 587 | "outputs": [ 588 | { 589 | "data": { 590 | "text/plain": [ 591 | "3.790178028501573" 592 | ] 593 | }, 594 | "execution_count": 4, 595 | "metadata": {}, 596 | "output_type": "execute_result" 597 | } 598 | ], 599 | "source": [ 600 | "real_returns.product().mean()" 601 | ] 602 | }, 603 | { 604 | "cell_type": "markdown", 605 
| "metadata": {}, 606 | "source": [ 607 | "Equally weighted portfolio" 608 | ] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "execution_count": 5, 613 | "metadata": { 614 | "ExecuteTime": { 615 | "end_time": "2019-03-07T10:37:50.013158Z", 616 | "start_time": "2019-03-07T10:37:49.949700Z" 617 | } 618 | }, 619 | "outputs": [ 620 | { 621 | "data": { 622 | "text/plain": [ 623 | "3.5255573996595255" 624 | ] 625 | }, 626 | "execution_count": 5, 627 | "metadata": {}, 628 | "output_type": "execute_result" 629 | } 630 | ], 631 | "source": [ 632 | "real_returns.mean(axis = 1).product()" 633 | ] 634 | } 635 | ], 636 | "metadata": { 637 | "kernelspec": { 638 | "display_name": "projet_S5", 639 | "language": "python", 640 | "name": "projet_s5" 641 | }, 642 | "language_info": { 643 | "codemirror_mode": { 644 | "name": "ipython", 645 | "version": 3 646 | }, 647 | "file_extension": ".py", 648 | "mimetype": "text/x-python", 649 | "name": "python", 650 | "nbconvert_exporter": "python", 651 | "pygments_lexer": "ipython3", 652 | "version": "3.6.8" 653 | }, 654 | "toc": { 655 | "base_numbering": 1, 656 | "nav_menu": {}, 657 | "number_sections": true, 658 | "sideBar": true, 659 | "skip_h1_title": false, 660 | "title_cell": "Table of Contents", 661 | "title_sidebar": "Contents", 662 | "toc_cell": false, 663 | "toc_position": {}, 664 | "toc_section_display": true, 665 | "toc_window_display": false 666 | } 667 | }, 668 | "nbformat": 4, 669 | "nbformat_minor": 2 670 | } 671 | -------------------------------------------------------------------------------- /notebook/best_model.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tqa236/LSTM_algo_trading/ddef49af393069df2ec1dbd3843fed79e65ba141/notebook/best_model.h5 -------------------------------------------------------------------------------- /src/calculate_returns.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env 
def main():
    """Compute returns, log returns and their labels for one index.

    Reads ``../data/<index>_calculated/stocks.csv`` (``Date`` column parsed
    and used as the index) and writes six CSV files into the same
    directory: plain and log returns, plus the "class" and "absolute
    class" labels derived from each of them.

    Command-line parsing is intentionally absent: the original built an
    argument parser but never called ``parse_args()``, so the paths below
    were always the effective ones.

    Returns:
        int: always 0.
    """
    # Only "frankfurt" was ever effective; an earlier "dowjones" assignment
    # was overwritten on the very next line.
    index = "frankfurt"
    data_dir = f"../data/{index}_calculated"

    dataset = pd.read_csv(f"{data_dir}/stocks.csv",
                          index_col='Date',
                          parse_dates=['Date'])
    returns = calculate_returns(dataset)
    log_returns = calculate_log_returns(dataset)
    labels = calculate_class(returns)
    absolute_labels = calculate_absolute_class(returns)
    log_labels = calculate_class(log_returns)
    absolute_log_labels = calculate_absolute_class(log_returns)

    print(f"Returns shape: {returns.shape}")
    print(f"Labels shape: {labels.shape}")

    # Written in this order; the "1" suffix is part of the file names that
    # downstream scripts read.
    outputs = [
        ("returns1", returns),
        ("labels1", labels),
        ("absolute_labels1", absolute_labels),
        ("log_returns1", log_returns),
        ("log_labels1", log_labels),
        ("absolute_log_labels1", absolute_log_labels),
    ]
    for name, frame in outputs:
        frame.to_csv(f"{data_dir}/{name}.csv")

    print("Done.")
    return 0
def divide_period(returns, labels, train_length=750, test_length=250,
                  timesteps=240):
    """Cut returns and labels into rolling train/test study periods.

    Period ``i`` trains on rows ``[test_length * i,
    test_length * i + train_length)``. Its test slice ends ``test_length``
    rows after the training slice and starts ``timesteps`` rows before the
    end of the training slice, so the first test day already has a full
    look-back window.

    Args:
        returns: sliceable sequence (e.g. a DataFrame) of inputs.
        labels: sliceable sequence of targets, aligned with ``returns``.
        train_length: number of rows in each training slice.
        test_length: number of new rows in each test slice; also the step
            between consecutive periods.
        timesteps: look-back rows prepended to each test slice.

    Returns:
        tuple: ``(trains, tests)``, two lists with one
        ``(returns_slice, labels_slice)`` pair per period.
    """
    num_period = int((len(labels) - train_length) / test_length)
    trains = []
    tests = []
    for period in range(num_period):
        offset = test_length * period
        train_end = train_length + offset
        trains.append((returns[offset:train_end], labels[offset:train_end]))

        test_start = train_end - timesteps
        test_end = train_end + test_length
        tests.append((returns[test_start:test_end],
                      labels[test_start:test_end]))

    return (trains, tests)


def _read_dated_csv(path):
    """Load a CSV whose ``Date`` column is parsed and used as the index."""
    return pd.read_csv(path, index_col='Date', parse_dates=['Date'])


def main():
    """Split the saved returns/labels into periods and pickle the result.

    Returns:
        int: always 0.
    """
    train_length = 750
    test_length = 250
    timesteps = 240
    index = "frankfurt"
    data_dir = f"../data/{index}_calculated"

    parser = argparse.ArgumentParser(
        description="Parse arguments for models.")
    parser.add_argument("--returns", help="Dataset directory.",
                        default=f"{data_dir}/returns1.csv")
    parser.add_argument("--labels", help="Dataset directory.",
                        default=f"{data_dir}/labels1.csv")
    parser.add_argument(
        '--outdir', help='Model directory.',
        default=f"{data_dir}/periods"
                f"{train_length}_{test_length}_{timesteps}.txt")
    args = parser.parse_args()

    returns = _read_dated_csv(args.returns)
    labels = _read_dated_csv(args.labels)
    periods = divide_period(
        returns, labels, train_length, test_length, timesteps)

    # Report the shapes of the first period only.
    first_train = periods[0][0]
    print("Training set")
    print(f"Returns shape for 1 period: {first_train[0].shape}")
    print(f"Labels shape for 1 period: {first_train[1].shape}")
    first_test = periods[1][0]
    print("Test set")
    print(f"Returns shape for 1 period: {first_test[0].shape}")
    print(f"Labels shape for 1 period: {first_test[1].shape}")

    # Despite the .txt default name, the output is a binary pickle.
    with open(args.outdir, "wb") as file:
        pickle.dump(periods, file)
    print("Done.")
    return 0
def main():
    """Assemble one close-price table from the per-ticker Frankfurt CSVs.

    Keeps the tickers whose metadata ``from_date`` is before 2001-01-01 and
    whose ``to_date`` is after 2018-01-01 (compared as read from the CSV),
    collects each ticker's ``Close`` column for that date range, drops
    every column containing a missing value and writes the table to
    ``--outdir``.

    Returns:
        int: always 0.
    """
    parser = argparse.ArgumentParser(
        description="Parse arguments for models.")
    parser.add_argument(
        "--indir", help="Dataset directory.",
        default="../data/frankfurt/FSE_metadata.csv")
    parser.add_argument('--outdir', help='Model directory.',
                        default="../data/frankfurt_calculated/stocks.csv")
    args = parser.parse_args()

    first_day = '2001-01-01'
    last_day = '2018-01-01'

    metadata = pd.read_csv(args.indir)
    listed_throughout = ((metadata["from_date"] < "2001-01-01")
                         & (metadata["to_date"] > "2018-01-01"))
    metadata = metadata[listed_throughout]

    # The dates of one fixed ticker file serve as the row index of the
    # combined table.
    reference = pd.read_csv('../data/frankfurt/stocks/AAD_X.csv',
                            index_col='Date', parse_dates=['Date'])
    stocks = pd.DataFrame(index=reference.index).loc[first_day:last_day]

    for ticker in metadata.code:
        quotes = pd.read_csv(f'../data/frankfurt/stocks/{ticker}.csv',
                             index_col='Date', parse_dates=['Date'])
        stocks[ticker] = quotes["Close"].loc[first_day:last_day]

    # Any ticker with a gap on one of the reference dates is discarded.
    stocks = stocks.dropna(axis=1)
    print(f"Stocks shape: {stocks.shape}")
    stocks.to_csv(args.outdir)

    print("Done.")
    return 0
def download_data(metadata_df, api=None):
    """Download every ticker in *metadata_df* from Quandl's FSE database.

    Each ticker code is printed, fetched as ``FSE/<code>`` and saved to
    ``../data/frankfurt/stocks_tmp/<code>.csv``. A failed download raises
    and stops the run; nothing is skipped silently.

    Args:
        metadata_df: table with a ``code`` column of ticker codes.
        api: Quandl API key, or None to leave the key unset.
    """
    # The key does not change between requests, so configure it once.
    quandl.ApiConfig.api_key = api
    for ticker in metadata_df["code"]:
        print(ticker)
        quotes = quandl.get("FSE/" + ticker)
        quotes.to_csv("../data/frankfurt/stocks_tmp/" + ticker + ".csv")


def main():
    """Read the ticker list and the API key, then download all tickers.

    Returns:
        int: always 0.
    """
    metadata_df = pd.read_csv("../data/frankfurt/FSE_metadata.csv")
    # The key file is read as a headerless CSV; the key is its first cell.
    api = pd.read_csv("../data/personal_data/quandl_API.txt",
                      header=None)[0][0]
    download_data(metadata_df, api)
    return 0
def _flatten_windows(x_windows, y_windows, timestep):
    """Reshape windowed samples into one row per (sample, series) pair.

    ``x_windows`` is 3-D; its last two axes are swapped and the first two
    of the result merged, giving rows of ``timestep`` values.
    ``y_windows`` is 2-D and is flattened the same way.
    """
    x_windows = x_windows.transpose((0, 2, 1))
    x_flat = np.reshape(
        x_windows, (x_windows.shape[0] * x_windows.shape[1], timestep))
    y_flat = np.reshape(
        y_windows, (y_windows.shape[0] * y_windows.shape[1]))
    return x_flat, y_flat


def train(dataset, model_name, timestep=20):
    """Fit and score a random forest classifier on every study period.

    For each period the classifier is fitted on the flattened training
    windows and predicts the flattened test windows. Two accuracies are
    printed: one over the last 250 prediction rows and one over the 50
    rows before them. The last-250 predictions of all periods are
    concatenated and their shape is printed.

    Args:
        dataset: ``(trains, tests)``; each a list with one
            ``(returns, labels)`` pair of DataFrames per period.
        model_name: unused; kept for interface compatibility.
        timestep: length of the look-back window fed to the classifier.
    """
    positions = []
    for i in range(len(dataset[0])):
        train_returns, train_labels = dataset[0][i][0], dataset[0][i][1]
        test_returns, test_labels = dataset[1][i][0], dataset[1][i][1]

        x_train, y_train = generate_time_series_sample(
            normalize_data(train_returns), train_labels.values, timestep)
        x_test, y_test = generate_time_series_sample(
            normalize_data(test_returns), test_labels.values, timestep)

        x_train, y_train = _flatten_windows(x_train, y_train, timestep)
        x_test, y_test = _flatten_windows(x_test, y_test, timestep)
        print(f"x train shape: {x_train.shape}")
        print(f"y train shape: {y_train.shape}")
        print(f"x test shape: {x_test.shape}")
        print(f"y test shape: {y_test.shape}")

        clf = RandomForestClassifier(n_jobs=2, random_state=0, max_depth=5)
        clf.fit(x_train, y_train)

        # One column per series in the label table (previously a
        # hard-coded 31, which only fits the Dow Jones data set).
        n_series = test_labels.shape[1]
        predicted = clf.predict(x_test).reshape(-1, n_series)
        truth = test_labels.values

        # NOTE(review): 250 presumably mirrors the test_length used when
        # the periods were built -- confirm before changing either.
        predict = predicted[-250:]
        result = np.mean(predict == truth[-250:, :])
        result1 = np.mean(predicted[-300:-250] == truth[-300:-250, :])

        positions.append(predict)
        print(result)
        print(result1)
    all_positions = np.concatenate(positions, axis=0)
    print(all_positions.shape)


def main():
    """Load the pickled periods and run the random forest benchmark.

    Returns:
        int: always 0.
    """
    parser = argparse.ArgumentParser(
        description="Parse arguments for models.")
    parser.add_argument("--dataset", help="Dataset directory.",
                        default="../data/dowjones_calculated/periods.txt")
    parser.add_argument('--outdir', help='Model directory.',
                        default='../model/LSTM/my_model1')
    args = parser.parse_args()

    # NOTE: unpickling can execute arbitrary code; only load period files
    # produced by a trusted run of the period-splitting script.
    with open(args.dataset, "rb") as file:
        dataset = pickle.load(file)
    train(dataset, args.outdir, 10)

    print("Done.")
    return 0
def generate_random_strategy(returns):
    """Generate a random probability that a stock will beat the market.

    Args:
        returns: DataFrame whose shape, index and columns are reused.

    Returns:
        DataFrame of independent uniform draws from ``[0, 1)`` with the
        same index and columns as ``returns``; ``returns`` is not modified.
    """
    # One vectorised draw replaces a Python-level call per cell.
    return pd.DataFrame(np.random.rand(*returns.shape),
                        index=returns.index, columns=returns.columns)


def calculate_random_returns(returns, k=10, start=750, end=3000):
    """Calculate the returns of a random trading strategy.

    Random probabilities are turned into positions row by row, multiplied
    with the returns, and rows ``start:end`` are evaluated.

    Args:
        returns: DataFrame of per-period returns, one column per stock.
        k: passed to ``long_short_postion``; ``2 * k`` is the divisor used
            to average over the held positions.
        start: first row (inclusive) of the evaluation window.
        end: last row (exclusive) of the evaluation window.

    Returns:
        list: ``[no_rebalance, rebalance]``. The first compounds each
        column separately and averages the results; the second compounds
        the averaged per-row return.
    """
    probabilities = generate_random_strategy(returns)
    # The result of apply() must be kept: previously it was discarded, so
    # the raw probabilities were used as positions.
    # NOTE(review): assumes long_short_postion returns the position row
    # rather than mutating its argument -- confirm in utils.
    positions = probabilities.apply(
        lambda row: long_short_postion(row, k),
        axis=1, result_type='broadcast')
    random_returns = returns.mul(positions)[start:end]
    no_rebalance = (random_returns + 1).product().sum() / (2 * k)
    rebalance = (1 + random_returns.sum(axis=1) / (2 * k)).product()
    return [no_rebalance, rebalance]


def random_trading(returns, k=10, start=750, end=3000, times=1):
    """Make a list of random trading returns.

    Returns:
        list: ``times`` independent results of
        ``calculate_random_returns``.
    """
    return [calculate_random_returns(returns, k, start, end)
            for _ in range(times)]


def main():
    """Simulate 1000 random strategies and save their final returns.

    Returns:
        int: always 0.
    """
    start = time.time()
    parser = argparse.ArgumentParser(
        description="Parse arguments for models.")
    parser.add_argument(
        "--indir", help="Dataset directory.",
        default="../data/dowjones/all_stocks_2006-01-01_to_2018-01-01.csv")
    # NOTE: --outdir is accepted but not used; the output path is fixed.
    parser.add_argument('--outdir', help='Model directory.',
                        default="../data/dowjones_calculated/rebalance.csv")
    args = parser.parse_args()
    dataset = pd.read_csv(args.indir, index_col='Date',
                          parse_dates=['Date'])
    returns = calculate_returns(dataset)
    times = 1000
    results = random_trading(returns, times=times)
    pd.DataFrame(data=results).to_csv(
        f"../data/dowjones_calculated/random_trading_{times}times.csv",
        sep=',', index=False, header=["No Rebalance", "Rebalance"])
    print("Done.")
    end = time.time()
    print(end - start)
    return 0
default="../data/dowjones_calculated/rebalance.csv") 53 | args = parser.parse_args() 54 | dataset = pd.read_csv(args.indir, index_col='Date', 55 | parse_dates=['Date']) 56 | returns = calculate_returns(dataset) 57 | times = 1000 58 | results = random_trading(returns, times=times) 59 | pd.DataFrame(data=results).to_csv( 60 | f"../data/dowjones_calculated/random_trading_{times}times.csv", 61 | sep=',', index=False, header=["No Rebalance", "Rebalance"]) 62 | print("Done.") 63 | end = time.time() 64 | print(end - start) 65 | return 0 66 | 67 | 68 | if __name__ == "__main__": 69 | main() 70 | -------------------------------------------------------------------------------- /src/train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Train a LSTM model.""" 4 | 5 | import argparse 6 | import os 7 | import pickle 8 | 9 | import numpy as np 10 | from sklearn.preprocessing import StandardScaler 11 | 12 | from keras.callbacks import EarlyStopping, ModelCheckpoint 13 | from keras.layers import LSTM, Dense, Reshape 14 | from keras.models import Sequential 15 | from keras.utils import to_categorical 16 | 17 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 18 | # The GPU id to use, usually either "0" or "1"; 19 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 20 | 21 | 22 | def get_one_hot(targets, nb_classes): 23 | """Convert class array to one hot vector.""" 24 | res = np.eye(nb_classes)[np.array(targets).reshape(-1)] 25 | return res.reshape(list(targets.shape) + [nb_classes]) 26 | 27 | 28 | def train_one_feature(dataset, model_name, timestep=240, feature=31, 29 | dropout_level=0.1): 30 | """Train an LSTM model with 1 feature.""" 31 | for i in range(len(dataset[0])): 32 | model_period = f"{model_name}_1feature_period{i}.h5" 33 | x_train = dataset[0][i][0].values 34 | scaler = StandardScaler().fit(x_train) 35 | x_train = scaler.transform(x_train) 36 | y_train = 
to_categorical(dataset[0][i][1].values, 2) 37 | 38 | print(f"Period {i}") 39 | print(f"x train shape: {x_train.shape}") 40 | print(f"y train shape: {y_train.shape}") 41 | x_series = [x_train[i:i + timestep, j] 42 | for i in range(x_train.shape[0] - timestep) 43 | for j in range(feature)] 44 | y_series = [y_train[i + timestep, j] 45 | for i in range(y_train.shape[0] - timestep) 46 | for j in range(feature)] 47 | x_final = np.array(x_series) 48 | y_final = np.array(y_series) 49 | x_final = np.reshape(x_final, (x_final.shape[0], x_final.shape[1], 1)) 50 | print(f"x_final shape: {x_final.shape}") 51 | print(f"y_final shape: {y_final.shape}") 52 | 53 | # expected input data shape: (batch_size, timesteps, data_dim) 54 | regressor = Sequential() 55 | regressor.add(LSTM(units=25, input_shape=(timestep, 1), 56 | dropout=dropout_level, 57 | recurrent_dropout=dropout_level)) 58 | regressor.add(Dense(2, activation='softmax')) 59 | regressor.compile(loss='binary_crossentropy', 60 | optimizer='rmsprop', 61 | metrics=['accuracy']) 62 | regressor.summary() 63 | 64 | regressor.fit(x_final, y_final, batch_size=1000, epochs=1000, 65 | validation_split=0.2, 66 | callbacks=[EarlyStopping(monitor='val_loss', 67 | mode='min', patience=10), 68 | ModelCheckpoint(filepath=model_period, 69 | monitor='val_acc', 70 | save_best_only=True)]) 71 | 72 | 73 | def train(dataset, model_name, timestep=240, feature=31, dropout_level=0.1): 74 | """Train an LSTM model.""" 75 | for i in range(len(dataset[0])): 76 | model_period = f"{model_name}_period{i}.h5" 77 | x_train = dataset[0][i][0].values 78 | scaler = StandardScaler().fit(x_train) 79 | x_train = scaler.transform(x_train) 80 | y_train = to_categorical(dataset[0][i][1].values, 2) 81 | 82 | print(f"Period {i}") 83 | print(f"x train shape: {x_train.shape}") 84 | print(f"y train shape: {y_train.shape}") 85 | 86 | x_series = [x_train[i:i + timestep, :] 87 | for i in range(x_train.shape[0] - timestep)] 88 | y_series = [y_train[i + timestep] 89 | for i 
in range(y_train.shape[0] - timestep)] 90 | x_final = np.array(x_series) 91 | y_final = np.array(y_series) 92 | print(f"x_final shape: {x_final.shape}") 93 | print(f"y_final shape: {y_final.shape}") 94 | 95 | # expected input data shape: (batch_size, timesteps, data_dim) 96 | regressor = Sequential() 97 | regressor.add(LSTM(units=25, input_shape=(timestep, feature), 98 | dropout=dropout_level, 99 | recurrent_dropout=dropout_level)) 100 | regressor.add(Dense(feature * 2, activation='relu')) 101 | regressor.add(Reshape((feature, 2))) 102 | regressor.add(Dense(2, activation='softmax')) 103 | regressor.compile(loss='binary_crossentropy', 104 | optimizer='rmsprop', 105 | metrics=['accuracy']) 106 | regressor.summary() 107 | 108 | regressor.fit(x_final, y_final, batch_size=1000, epochs=1000, 109 | validation_split=0.2, 110 | callbacks=[EarlyStopping(monitor='val_loss', 111 | mode='min', patience=10), 112 | ModelCheckpoint(filepath=model_period, 113 | monitor='val_acc', 114 | save_best_only=True)]) 115 | 116 | 117 | def main(): 118 | """Run main program.""" 119 | index = "dowjones" 120 | # index = "frankfurt" 121 | parser = argparse.ArgumentParser( 122 | description="Parse arguments for models.") 123 | # parser.add_argument("--dataset", help="Dataset directory.", 124 | # default=f"../data/{index}_calculated/" 125 | # f"absolute_periods750_250_240.txt") 126 | # parser.add_argument('--outdir', help='Model directory.', 127 | # default=f'../model/LSTM/{index}2_absolute') 128 | parser.add_argument("--dataset", help="Dataset directory.", 129 | default=f"../data/{index}_calculated/" 130 | f"periods750_250_240.txt") 131 | parser.add_argument('--outdir', help='Model directory.', 132 | default=f'../model/LSTM/{index}_drop0.1_') 133 | args = parser.parse_args() 134 | 135 | with open(args.dataset, "rb") as file: # Unpickling 136 | dataset = pickle.load(file) 137 | train(dataset, args.outdir) 138 | train_one_feature(dataset, args.outdir) 139 | print("Done.") 140 | return 0 141 | 142 | 
if __name__ == "__main__":
    main()
# --------------------------------------------------------------------------------
# /src/train_one_ticker.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Train a LSTM model."""

import argparse
import os
import pickle

import numpy as np
from sklearn.preprocessing import StandardScaler

from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import LSTM, Dense
from keras.models import Sequential
from keras.utils import to_categorical

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# The GPU id to use, usually either "0" or "1";
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def get_one_hot(targets, nb_classes):
    """Convert class array to one hot vector.

    Args:
        targets: Array-like of integer class ids (any shape).
        nb_classes: Number of classes, i.e. the length of each one hot row.

    Returns:
        Array of shape ``targets.shape + (nb_classes,)``.
    """
    # Convert once so plain lists work too; the shape is read from the
    # converted array instead of from the raw argument.
    targets = np.asarray(targets)
    res = np.eye(nb_classes)[targets.reshape(-1)]
    return res.reshape(list(targets.shape) + [nb_classes])


def train_one_ticker(dataset, model_name, timestep=240, feature=31,
                     dropout_level=0.1):
    """Train one single-feature LSTM model per period and per ticker.

    For every entry of ``dataset[0]`` and every column ``j`` below
    ``feature`` a separate model is trained. A sample is a window of
    ``timestep`` consecutive rows of column ``j``; its target is the one
    hot label of that column on the row right after the window.

    Args:
        dataset: ``dataset[0]`` is a sequence of periods; element 0 of a
            period holds the inputs and element 1 the labels, both exposing
            ``.values``. Labels are encoded with two classes.
        model_name: Prefix of the checkpoint files; the model of period
            ``i`` and column ``j`` is saved as
            ``{model_name}_period{i}_ticker{j}.h5``.
        timestep: Length of each input window.
        feature: Number of columns (tickers) to train.
        dropout_level: Input and recurrent dropout of the LSTM layer.
    """
    for i, period in enumerate(dataset[0]):
        # Scaling and label encoding depend on the period only, so they
        # are done once here instead of once per ticker.
        x_train = period[0].values
        x_train = StandardScaler().fit(x_train).transform(x_train)
        y_train = to_categorical(period[1].values, 2)

        for j in range(feature):
            model_period = f"{model_name}_period{i}_ticker{j}.h5"

            print(f"Period {i}")
            print(f"x train shape: {x_train.shape}")
            print(f"y train shape: {y_train.shape}")

            x_series = [x_train[t:t + timestep, j]
                        for t in range(x_train.shape[0] - timestep)]
            y_series = [y_train[t + timestep, j]
                        for t in range(y_train.shape[0] - timestep)]
            x_final = np.array(x_series)
            y_final = np.array(y_series)
            # Add the trailing feature axis of size 1.
            x_final = np.reshape(
                x_final, (x_final.shape[0], x_final.shape[1], 1))
            print(f"x_final shape: {x_final.shape}")
            print(f"y_final shape: {y_final.shape}")

            # expected input data shape: (batch_size, timesteps, data_dim)
            regressor = Sequential()
            regressor.add(LSTM(units=25, input_shape=(timestep, 1),
                               recurrent_dropout=dropout_level,
                               dropout=dropout_level))
            regressor.add(Dense(2, activation='softmax'))
            regressor.compile(loss='binary_crossentropy',
                              optimizer='rmsprop',
                              metrics=['accuracy'])
            regressor.summary()

            # NOTE(review): 'val_acc' is the name older Keras releases log
            # for the 'accuracy' metric; newer releases log
            # 'val_accuracy'. Confirm against the installed version.
            regressor.fit(x_final, y_final, batch_size=1000, epochs=1000,
                          validation_split=0.2,
                          callbacks=[EarlyStopping(monitor='val_loss',
                                                   mode='min', patience=10),
                                     ModelCheckpoint(filepath=model_period,
                                                     monitor='val_acc',
                                                     save_best_only=True)])


def main():
    """Run main program.

    Loads the pickled dataset given by ``--dataset`` and trains one model
    per period and ticker with the ``--outdir`` value as checkpoint prefix.
    """
    index = "dowjones"
    # index = "frankfurt"
    parser = argparse.ArgumentParser(
        description="Parse arguments for models.")
    parser.add_argument("--dataset", help="Dataset directory.",
                        default=f"../data/{index}_calculated/"
                        f"absolute_periods750_250_240.txt")
    parser.add_argument('--outdir', help='Model directory.',
                        default=f'../model/LSTM/{index}_absolute')

    args = parser.parse_args()

    # NOTE: unpickling runs code embedded in the file; only load datasets
    # that were produced by this project.
    with open(args.dataset, "rb") as file:  # Unpickling
        dataset = pickle.load(file)
    train_one_ticker(dataset, args.outdir)
    print("Done.")
    return 0


if __name__ == "__main__":
    main()
# --------------------------------------------------------------------------------
# /src/utils.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Functions that are used in more than one place."""

import numpy as np

from keras.preprocessing.sequence import TimeseriesGenerator


def generate_random_strategy(returns):
    """Generate a random probability that a stock will beat the market.

    Every cell of ``returns`` is replaced by an independent draw from
    ``np.random.rand()``. The ``broadcast`` result type keeps the index
    and the columns of ``returns``.
    """
    return returns.apply(
        lambda row: [np.random.rand() for _ in row],
        axis=1, result_type='broadcast')


def long_short_postion(probabilities, k):
    """
    Make a simple long short strategy.

    Decide the stock position based on the probability that it will beat
    the market: the ``k`` smallest entries become ``-1`` (short), the ``k``
    largest become ``1`` (long) and everything in between becomes ``0``.

    Args:
        probabilities: One-dimensional ndarray or Series. It is
            overwritten in place with the positions.
        k: Number of entries on each side. Presumably
            ``0 < 2 * k <= len(probabilities)`` -- not validated here.

    Returns:
        The same object that was passed in, now holding the positions.
    """
    positions = probabilities
    # Rank on a plain ndarray so every index below is positional, no
    # matter how the container itself would interpret integer keys.
    scores = np.asarray(positions)
    size = len(scores)
    short = np.argpartition(scores, k)[:k]
    neutral = np.argpartition(scores, size - k)[:(size - k)]
    # Start all long, then carve out the lower block. ``neutral`` includes
    # the shorts, so the shorts have to be written last.
    signals = np.ones(size)
    signals[neutral] = 0
    signals[short] = -1
    # Write back in place: callers may rely on their argument changing.
    positions[:] = signals
    return positions


def calculate_class(returns):
    """Find the class for each LSTM sequence based on the median returns.

    A cell is labelled ``1`` when it is greater than or equal to the
    median of its row and ``0`` otherwise.
    """
    median_returns = returns.median(axis=1)
    labels = returns.apply(
        lambda column: np.where(column >= median_returns, 1, 0), axis=0)
    return labels


def calculate_absolute_class(returns):
    """Predict the stock will go up or down.

    A cell is labelled ``1`` when it is greater than or equal to zero and
    ``0`` otherwise.
    """
    labels = returns.apply(
        lambda column: np.where(column >= 0, 1, 0), axis=0)
    return labels


def calculate_returns(stocks):
    """Calculate the real returns of all indices without normalization.

    Each row is compared with the row before it; rows that contain a
    missing value afterwards (the first row at least) are dropped.
    """
    # stocks = stocks[["Close", "Name"]]
    # stocks = stocks.pivot_table(
    #     values='Close', index=stocks.index, columns='Name', aggfunc='first')
    previous = stocks.shift(1)
    returns = (stocks - previous) / previous
    returns = returns.dropna()
    return returns


def calculate_log_returns(stocks):
    """Calculate the log returns of all indices without normalization.

    Each row is compared with the row before it; rows that contain a
    missing value afterwards (the first row at least) are dropped.
    """
    # stocks = stocks[["Close", "Name"]]
    # stocks = stocks.pivot_table(
    #     values='Close', index=stocks.index, columns='Name', aggfunc='first')
    returns = np.log(stocks) - np.log(stocks.shift(1))
    returns = returns.dropna()
    return returns


def normalize_data(df):
    """Normalize a dataframe.

    Every row is centred on its own mean and divided by its own standard
    deviation. The result is returned as a plain array (``.values``).
    """
    mean = df.mean(axis=1)
    std = df.std(axis=1)
    df = df.sub(mean, axis=0)
    df = df.div(std, axis=0)
    df = df.values
    return df


def generate_time_series_sample(data, target, timestep):
    """Generate samples of a time series with a certain length.

    The batch size is set to ``data.shape[0] - timestep`` and only the
    first batch of the generator is returned, as an ``(inputs, targets)``
    pair.
    """
    generator = TimeseriesGenerator(data, target,
                                    length=timestep, sampling_rate=1,
                                    batch_size=(data.shape[0] - timestep))
    first_batch = generator[0]
    return first_batch[0], first_batch[1]
# --------------------------------------------------------------------------------