├── .gitignore
├── .whitesource
├── LICENSE
├── README.md
├── notebook
    ├── [Experiment] Long Short Term Memory - Multi-dimensional - Validation.ipynb
    ├── [Experiment] Long Short Term Memory - Sanity Check - 1 feature - Absolute Return.ipynb
    ├── [Experiment] Long Short Term Memory - Sanity Check - 1 feature.ipynb
    ├── [Experiment] Long Short Term Memory - Sanity Check - Absolute Return.ipynb
    ├── [Experiment] Long Short Term Memory - Sanity Check.ipynb
    ├── [Experiment] Long Short Term Memory - Stateful vs Stateless - Multi-dimensional.ipynb
    ├── [Experiment] Long Short Term Memory - Stateful vs Stateless.ipynb
    ├── [Experiment] Long Short Term Memory - Training - 1 feature - Absolute Return.ipynb
    ├── [Experiment] Long Short Term Memory - Training - 1 feature.ipynb
    ├── [Experiment] Long Short Term Memory - Training - 1 ticker - Absolute Return.ipynb
    ├── [Experiment] Long Short Term Memory - Training - Absolute Return.ipynb
    ├── [Experiment] Long Short Term Memory - Training.ipynb
    ├── [Official] Backtesting - Upper bound.ipynb
    ├── [Official] Backtesting LSTM - 1 feature - Absolute Return.ipynb
    ├── [Official] Backtesting LSTM - 1 feature - Dropout.ipynb
    ├── [Official] Backtesting LSTM - 1 feature.ipynb
    ├── [Official] Backtesting LSTM - Absolute Return.ipynb
    ├── [Official] Backtesting LSTM.ipynb
    ├── [Official] Backtesting.ipynb
    ├── [Official] Benchmark Machine Learning.ipynb
    ├── [Official] Data Preparation - Frankfurt Stock Exchange.ipynb
    ├── [Official] Data Preparation and Visualization.ipynb
    ├── [Official] Long Short Term Memory - 1 feature.ipynb
    ├── [Official] Long Short Term Memory - Price.ipynb
    ├── [Official] Long Short Term Memory.ipynb
    ├── [Official] Trading Algorithm.ipynb
    ├── [Official] [Trading Algorithm - Baseline 1] Equally weighted portfolio.ipynb
    ├── [Trading Algorithm - Baseline 1] Random strategy.ipynb
    ├── [Trading Algorithm - Baseline 1] Statistics.ipynb
    └── best_model.h5
└── src
    ├── calculate_returns.py
    ├── divide_period.py
    ├── make_dataframe.py
    ├── make_dataset.py
    ├── random_forest.py
    ├── random_strategy.py
    ├── train.py
    ├── train_one_ticker.py
    └── utils.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | data/*
107 | # !data/dowjones_calculated
108 | # !data/dowjones_calculated/*
109 | model/*
110 | 


--------------------------------------------------------------------------------
/.whitesource:
--------------------------------------------------------------------------------
 1 | {
 2 |   "scanSettings": {
 3 |     "baseBranches": []
 4 |   },
 5 |   "checkRunSettings": {
 6 |     "vulnerableCheckRunConclusionLevel": "failure",
 7 |     "displayMode": "diff"
 8 |   },
 9 |   "issueSettings": {
10 |     "minSeverityLevel": "LOW"
11 |   }
12 | }


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 tqa236
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Algorithmic trading using LSTM
2 | Reproduces the results of the paper "Deep Learning with Long Short-Term Memory Networks for Financial Market Predictions" (Fischer & Krauss, 2018).
3 | 


--------------------------------------------------------------------------------
/notebook/[Experiment] Long Short Term Memory - Multi-dimensional - Validation.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "ExecuteTime": {
  8 |      "end_time": "2019-03-10T15:15:02.021024Z",
  9 |      "start_time": "2019-03-10T15:15:01.029972Z"
 10 |     },
 11 |     "scrolled": true
 12 |    },
 13 |    "outputs": [],
 14 |    "source": [
 15 |     "# List all devices\n",
 16 |     "from tensorflow.python.client import device_lib\n",
 17 |     "# print(device_lib.list_local_devices())"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 2,
 23 |    "metadata": {
 24 |     "ExecuteTime": {
 25 |      "end_time": "2019-03-10T15:15:02.340559Z",
 26 |      "start_time": "2019-03-10T15:15:02.022700Z"
 27 |     },
 28 |     "scrolled": true
 29 |    },
 30 |    "outputs": [
 31 |     {
 32 |      "name": "stderr",
 33 |      "output_type": "stream",
 34 |      "text": [
 35 |       "Using TensorFlow backend.\n"
 36 |      ]
 37 |     },
 38 |     {
 39 |      "data": {
 40 |       "text/plain": [
 41 |        "['/job:localhost/replica:0/task:0/device:GPU:0']"
 42 |       ]
 43 |      },
 44 |      "execution_count": 2,
 45 |      "metadata": {},
 46 |      "output_type": "execute_result"
 47 |     }
 48 |    ],
 49 |    "source": [
 50 |     "# Check available GPU\n",
 51 |     "from keras import backend as K\n",
 52 |     "K.tensorflow_backend._get_available_gpus()"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 3,
 58 |    "metadata": {
 59 |     "ExecuteTime": {
 60 |      "end_time": "2019-03-10T15:15:02.345722Z",
 61 |      "start_time": "2019-03-10T15:15:02.342072Z"
 62 |     }
 63 |    },
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "import os\n",
 67 |     "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\";\n",
 68 |     "# The GPU id to use, usually either \"0\" or \"1\";\n",
 69 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\";  "
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 4,
 75 |    "metadata": {
 76 |     "ExecuteTime": {
 77 |      "end_time": "2019-03-10T15:15:02.747170Z",
 78 |      "start_time": "2019-03-10T15:15:02.347906Z"
 79 |     }
 80 |    },
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "# Importing the libraries\n",
 84 |     "import numpy as np\n",
 85 |     "import pandas as pd\n",
 86 |     "from keras.models import Sequential\n",
 87 |     "from keras.layers import Dense, LSTM, Dropout, Reshape, Lambda, GRU, BatchNormalization, Bidirectional\n",
 88 |     "from keras.preprocessing.sequence import TimeseriesGenerator\n",
 89 |     "from keras.callbacks import EarlyStopping, ModelCheckpoint\n",
 90 |     "from keras.activations import softmax\n",
 91 |     "from keras.optimizers import SGD, RMSprop\n",
 92 |     "import math\n",
 93 |     "import pickle\n",
 94 |     "import matplotlib.pyplot as plt\n",
 95 |     "from keras.utils import to_categorical\n",
 96 |     "from sklearn.preprocessing import StandardScaler"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": 5,
102 |    "metadata": {
103 |     "ExecuteTime": {
104 |      "end_time": "2019-03-10T15:15:02.753249Z",
105 |      "start_time": "2019-03-10T15:15:02.749539Z"
106 |     }
107 |    },
108 |    "outputs": [],
109 |    "source": [
110 |     "def calculate_class(returns):\n",
111 |     "    \"\"\"Find the class for each LSTM sequence based on the median returns.\"\"\"\n",
112 |     "    median_returns = returns.median(axis=1)\n",
113 |     "    labels = returns.iloc[:, :].apply(lambda x: np.where\n",
114 |     "                                      (x >= median_returns, 1, 0), axis=0)\n",
115 |     "    return labels"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 6,
121 |    "metadata": {
122 |     "ExecuteTime": {
123 |      "end_time": "2019-03-10T15:15:02.840157Z",
124 |      "start_time": "2019-03-10T15:15:02.754637Z"
125 |     }
126 |    },
127 |    "outputs": [
128 |     {
129 |      "data": {
130 |       "text/plain": [
131 |        "(750, 31)"
132 |       ]
133 |      },
134 |      "execution_count": 6,
135 |      "metadata": {},
136 |      "output_type": "execute_result"
137 |     }
138 |    ],
139 |    "source": [
140 |     "np.random.seed(2)\n",
141 |     "feature = 31\n",
142 |     "x_train = np.random.rand(750, feature)\n",
143 |     "x_train.shape"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 7,
149 |    "metadata": {
150 |     "ExecuteTime": {
151 |      "end_time": "2019-03-10T15:15:02.913412Z",
152 |      "start_time": "2019-03-10T15:15:02.845965Z"
153 |     }
154 |    },
155 |    "outputs": [
156 |     {
157 |      "data": {
158 |       "text/plain": [
159 |        "(750, 31, 2)"
160 |       ]
161 |      },
162 |      "execution_count": 7,
163 |      "metadata": {},
164 |      "output_type": "execute_result"
165 |     }
166 |    ],
167 |    "source": [
168 |     "y_train = to_categorical(calculate_class(pd.DataFrame(x_train)).values, 2)\n",
169 |     "y_train.shape"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "code",
174 |    "execution_count": 8,
175 |    "metadata": {
176 |     "ExecuteTime": {
177 |      "end_time": "2019-03-10T15:15:02.977431Z",
178 |      "start_time": "2019-03-10T15:15:02.914749Z"
179 |     }
180 |    },
181 |    "outputs": [],
182 |    "source": [
183 |     "x_test = np.random.rand(750, feature)\n",
184 |     "y_test = to_categorical(calculate_class(pd.DataFrame(x_test)).values, 2)"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": 9,
190 |    "metadata": {
191 |     "ExecuteTime": {
192 |      "end_time": "2019-03-10T15:15:03.040801Z",
193 |      "start_time": "2019-03-10T15:15:02.978941Z"
194 |     }
195 |    },
196 |    "outputs": [
197 |     {
198 |      "data": {
199 |       "text/plain": [
200 |        "array([0.4359949 , 0.02592623, 0.54966248, 0.43532239, 0.4203678 ,\n",
201 |        "       0.33033482, 0.20464863, 0.61927097, 0.29965467, 0.26682728,\n",
202 |        "       0.62113383, 0.52914209, 0.13457995, 0.51357812, 0.18443987,\n",
203 |        "       0.78533515, 0.85397529, 0.49423684, 0.84656149, 0.07964548,\n",
204 |        "       0.50524609, 0.0652865 , 0.42812233, 0.09653092, 0.12715997,\n",
205 |        "       0.59674531, 0.226012  , 0.10694568, 0.22030621, 0.34982629,\n",
206 |        "       0.46778748])"
207 |       ]
208 |      },
209 |      "execution_count": 9,
210 |      "metadata": {},
211 |      "output_type": "execute_result"
212 |     }
213 |    ],
214 |    "source": [
215 |     "x_train[0]"
216 |    ]
217 |   },
218 |   {
219 |    "cell_type": "code",
220 |    "execution_count": 10,
221 |    "metadata": {
222 |     "ExecuteTime": {
223 |      "end_time": "2019-03-10T15:15:03.110679Z",
224 |      "start_time": "2019-03-10T15:15:03.044348Z"
225 |     }
226 |    },
227 |    "outputs": [
228 |     {
229 |      "data": {
230 |       "text/plain": [
231 |        "array([0.14867022, 0.13899823, 0.10494235, 0.87939913, 0.18732867,\n",
232 |        "       0.22262717, 0.7317139 , 0.77066332, 0.10318812, 0.38814662,\n",
233 |        "       0.56174004, 0.35915058, 0.41018272, 0.8014471 , 0.81498221,\n",
234 |        "       0.87985186, 0.85469715, 0.81734218, 0.66587059, 0.85641202,\n",
235 |        "       0.54491559, 0.67412301, 0.34791387, 0.87840982, 0.60886867,\n",
236 |        "       0.4042137 , 0.12943719, 0.66850456, 0.93534669, 0.88344742,\n",
237 |        "       0.57987801])"
238 |       ]
239 |      },
240 |      "execution_count": 10,
241 |      "metadata": {},
242 |      "output_type": "execute_result"
243 |     }
244 |    ],
245 |    "source": [
246 |     "x_test[0]"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": 11,
252 |    "metadata": {
253 |     "ExecuteTime": {
254 |      "end_time": "2019-03-10T15:15:03.176886Z",
255 |      "start_time": "2019-03-10T15:15:03.112562Z"
256 |     }
257 |    },
258 |    "outputs": [],
259 |    "source": [
260 |     "timestep = 240"
261 |    ]
262 |   },
263 |   {
264 |    "cell_type": "code",
265 |    "execution_count": 12,
266 |    "metadata": {
267 |     "ExecuteTime": {
268 |      "end_time": "2019-03-10T15:15:03.290749Z",
269 |      "start_time": "2019-03-10T15:15:03.179221Z"
270 |     }
271 |    },
272 |    "outputs": [
273 |     {
274 |      "name": "stdout",
275 |      "output_type": "stream",
276 |      "text": [
277 |       "x shape: (509, 240, 31)\n",
278 |       "y shape: (509, 31, 2)\n"
279 |      ]
280 |     }
281 |    ],
282 |    "source": [
283 |     "x_series = [x_train[i:i+timestep, :] for i in range(1, x_train.shape[0] - timestep)]\n",
284 |     "y_series = [y_train[i+timestep] for i in range(y_train.shape[0] - timestep - 1)]\n",
285 |     "x = np.array(x_series)\n",
286 |     "y = np.array(y_series)\n",
287 |     "print(f\"x shape: {x.shape}\")\n",
288 |     "print(f\"y shape: {y.shape}\")"
289 |    ]
290 |   },
291 |   {
292 |    "cell_type": "code",
293 |    "execution_count": 28,
294 |    "metadata": {
295 |     "ExecuteTime": {
296 |      "end_time": "2019-03-10T15:25:47.817681Z",
297 |      "start_time": "2019-03-10T15:25:47.779716Z"
298 |     }
299 |    },
300 |    "outputs": [
301 |     {
302 |      "name": "stdout",
303 |      "output_type": "stream",
304 |      "text": [
305 |       "x1 shape: (509, 240, 31)\n",
306 |       "y1 shape: (509, 31, 2)\n"
307 |      ]
308 |     }
309 |    ],
310 |    "source": [
311 |     "x_series1 = [x_test[i:i+timestep, :] for i in range(1, x_test.shape[0] - timestep)]\n",
312 |     "y_series1 = [y_test[i+timestep] for i in range(y_test.shape[0] - timestep - 1)]\n",
313 |     "x1 = np.array(x_series1)\n",
314 |     "y1 = np.array(y_series1)\n",
315 |     "print(f\"x1 shape: {x1.shape}\")\n",
316 |     "print(f\"y1 shape: {y1.shape}\")"
317 |    ]
318 |   },
319 |   {
320 |    "cell_type": "code",
321 |    "execution_count": 24,
322 |    "metadata": {
323 |     "ExecuteTime": {
324 |      "end_time": "2019-03-10T15:16:57.614508Z",
325 |      "start_time": "2019-03-10T15:16:57.365758Z"
326 |     }
327 |    },
328 |    "outputs": [
329 |     {
330 |      "name": "stdout",
331 |      "output_type": "stream",
332 |      "text": [
333 |       "_________________________________________________________________\n",
334 |       "Layer (type)                 Output Shape              Param #   \n",
335 |       "=================================================================\n",
336 |       "lstm_2 (LSTM)                (None, 25)                5700      \n",
337 |       "_________________________________________________________________\n",
338 |       "dense_3 (Dense)              (None, 62)                1612      \n",
339 |       "_________________________________________________________________\n",
340 |       "reshape_2 (Reshape)          (None, 31, 2)             0         \n",
341 |       "_________________________________________________________________\n",
342 |       "dense_4 (Dense)              (None, 31, 2)             6         \n",
343 |       "=================================================================\n",
344 |       "Total params: 7,318\n",
345 |       "Trainable params: 7,318\n",
346 |       "Non-trainable params: 0\n",
347 |       "_________________________________________________________________\n"
348 |      ]
349 |     }
350 |    ],
351 |    "source": [
352 |     "regressor = Sequential()\n",
353 |     "regressor.add(LSTM(units=25, input_shape=(timestep, feature)))\n",
354 |     "regressor.add(Dense(feature * 2, activation='relu'))\n",
355 |     "regressor.add(Reshape((feature, 2)))\n",
356 |     "# regressor.add(Lambda(lambda x: softmax(x, axis=-1)))\n",
357 |     "regressor.add(Dense(2, activation='softmax'))\n",
358 |     "regressor.compile(loss='binary_crossentropy',\n",
359 |     "                  optimizer='rmsprop',\n",
360 |     "                  metrics=['accuracy'])\n",
361 |     "regressor.summary()"
362 |    ]
363 |   },
364 |   {
365 |    "cell_type": "code",
366 |    "execution_count": 27,
367 |    "metadata": {
368 |     "ExecuteTime": {
369 |      "end_time": "2019-03-10T15:21:12.670058Z",
370 |      "start_time": "2019-03-10T15:20:59.345962Z"
371 |     }
372 |    },
373 |    "outputs": [
374 |     {
375 |      "name": "stdout",
376 |      "output_type": "stream",
377 |      "text": [
378 |       "Train on 509 samples, validate on 509 samples\n",
379 |       "Epoch 1/100\n",
380 |       "509/509 [==============================] - 4s 7ms/step - loss: 0.4044 - acc: 0.8138 - val_loss: 0.4569 - val_acc: 0.7725\n",
381 |       "Epoch 2/100\n",
382 |       "509/509 [==============================] - 4s 7ms/step - loss: 0.4009 - acc: 0.8156 - val_loss: 0.4536 - val_acc: 0.7772\n",
383 |       "Epoch 3/100\n",
384 |       "509/509 [==============================] - 4s 7ms/step - loss: 0.3970 - acc: 0.8186 - val_loss: 0.4505 - val_acc: 0.7782\n",
385 |       "Epoch 4/100\n",
386 |       "384/509 [=====================>........] - ETA: 0s - loss: 0.3954 - acc: 0.8165"
387 |      ]
388 |     },
389 |     {
390 |      "ename": "KeyboardInterrupt",
391 |      "evalue": "",
392 |      "output_type": "error",
393 |      "traceback": [
394 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
395 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
396 |       "\u001b[0;32m<ipython-input-27-152610f31559>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# result = regressor.fit_generator(train_gen, steps_per_epoch=len(train_gen), epochs=1000)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m result = regressor.fit(x, y, epochs=100, validation_data=(x1, y1), callbacks = [EarlyStopping(monitor='val_loss', mode='min', patience=10),\n\u001b[0;32m----> 3\u001b[0;31m              ModelCheckpoint(filepath='best_model.h5', monitor='val_acc', save_best_only=True)])\n\u001b[0m",
397 |       "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)\u001b[0m\n\u001b[1;32m   1037\u001b[0m                                         \u001b[0minitial_epoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minitial_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1038\u001b[0m                                         \u001b[0msteps_per_epoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msteps_per_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1039\u001b[0;31m                                         validation_steps=validation_steps)\n\u001b[0m\u001b[1;32m   1040\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1041\u001b[0m     def evaluate(self, x=None, y=None,\n",
398 |       "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/engine/training_arrays.py\u001b[0m in \u001b[0;36mfit_loop\u001b[0;34m(model, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)\u001b[0m\n\u001b[1;32m    197\u001b[0m                     \u001b[0mins_batch\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mins_batch\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 199\u001b[0;31m                 \u001b[0mouts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mins_batch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    200\u001b[0m                 \u001b[0mouts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mto_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mouts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    201\u001b[0m                 \u001b[0;32mfor\u001b[0m \u001b[0ml\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mo\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout_labels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mouts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
399 |       "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m   2713\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_legacy_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2714\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2715\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2716\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2717\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mpy_any\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mis_tensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
400 |       "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m   2673\u001b[0m             \u001b[0mfetched\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_callable_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0marray_vals\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_metadata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2674\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2675\u001b[0;31m             \u001b[0mfetched\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_callable_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0marray_vals\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2676\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mfetched\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2677\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
401 |       "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1437\u001b[0m           ret = tf_session.TF_SessionRunCallable(\n\u001b[1;32m   1438\u001b[0m               \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_session\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_handle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1439\u001b[0;31m               run_metadata_ptr)\n\u001b[0m\u001b[1;32m   1440\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1441\u001b[0m           \u001b[0mproto_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
402 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
403 |      ]
404 |     }
405 |    ],
406 |    "source": [
407 |     "# result = regressor.fit_generator(train_gen, steps_per_epoch=len(train_gen), epochs=1000)\n",
408 |     "result = regressor.fit(x, y, epochs=100, validation_data=(x1, y1), callbacks = [EarlyStopping(monitor='val_loss', mode='min', patience=10),\n",
409 |     "             ModelCheckpoint(filepath='best_model.h5', monitor='val_acc', save_best_only=True)])"
410 |    ]
411 |   },
412 |   {
413 |    "cell_type": "code",
414 |    "execution_count": 26,
415 |    "metadata": {
416 |     "ExecuteTime": {
417 |      "end_time": "2019-03-10T15:20:39.149785Z",
418 |      "start_time": "2019-03-10T15:20:39.137993Z"
419 |     }
420 |    },
421 |    "outputs": [
422 |     {
423 |      "ename": "NameError",
424 |      "evalue": "name 'result' is not defined",
425 |      "output_type": "error",
426 |      "traceback": [
427 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
428 |       "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
429 |       "\u001b[0;32m<ipython-input-26-16dc20e766e0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhistory\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"acc\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhistory\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"val_acc\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
430 |       "\u001b[0;31mNameError\u001b[0m: name 'result' is not defined"
431 |      ]
432 |     }
433 |    ],
434 |    "source": [
435 |     "plt.plot(result.history[\"acc\"])\n",
436 |     "plt.plot(result.history[\"val_acc\"])"
437 |    ]
438 |   },
439 |   {
440 |    "cell_type": "code",
441 |    "execution_count": null,
442 |    "metadata": {
443 |     "ExecuteTime": {
444 |      "end_time": "2019-03-10T15:15:05.223053Z",
445 |      "start_time": "2019-03-10T15:15:01.084Z"
446 |     }
447 |    },
448 |    "outputs": [],
449 |    "source": [
450 |     "plt.plot(result.history[\"loss\"])\n",
451 |     "plt.plot(result.history[\"val_loss\"])"
452 |    ]
453 |   }
454 |  ],
455 |  "metadata": {
456 |   "kernelspec": {
457 |    "display_name": "projet_S5",
458 |    "language": "python",
459 |    "name": "projet_s5"
460 |   },
461 |   "language_info": {
462 |    "codemirror_mode": {
463 |     "name": "ipython",
464 |     "version": 3
465 |    },
466 |    "file_extension": ".py",
467 |    "mimetype": "text/x-python",
468 |    "name": "python",
469 |    "nbconvert_exporter": "python",
470 |    "pygments_lexer": "ipython3",
471 |    "version": "3.6.8"
472 |   },
473 |   "toc": {
474 |    "base_numbering": 1,
475 |    "nav_menu": {},
476 |    "number_sections": true,
477 |    "sideBar": true,
478 |    "skip_h1_title": false,
479 |    "title_cell": "Table of Contents",
480 |    "title_sidebar": "Contents",
481 |    "toc_cell": false,
482 |    "toc_position": {},
483 |    "toc_section_display": true,
484 |    "toc_window_display": false
485 |   }
486 |  },
487 |  "nbformat": 4,
488 |  "nbformat_minor": 2
489 | }
490 | 


--------------------------------------------------------------------------------
/notebook/[Official] Backtesting - Upper bound.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "ExecuteTime": {
  8 |      "end_time": "2019-03-04T22:20:00.623019Z",
  9 |      "start_time": "2019-03-04T22:20:00.432075Z"
 10 |     }
 11 |    },
 12 |    "outputs": [],
 13 |    "source": [
 14 |     "import pandas as pd"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "metadata": {
 21 |     "ExecuteTime": {
 22 |      "end_time": "2019-03-04T22:20:01.712331Z",
 23 |      "start_time": "2019-03-04T22:20:00.624514Z"
 24 |     }
 25 |    },
 26 |    "outputs": [
 27 |     {
 28 |      "data": {
 29 |       "text/html": [
 30 |        "<div>\n",
 31 |        "<style scoped>\n",
 32 |        "    .dataframe tbody tr th:only-of-type {\n",
 33 |        "        vertical-align: middle;\n",
 34 |        "    }\n",
 35 |        "\n",
 36 |        "    .dataframe tbody tr th {\n",
 37 |        "        vertical-align: top;\n",
 38 |        "    }\n",
 39 |        "\n",
 40 |        "    .dataframe thead th {\n",
 41 |        "        text-align: right;\n",
 42 |        "    }\n",
 43 |        "</style>\n",
 44 |        "<table border=\"1\" class=\"dataframe\">\n",
 45 |        "  <thead>\n",
 46 |        "    <tr style=\"text-align: right;\">\n",
 47 |        "      <th>Name</th>\n",
 48 |        "      <th>AABA</th>\n",
 49 |        "      <th>AAPL</th>\n",
 50 |        "      <th>AMZN</th>\n",
 51 |        "      <th>AXP</th>\n",
 52 |        "      <th>BA</th>\n",
 53 |        "      <th>CAT</th>\n",
 54 |        "      <th>CSCO</th>\n",
 55 |        "      <th>CVX</th>\n",
 56 |        "      <th>DIS</th>\n",
 57 |        "      <th>GE</th>\n",
 58 |        "      <th>...</th>\n",
 59 |        "      <th>MSFT</th>\n",
 60 |        "      <th>NKE</th>\n",
 61 |        "      <th>PFE</th>\n",
 62 |        "      <th>PG</th>\n",
 63 |        "      <th>TRV</th>\n",
 64 |        "      <th>UNH</th>\n",
 65 |        "      <th>UTX</th>\n",
 66 |        "      <th>VZ</th>\n",
 67 |        "      <th>WMT</th>\n",
 68 |        "      <th>XOM</th>\n",
 69 |        "    </tr>\n",
 70 |        "    <tr>\n",
 71 |        "      <th>Date</th>\n",
 72 |        "      <th></th>\n",
 73 |        "      <th></th>\n",
 74 |        "      <th></th>\n",
 75 |        "      <th></th>\n",
 76 |        "      <th></th>\n",
 77 |        "      <th></th>\n",
 78 |        "      <th></th>\n",
 79 |        "      <th></th>\n",
 80 |        "      <th></th>\n",
 81 |        "      <th></th>\n",
 82 |        "      <th></th>\n",
 83 |        "      <th></th>\n",
 84 |        "      <th></th>\n",
 85 |        "      <th></th>\n",
 86 |        "      <th></th>\n",
 87 |        "      <th></th>\n",
 88 |        "      <th></th>\n",
 89 |        "      <th></th>\n",
 90 |        "      <th></th>\n",
 91 |        "      <th></th>\n",
 92 |        "      <th></th>\n",
 93 |        "    </tr>\n",
 94 |        "  </thead>\n",
 95 |        "  <tbody>\n",
 96 |        "    <tr>\n",
 97 |        "      <th>2006-01-03</th>\n",
 98 |        "      <td>40.91</td>\n",
 99 |        "      <td>10.68</td>\n",
100 |        "      <td>47.58</td>\n",
101 |        "      <td>52.58</td>\n",
102 |        "      <td>70.44</td>\n",
103 |        "      <td>57.80</td>\n",
104 |        "      <td>17.45</td>\n",
105 |        "      <td>59.08</td>\n",
106 |        "      <td>24.40</td>\n",
107 |        "      <td>35.37</td>\n",
108 |        "      <td>...</td>\n",
109 |        "      <td>26.84</td>\n",
110 |        "      <td>10.74</td>\n",
111 |        "      <td>23.78</td>\n",
112 |        "      <td>58.78</td>\n",
113 |        "      <td>45.99</td>\n",
114 |        "      <td>61.73</td>\n",
115 |        "      <td>56.53</td>\n",
116 |        "      <td>30.38</td>\n",
117 |        "      <td>46.23</td>\n",
118 |        "      <td>58.47</td>\n",
119 |        "    </tr>\n",
120 |        "    <tr>\n",
121 |        "      <th>2006-01-04</th>\n",
122 |        "      <td>40.97</td>\n",
123 |        "      <td>10.71</td>\n",
124 |        "      <td>47.25</td>\n",
125 |        "      <td>51.95</td>\n",
126 |        "      <td>71.17</td>\n",
127 |        "      <td>59.27</td>\n",
128 |        "      <td>17.85</td>\n",
129 |        "      <td>58.91</td>\n",
130 |        "      <td>23.99</td>\n",
131 |        "      <td>35.32</td>\n",
132 |        "      <td>...</td>\n",
133 |        "      <td>26.97</td>\n",
134 |        "      <td>10.69</td>\n",
135 |        "      <td>24.55</td>\n",
136 |        "      <td>58.89</td>\n",
137 |        "      <td>46.50</td>\n",
138 |        "      <td>61.88</td>\n",
139 |        "      <td>56.19</td>\n",
140 |        "      <td>31.27</td>\n",
141 |        "      <td>46.32</td>\n",
142 |        "      <td>58.57</td>\n",
143 |        "    </tr>\n",
144 |        "    <tr>\n",
145 |        "      <th>2006-01-05</th>\n",
146 |        "      <td>41.53</td>\n",
147 |        "      <td>10.63</td>\n",
148 |        "      <td>47.65</td>\n",
149 |        "      <td>52.50</td>\n",
150 |        "      <td>70.33</td>\n",
151 |        "      <td>59.27</td>\n",
152 |        "      <td>18.35</td>\n",
153 |        "      <td>58.19</td>\n",
154 |        "      <td>24.41</td>\n",
155 |        "      <td>35.23</td>\n",
156 |        "      <td>...</td>\n",
157 |        "      <td>26.99</td>\n",
158 |        "      <td>10.76</td>\n",
159 |        "      <td>24.58</td>\n",
160 |        "      <td>58.70</td>\n",
161 |        "      <td>46.95</td>\n",
162 |        "      <td>61.69</td>\n",
163 |        "      <td>55.98</td>\n",
164 |        "      <td>31.63</td>\n",
165 |        "      <td>45.69</td>\n",
166 |        "      <td>58.28</td>\n",
167 |        "    </tr>\n",
168 |        "    <tr>\n",
169 |        "      <th>2006-01-06</th>\n",
170 |        "      <td>43.21</td>\n",
171 |        "      <td>10.90</td>\n",
172 |        "      <td>47.87</td>\n",
173 |        "      <td>52.68</td>\n",
174 |        "      <td>69.35</td>\n",
175 |        "      <td>60.45</td>\n",
176 |        "      <td>18.77</td>\n",
177 |        "      <td>59.25</td>\n",
178 |        "      <td>24.74</td>\n",
179 |        "      <td>35.47</td>\n",
180 |        "      <td>...</td>\n",
181 |        "      <td>26.91</td>\n",
182 |        "      <td>10.72</td>\n",
183 |        "      <td>24.85</td>\n",
184 |        "      <td>58.64</td>\n",
185 |        "      <td>47.21</td>\n",
186 |        "      <td>62.90</td>\n",
187 |        "      <td>56.16</td>\n",
188 |        "      <td>31.35</td>\n",
189 |        "      <td>45.88</td>\n",
190 |        "      <td>59.43</td>\n",
191 |        "    </tr>\n",
192 |        "    <tr>\n",
193 |        "      <th>2006-01-09</th>\n",
194 |        "      <td>43.42</td>\n",
195 |        "      <td>10.86</td>\n",
196 |        "      <td>47.08</td>\n",
197 |        "      <td>53.99</td>\n",
198 |        "      <td>68.77</td>\n",
199 |        "      <td>61.55</td>\n",
200 |        "      <td>19.06</td>\n",
201 |        "      <td>58.95</td>\n",
202 |        "      <td>25.00</td>\n",
203 |        "      <td>35.38</td>\n",
204 |        "      <td>...</td>\n",
205 |        "      <td>26.86</td>\n",
206 |        "      <td>10.88</td>\n",
207 |        "      <td>24.85</td>\n",
208 |        "      <td>59.08</td>\n",
209 |        "      <td>47.23</td>\n",
210 |        "      <td>61.40</td>\n",
211 |        "      <td>56.80</td>\n",
212 |        "      <td>31.48</td>\n",
213 |        "      <td>45.71</td>\n",
214 |        "      <td>59.40</td>\n",
215 |        "    </tr>\n",
216 |        "  </tbody>\n",
217 |        "</table>\n",
218 |        "<p>5 rows × 31 columns</p>\n",
219 |        "</div>"
220 |       ],
221 |       "text/plain": [
222 |        "Name         AABA   AAPL   AMZN    AXP     BA    CAT   CSCO    CVX    DIS  \\\n",
223 |        "Date                                                                        \n",
224 |        "2006-01-03  40.91  10.68  47.58  52.58  70.44  57.80  17.45  59.08  24.40   \n",
225 |        "2006-01-04  40.97  10.71  47.25  51.95  71.17  59.27  17.85  58.91  23.99   \n",
226 |        "2006-01-05  41.53  10.63  47.65  52.50  70.33  59.27  18.35  58.19  24.41   \n",
227 |        "2006-01-06  43.21  10.90  47.87  52.68  69.35  60.45  18.77  59.25  24.74   \n",
228 |        "2006-01-09  43.42  10.86  47.08  53.99  68.77  61.55  19.06  58.95  25.00   \n",
229 |        "\n",
230 |        "Name           GE  ...     MSFT    NKE    PFE     PG    TRV    UNH    UTX  \\\n",
231 |        "Date               ...                                                      \n",
232 |        "2006-01-03  35.37  ...    26.84  10.74  23.78  58.78  45.99  61.73  56.53   \n",
233 |        "2006-01-04  35.32  ...    26.97  10.69  24.55  58.89  46.50  61.88  56.19   \n",
234 |        "2006-01-05  35.23  ...    26.99  10.76  24.58  58.70  46.95  61.69  55.98   \n",
235 |        "2006-01-06  35.47  ...    26.91  10.72  24.85  58.64  47.21  62.90  56.16   \n",
236 |        "2006-01-09  35.38  ...    26.86  10.88  24.85  59.08  47.23  61.40  56.80   \n",
237 |        "\n",
238 |        "Name           VZ    WMT    XOM  \n",
239 |        "Date                             \n",
240 |        "2006-01-03  30.38  46.23  58.47  \n",
241 |        "2006-01-04  31.27  46.32  58.57  \n",
242 |        "2006-01-05  31.63  45.69  58.28  \n",
243 |        "2006-01-06  31.35  45.88  59.43  \n",
244 |        "2006-01-09  31.48  45.71  59.40  \n",
245 |        "\n",
246 |        "[5 rows x 31 columns]"
247 |       ]
248 |      },
249 |      "execution_count": 2,
250 |      "metadata": {},
251 |      "output_type": "execute_result"
252 |     }
253 |    ],
254 |    "source": [
255 |     "stocks = pd.read_csv(\"../data/dowjones/all_stocks_2006-01-01_to_2018-01-01.csv\", index_col='Date',parse_dates=['Date'])\n",
256 |     "stocks = stocks[[\"Close\", \"Name\"]]\n",
257 |     "stocks = stocks.pivot_table(values='Close', index=stocks.index, columns='Name', aggfunc='first')\n",
258 |     "stocks.head()"
259 |    ]
260 |   },
261 |   {
262 |    "cell_type": "code",
263 |    "execution_count": 3,
264 |    "metadata": {
265 |     "ExecuteTime": {
266 |      "end_time": "2019-03-04T22:20:01.755068Z",
267 |      "start_time": "2019-03-04T22:20:01.713442Z"
268 |     }
269 |    },
270 |    "outputs": [
271 |     {
272 |      "data": {
273 |       "text/html": [
274 |        "<div>\n",
275 |        "<style scoped>\n",
276 |        "    .dataframe tbody tr th:only-of-type {\n",
277 |        "        vertical-align: middle;\n",
278 |        "    }\n",
279 |        "\n",
280 |        "    .dataframe tbody tr th {\n",
281 |        "        vertical-align: top;\n",
282 |        "    }\n",
283 |        "\n",
284 |        "    .dataframe thead th {\n",
285 |        "        text-align: right;\n",
286 |        "    }\n",
287 |        "</style>\n",
288 |        "<table border=\"1\" class=\"dataframe\">\n",
289 |        "  <thead>\n",
290 |        "    <tr style=\"text-align: right;\">\n",
291 |        "      <th>Name</th>\n",
292 |        "      <th>AABA</th>\n",
293 |        "      <th>AAPL</th>\n",
294 |        "      <th>AMZN</th>\n",
295 |        "      <th>AXP</th>\n",
296 |        "      <th>BA</th>\n",
297 |        "      <th>CAT</th>\n",
298 |        "      <th>CSCO</th>\n",
299 |        "      <th>CVX</th>\n",
300 |        "      <th>DIS</th>\n",
301 |        "      <th>GE</th>\n",
302 |        "      <th>...</th>\n",
303 |        "      <th>MSFT</th>\n",
304 |        "      <th>NKE</th>\n",
305 |        "      <th>PFE</th>\n",
306 |        "      <th>PG</th>\n",
307 |        "      <th>TRV</th>\n",
308 |        "      <th>UNH</th>\n",
309 |        "      <th>UTX</th>\n",
310 |        "      <th>VZ</th>\n",
311 |        "      <th>WMT</th>\n",
312 |        "      <th>XOM</th>\n",
313 |        "    </tr>\n",
314 |        "    <tr>\n",
315 |        "      <th>Date</th>\n",
316 |        "      <th></th>\n",
317 |        "      <th></th>\n",
318 |        "      <th></th>\n",
319 |        "      <th></th>\n",
320 |        "      <th></th>\n",
321 |        "      <th></th>\n",
322 |        "      <th></th>\n",
323 |        "      <th></th>\n",
324 |        "      <th></th>\n",
325 |        "      <th></th>\n",
326 |        "      <th></th>\n",
327 |        "      <th></th>\n",
328 |        "      <th></th>\n",
329 |        "      <th></th>\n",
330 |        "      <th></th>\n",
331 |        "      <th></th>\n",
332 |        "      <th></th>\n",
333 |        "      <th></th>\n",
334 |        "      <th></th>\n",
335 |        "      <th></th>\n",
336 |        "      <th></th>\n",
337 |        "    </tr>\n",
338 |        "  </thead>\n",
339 |        "  <tbody>\n",
340 |        "    <tr>\n",
341 |        "      <th>2006-01-04</th>\n",
342 |        "      <td>0.001467</td>\n",
343 |        "      <td>0.002809</td>\n",
344 |        "      <td>-0.006936</td>\n",
345 |        "      <td>-0.011982</td>\n",
346 |        "      <td>0.010363</td>\n",
347 |        "      <td>0.025433</td>\n",
348 |        "      <td>0.022923</td>\n",
349 |        "      <td>-0.002877</td>\n",
350 |        "      <td>-0.016803</td>\n",
351 |        "      <td>-0.001414</td>\n",
352 |        "      <td>...</td>\n",
353 |        "      <td>0.004844</td>\n",
354 |        "      <td>-0.004655</td>\n",
355 |        "      <td>0.032380</td>\n",
356 |        "      <td>0.001871</td>\n",
357 |        "      <td>0.011089</td>\n",
358 |        "      <td>0.002430</td>\n",
359 |        "      <td>-0.006015</td>\n",
360 |        "      <td>0.029296</td>\n",
361 |        "      <td>0.001947</td>\n",
362 |        "      <td>0.001710</td>\n",
363 |        "    </tr>\n",
364 |        "    <tr>\n",
365 |        "      <th>2006-01-05</th>\n",
366 |        "      <td>0.013669</td>\n",
367 |        "      <td>-0.007470</td>\n",
368 |        "      <td>0.008466</td>\n",
369 |        "      <td>0.010587</td>\n",
370 |        "      <td>-0.011803</td>\n",
371 |        "      <td>0.000000</td>\n",
372 |        "      <td>0.028011</td>\n",
373 |        "      <td>-0.012222</td>\n",
374 |        "      <td>0.017507</td>\n",
375 |        "      <td>-0.002548</td>\n",
376 |        "      <td>...</td>\n",
377 |        "      <td>0.000742</td>\n",
378 |        "      <td>0.006548</td>\n",
379 |        "      <td>0.001222</td>\n",
380 |        "      <td>-0.003226</td>\n",
381 |        "      <td>0.009677</td>\n",
382 |        "      <td>-0.003070</td>\n",
383 |        "      <td>-0.003737</td>\n",
384 |        "      <td>0.011513</td>\n",
385 |        "      <td>-0.013601</td>\n",
386 |        "      <td>-0.004951</td>\n",
387 |        "    </tr>\n",
388 |        "    <tr>\n",
389 |        "      <th>2006-01-06</th>\n",
390 |        "      <td>0.040453</td>\n",
391 |        "      <td>0.025400</td>\n",
392 |        "      <td>0.004617</td>\n",
393 |        "      <td>0.003429</td>\n",
394 |        "      <td>-0.013934</td>\n",
395 |        "      <td>0.019909</td>\n",
396 |        "      <td>0.022888</td>\n",
397 |        "      <td>0.018216</td>\n",
398 |        "      <td>0.013519</td>\n",
399 |        "      <td>0.006812</td>\n",
400 |        "      <td>...</td>\n",
401 |        "      <td>-0.002964</td>\n",
402 |        "      <td>-0.003717</td>\n",
403 |        "      <td>0.010985</td>\n",
404 |        "      <td>-0.001022</td>\n",
405 |        "      <td>0.005538</td>\n",
406 |        "      <td>0.019614</td>\n",
407 |        "      <td>0.003215</td>\n",
408 |        "      <td>-0.008852</td>\n",
409 |        "      <td>0.004158</td>\n",
410 |        "      <td>0.019732</td>\n",
411 |        "    </tr>\n",
412 |        "    <tr>\n",
413 |        "      <th>2006-01-09</th>\n",
414 |        "      <td>0.004860</td>\n",
415 |        "      <td>-0.003670</td>\n",
416 |        "      <td>-0.016503</td>\n",
417 |        "      <td>0.024867</td>\n",
418 |        "      <td>-0.008363</td>\n",
419 |        "      <td>0.018197</td>\n",
420 |        "      <td>0.015450</td>\n",
421 |        "      <td>-0.005063</td>\n",
422 |        "      <td>0.010509</td>\n",
423 |        "      <td>-0.002537</td>\n",
424 |        "      <td>...</td>\n",
425 |        "      <td>-0.001858</td>\n",
426 |        "      <td>0.014925</td>\n",
427 |        "      <td>0.000000</td>\n",
428 |        "      <td>0.007503</td>\n",
429 |        "      <td>0.000424</td>\n",
430 |        "      <td>-0.023847</td>\n",
431 |        "      <td>0.011396</td>\n",
432 |        "      <td>0.004147</td>\n",
433 |        "      <td>-0.003705</td>\n",
434 |        "      <td>-0.000505</td>\n",
435 |        "    </tr>\n",
436 |        "    <tr>\n",
437 |        "      <th>2006-01-10</th>\n",
438 |        "      <td>-0.010134</td>\n",
439 |        "      <td>0.063536</td>\n",
440 |        "      <td>-0.030374</td>\n",
441 |        "      <td>-0.002964</td>\n",
442 |        "      <td>0.004799</td>\n",
443 |        "      <td>-0.004062</td>\n",
444 |        "      <td>-0.004722</td>\n",
445 |        "      <td>0.004919</td>\n",
446 |        "      <td>0.012800</td>\n",
447 |        "      <td>-0.005370</td>\n",
448 |        "      <td>...</td>\n",
449 |        "      <td>0.005212</td>\n",
450 |        "      <td>0.001838</td>\n",
451 |        "      <td>-0.016499</td>\n",
452 |        "      <td>-0.003893</td>\n",
453 |        "      <td>-0.007199</td>\n",
454 |        "      <td>0.019707</td>\n",
455 |        "      <td>0.000704</td>\n",
456 |        "      <td>0.004130</td>\n",
457 |        "      <td>0.003282</td>\n",
458 |        "      <td>0.007744</td>\n",
459 |        "    </tr>\n",
460 |        "  </tbody>\n",
461 |        "</table>\n",
462 |        "<p>5 rows × 31 columns</p>\n",
463 |        "</div>"
464 |       ],
465 |       "text/plain": [
466 |        "Name            AABA      AAPL      AMZN       AXP        BA       CAT  \\\n",
467 |        "Date                                                                     \n",
468 |        "2006-01-04  0.001467  0.002809 -0.006936 -0.011982  0.010363  0.025433   \n",
469 |        "2006-01-05  0.013669 -0.007470  0.008466  0.010587 -0.011803  0.000000   \n",
470 |        "2006-01-06  0.040453  0.025400  0.004617  0.003429 -0.013934  0.019909   \n",
471 |        "2006-01-09  0.004860 -0.003670 -0.016503  0.024867 -0.008363  0.018197   \n",
472 |        "2006-01-10 -0.010134  0.063536 -0.030374 -0.002964  0.004799 -0.004062   \n",
473 |        "\n",
474 |        "Name            CSCO       CVX       DIS        GE    ...         MSFT  \\\n",
475 |        "Date                                                  ...                \n",
476 |        "2006-01-04  0.022923 -0.002877 -0.016803 -0.001414    ...     0.004844   \n",
477 |        "2006-01-05  0.028011 -0.012222  0.017507 -0.002548    ...     0.000742   \n",
478 |        "2006-01-06  0.022888  0.018216  0.013519  0.006812    ...    -0.002964   \n",
479 |        "2006-01-09  0.015450 -0.005063  0.010509 -0.002537    ...    -0.001858   \n",
480 |        "2006-01-10 -0.004722  0.004919  0.012800 -0.005370    ...     0.005212   \n",
481 |        "\n",
482 |        "Name             NKE       PFE        PG       TRV       UNH       UTX  \\\n",
483 |        "Date                                                                     \n",
484 |        "2006-01-04 -0.004655  0.032380  0.001871  0.011089  0.002430 -0.006015   \n",
485 |        "2006-01-05  0.006548  0.001222 -0.003226  0.009677 -0.003070 -0.003737   \n",
486 |        "2006-01-06 -0.003717  0.010985 -0.001022  0.005538  0.019614  0.003215   \n",
487 |        "2006-01-09  0.014925  0.000000  0.007503  0.000424 -0.023847  0.011396   \n",
488 |        "2006-01-10  0.001838 -0.016499 -0.003893 -0.007199  0.019707  0.000704   \n",
489 |        "\n",
490 |        "Name              VZ       WMT       XOM  \n",
491 |        "Date                                      \n",
492 |        "2006-01-04  0.029296  0.001947  0.001710  \n",
493 |        "2006-01-05  0.011513 -0.013601 -0.004951  \n",
494 |        "2006-01-06 -0.008852  0.004158  0.019732  \n",
495 |        "2006-01-09  0.004147 -0.003705 -0.000505  \n",
496 |        "2006-01-10  0.004130  0.003282  0.007744  \n",
497 |        "\n",
498 |        "[5 rows x 31 columns]"
499 |       ]
500 |      },
501 |      "execution_count": 3,
502 |      "metadata": {},
503 |      "output_type": "execute_result"
504 |     }
505 |    ],
506 |    "source": [
507 |     "real_returns = (stocks - stocks.shift(1)) / stocks.shift(1)\n",
508 |     "real_returns = real_returns.dropna()\n",
509 |     "real_returns.head()"
510 |    ]
511 |   },
512 |   {
513 |    "cell_type": "code",
514 |    "execution_count": 4,
515 |    "metadata": {
516 |     "ExecuteTime": {
517 |      "end_time": "2019-03-04T22:20:01.793446Z",
518 |      "start_time": "2019-03-04T22:20:01.757070Z"
519 |     }
520 |    },
521 |    "outputs": [],
522 |    "source": [
523 |     "labels = pd.read_csv(\"../data/dowjones_calculated/labels.csv\", index_col='Date', parse_dates=['Date'])"
524 |    ]
525 |   },
526 |   {
527 |    "cell_type": "code",
528 |    "execution_count": 5,
529 |    "metadata": {
530 |     "ExecuteTime": {
531 |      "end_time": "2019-03-04T22:20:01.883028Z",
532 |      "start_time": "2019-03-04T22:20:01.795329Z"
533 |     }
534 |    },
535 |    "outputs": [
536 |     {
537 |      "data": {
538 |       "text/plain": [
539 |        "(3015, 31)"
540 |       ]
541 |      },
542 |      "execution_count": 5,
543 |      "metadata": {},
544 |      "output_type": "execute_result"
545 |     }
546 |    ],
547 |    "source": [
548 |     "labels.shape"
549 |    ]
550 |   },
551 |   {
552 |    "cell_type": "code",
553 |    "execution_count": 6,
554 |    "metadata": {
555 |     "ExecuteTime": {
556 |      "end_time": "2019-03-04T22:20:01.951594Z",
557 |      "start_time": "2019-03-04T22:20:01.884435Z"
558 |     }
559 |    },
560 |    "outputs": [],
561 |    "source": [
562 |     "k = 10"
563 |    ]
564 |   },
565 |   {
566 |    "cell_type": "code",
567 |    "execution_count": 7,
568 |    "metadata": {
569 |     "ExecuteTime": {
570 |      "end_time": "2019-03-04T22:20:02.021016Z",
571 |      "start_time": "2019-03-04T22:20:01.955397Z"
572 |     }
573 |    },
574 |    "outputs": [],
575 |    "source": [
576 |     "final_returns = real_returns[750:3000].mul(labels[750:3000])"
577 |    ]
578 |   },
579 |   {
580 |    "cell_type": "code",
581 |    "execution_count": 8,
582 |    "metadata": {
583 |     "ExecuteTime": {
584 |      "end_time": "2019-03-04T22:20:02.090299Z",
585 |      "start_time": "2019-03-04T22:20:02.024960Z"
586 |     }
587 |    },
588 |    "outputs": [
589 |     {
590 |      "data": {
591 |       "text/plain": [
592 |        "42347.751327396145"
593 |       ]
594 |      },
595 |      "execution_count": 8,
596 |      "metadata": {},
597 |      "output_type": "execute_result"
598 |     }
599 |    ],
600 |    "source": [
601 |     "(final_returns + 1).product().sum()/(2 * k)"
602 |    ]
603 |   },
604 |   {
605 |    "cell_type": "code",
606 |    "execution_count": 9,
607 |    "metadata": {
608 |     "ExecuteTime": {
609 |      "end_time": "2019-03-04T22:20:02.156823Z",
610 |      "start_time": "2019-03-04T22:20:02.092813Z"
611 |     }
612 |    },
613 |    "outputs": [
614 |     {
615 |      "data": {
616 |       "text/plain": [
617 |        "930673.1112343696"
618 |       ]
619 |      },
620 |      "execution_count": 9,
621 |      "metadata": {},
622 |      "output_type": "execute_result"
623 |     }
624 |    ],
625 |    "source": [
626 |     "(1 + final_returns.sum(axis = 1)/(2 * k)).product()"
627 |    ]
628 |   }
629 |  ],
630 |  "metadata": {
631 |   "kernelspec": {
632 |    "display_name": "projet_S5",
633 |    "language": "python",
634 |    "name": "projet_s5"
635 |   },
636 |   "language_info": {
637 |    "codemirror_mode": {
638 |     "name": "ipython",
639 |     "version": 3
640 |    },
641 |    "file_extension": ".py",
642 |    "mimetype": "text/x-python",
643 |    "name": "python",
644 |    "nbconvert_exporter": "python",
645 |    "pygments_lexer": "ipython3",
646 |    "version": "3.6.8"
647 |   },
648 |   "toc": {
649 |    "base_numbering": 1,
650 |    "nav_menu": {},
651 |    "number_sections": true,
652 |    "sideBar": true,
653 |    "skip_h1_title": false,
654 |    "title_cell": "Table of Contents",
655 |    "title_sidebar": "Contents",
656 |    "toc_cell": false,
657 |    "toc_position": {},
658 |    "toc_section_display": true,
659 |    "toc_window_display": false
660 |   }
661 |  },
662 |  "nbformat": 4,
663 |  "nbformat_minor": 2
664 | }
665 | 


--------------------------------------------------------------------------------
/notebook/[Official] Backtesting LSTM - 1 feature - Absolute Return.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "ExecuteTime": {
  8 |      "end_time": "2019-03-10T19:03:30.208741Z",
  9 |      "start_time": "2019-03-10T19:03:29.088794Z"
 10 |     },
 11 |     "scrolled": true
 12 |    },
 13 |    "outputs": [],
 14 |    "source": [
 15 |     "# List all devices\n",
 16 |     "from tensorflow.python.client import device_lib\n",
 17 |     "# print(device_lib.list_local_devices())"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 2,
 23 |    "metadata": {
 24 |     "ExecuteTime": {
 25 |      "end_time": "2019-03-10T19:03:30.254928Z",
 26 |      "start_time": "2019-03-10T19:03:30.210234Z"
 27 |     },
 28 |     "scrolled": true
 29 |    },
 30 |    "outputs": [
 31 |     {
 32 |      "name": "stderr",
 33 |      "output_type": "stream",
 34 |      "text": [
 35 |       "Using TensorFlow backend.\n"
 36 |      ]
 37 |     },
 38 |     {
 39 |      "data": {
 40 |       "text/plain": [
 41 |        "[]"
 42 |       ]
 43 |      },
 44 |      "execution_count": 2,
 45 |      "metadata": {},
 46 |      "output_type": "execute_result"
 47 |     }
 48 |    ],
 49 |    "source": [
 50 |     "# Check available GPUs\n",
 51 |     "from keras import backend as K\n",
 52 |     "K.tensorflow_backend._get_available_gpus()"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 3,
 58 |    "metadata": {
 59 |     "ExecuteTime": {
 60 |      "end_time": "2019-03-10T19:03:30.317634Z",
 61 |      "start_time": "2019-03-10T19:03:30.257471Z"
 62 |     }
 63 |    },
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "import os\n",
 67 |     "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\";\n",
 68 |     "# The GPU ID to use, usually either \"0\" or \"1\".\n",
 69 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\";  "
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 4,
 75 |    "metadata": {
 76 |     "ExecuteTime": {
 77 |      "end_time": "2019-03-10T19:03:30.598527Z",
 78 |      "start_time": "2019-03-10T19:03:30.319606Z"
 79 |     }
 80 |    },
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "# Importing the libraries\n",
 84 |     "import numpy as np\n",
 85 |     "import pandas as pd\n",
 86 |     "from keras.models import Sequential\n",
 87 |     "from keras.layers import Dense, LSTM, Dropout, Reshape, Lambda\n",
 88 |     "from keras.preprocessing.sequence import TimeseriesGenerator\n",
 89 |     "from keras.callbacks import EarlyStopping, ModelCheckpoint\n",
 90 |     "from keras.activations import softmax\n",
 91 |     "from keras.optimizers import SGD\n",
 92 |     "from keras.models import load_model\n",
 93 |     "from keras.utils import to_categorical\n",
 94 |     "import math\n",
 95 |     "import pickle\n",
 96 |     "from sklearn.preprocessing import StandardScaler\n"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": 5,
102 |    "metadata": {
103 |     "ExecuteTime": {
104 |      "end_time": "2019-03-10T19:03:30.665916Z",
105 |      "start_time": "2019-03-10T19:03:30.600628Z"
106 |     }
107 |    },
108 |    "outputs": [],
109 |    "source": [
110 |     "index = \"dowjones\"\n",
111 |     "# index = \"frankfurt\"\n",
112 |     "with open(f\"../data/{index}_calculated/absolute_periods750_250_240.txt\", \"rb\") as fp:   # Unpickling\n",
113 |     "    dataset = pickle.load(fp)"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 6,
119 |    "metadata": {
120 |     "ExecuteTime": {
121 |      "end_time": "2019-03-10T19:03:30.674429Z",
122 |      "start_time": "2019-03-10T19:03:30.667818Z"
123 |     }
124 |    },
125 |    "outputs": [],
126 |    "source": [
127 |     "timestep = 240\n",
128 |     "feature = 31"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": 7,
134 |    "metadata": {
135 |     "ExecuteTime": {
136 |      "end_time": "2019-03-10T19:03:30.747804Z",
137 |      "start_time": "2019-03-10T19:03:30.675703Z"
138 |     }
139 |    },
140 |    "outputs": [],
141 |    "source": [
142 |     "def long_short_postion(returns, k):\n",
143 |     "    position_ = np.copy(returns)\n",
144 |     "    short = np.argpartition(position_, k)[:k]\n",
145 |     "    neutral = np.argpartition(position_, len(position_) - k)[:(len(position_) - k)]\n",
146 |     "    position_[:] = 1\n",
147 |     "    position_[neutral] = 0\n",
148 |     "    position_[short] = -1\n",
149 |     "    return position_"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": 8,
155 |    "metadata": {
156 |     "ExecuteTime": {
157 |      "end_time": "2019-03-10T19:04:14.940189Z",
158 |      "start_time": "2019-03-10T19:03:30.750019Z"
159 |     }
160 |    },
161 |    "outputs": [
162 |     {
163 |      "name": "stdout",
164 |      "output_type": "stream",
165 |      "text": [
166 |       "0.5104516129032258\n",
167 |       "0.48812903225806453\n",
168 |       "0.5107096774193548\n",
169 |       "0.5192258064516129\n",
170 |       "0.5370322580645162\n",
171 |       "0.5296774193548387\n",
172 |       "0.5021935483870967\n",
173 |       "0.5174193548387097\n",
174 |       "0.4967741935483871\n"
175 |      ]
176 |     }
177 |    ],
178 |    "source": [
179 |     "positions = []\n",
180 |     "for i in range(len(dataset[0])):\n",
181 |     "    model_period = f\"../model/LSTM/{index}2_absolute__1feature_period{i}.h5\"\n",
182 |     "    regressor = load_model(model_period, custom_objects={\"softmax\": softmax})\n",
183 |     "    x_train = dataset[0][i][0].values\n",
184 |     "    scaler = StandardScaler().fit(x_train)\n",
185 |     "    \n",
186 |     "    x_test = scaler.transform(dataset[1][i][0])\n",
187 |     "    y_test = to_categorical(dataset[1][i][1].values, 2)\n",
188 |     "#     print(f\"Period {i}\")\n",
189 |     "#     print(f\"x test shape: {x_test.shape}\")\n",
190 |     "#     print(f\"y test shape: {y_test.shape}\")\n",
191 |     "#     print(f\"x_final shape: {x_final.shape}\")\n",
192 |     "#     print(f\"y_final shape: {y_final.shape}\")\n",
193 |     "    x_series = [x_test[i:i + timestep, j]\n",
194 |     "                for i in range(x_test.shape[0] - timestep) for j in range(feature)]\n",
195 |     "    y_series = [y_test[i + timestep, j]\n",
196 |     "                for i in range(y_test.shape[0] - timestep) for j in range(feature)]\n",
197 |     "    x_final = np.array(x_series)\n",
198 |     "    y_final = np.array(y_series)\n",
199 |     "    x_final = np.reshape(x_final, (x_final.shape[0], x_final.shape[1], 1))\n",
200 |     "\n",
201 |     "    predicted =  regressor.predict(x_final)\n",
202 |     "#     predicted = np.reshape\n",
203 |     "#     print(predicted.shape)\n",
204 |     "    predicted = np.reshape(predicted[:, 1], (250, 31))\n",
205 |     "    label = predicted > 0.5\n",
206 |     "    label = label * 1 # Convert boolean to int\n",
207 |     "    print(sum(y_test[-250:, :, 1] == label).sum()/label.size)\n",
208 |     "    positions.append(predicted)"
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "code",
213 |    "execution_count": 9,
214 |    "metadata": {
215 |     "ExecuteTime": {
216 |      "end_time": "2019-03-10T19:04:15.164991Z",
217 |      "start_time": "2019-03-10T19:04:14.942215Z"
218 |     }
219 |    },
220 |    "outputs": [
221 |     {
222 |      "data": {
223 |       "text/html": [
224 |        "<div>\n",
225 |        "<style scoped>\n",
226 |        "    .dataframe tbody tr th:only-of-type {\n",
227 |        "        vertical-align: middle;\n",
228 |        "    }\n",
229 |        "\n",
230 |        "    .dataframe tbody tr th {\n",
231 |        "        vertical-align: top;\n",
232 |        "    }\n",
233 |        "\n",
234 |        "    .dataframe thead th {\n",
235 |        "        text-align: right;\n",
236 |        "    }\n",
237 |        "</style>\n",
238 |        "<table border=\"1\" class=\"dataframe\">\n",
239 |        "  <thead>\n",
240 |        "    <tr style=\"text-align: right;\">\n",
241 |        "      <th>Name</th>\n",
242 |        "      <th>AABA</th>\n",
243 |        "      <th>AAPL</th>\n",
244 |        "      <th>AMZN</th>\n",
245 |        "      <th>AXP</th>\n",
246 |        "      <th>BA</th>\n",
247 |        "      <th>CAT</th>\n",
248 |        "      <th>CSCO</th>\n",
249 |        "      <th>CVX</th>\n",
250 |        "      <th>DIS</th>\n",
251 |        "      <th>GE</th>\n",
252 |        "      <th>...</th>\n",
253 |        "      <th>MSFT</th>\n",
254 |        "      <th>NKE</th>\n",
255 |        "      <th>PFE</th>\n",
256 |        "      <th>PG</th>\n",
257 |        "      <th>TRV</th>\n",
258 |        "      <th>UNH</th>\n",
259 |        "      <th>UTX</th>\n",
260 |        "      <th>VZ</th>\n",
261 |        "      <th>WMT</th>\n",
262 |        "      <th>XOM</th>\n",
263 |        "    </tr>\n",
264 |        "    <tr>\n",
265 |        "      <th>Date</th>\n",
266 |        "      <th></th>\n",
267 |        "      <th></th>\n",
268 |        "      <th></th>\n",
269 |        "      <th></th>\n",
270 |        "      <th></th>\n",
271 |        "      <th></th>\n",
272 |        "      <th></th>\n",
273 |        "      <th></th>\n",
274 |        "      <th></th>\n",
275 |        "      <th></th>\n",
276 |        "      <th></th>\n",
277 |        "      <th></th>\n",
278 |        "      <th></th>\n",
279 |        "      <th></th>\n",
280 |        "      <th></th>\n",
281 |        "      <th></th>\n",
282 |        "      <th></th>\n",
283 |        "      <th></th>\n",
284 |        "      <th></th>\n",
285 |        "      <th></th>\n",
286 |        "      <th></th>\n",
287 |        "    </tr>\n",
288 |        "  </thead>\n",
289 |        "  <tbody>\n",
290 |        "    <tr>\n",
291 |        "      <th>2006-01-03</th>\n",
292 |        "      <td>40.91</td>\n",
293 |        "      <td>10.68</td>\n",
294 |        "      <td>47.58</td>\n",
295 |        "      <td>52.58</td>\n",
296 |        "      <td>70.44</td>\n",
297 |        "      <td>57.80</td>\n",
298 |        "      <td>17.45</td>\n",
299 |        "      <td>59.08</td>\n",
300 |        "      <td>24.40</td>\n",
301 |        "      <td>35.37</td>\n",
302 |        "      <td>...</td>\n",
303 |        "      <td>26.84</td>\n",
304 |        "      <td>10.74</td>\n",
305 |        "      <td>23.78</td>\n",
306 |        "      <td>58.78</td>\n",
307 |        "      <td>45.99</td>\n",
308 |        "      <td>61.73</td>\n",
309 |        "      <td>56.53</td>\n",
310 |        "      <td>30.38</td>\n",
311 |        "      <td>46.23</td>\n",
312 |        "      <td>58.47</td>\n",
313 |        "    </tr>\n",
314 |        "    <tr>\n",
315 |        "      <th>2006-01-04</th>\n",
316 |        "      <td>40.97</td>\n",
317 |        "      <td>10.71</td>\n",
318 |        "      <td>47.25</td>\n",
319 |        "      <td>51.95</td>\n",
320 |        "      <td>71.17</td>\n",
321 |        "      <td>59.27</td>\n",
322 |        "      <td>17.85</td>\n",
323 |        "      <td>58.91</td>\n",
324 |        "      <td>23.99</td>\n",
325 |        "      <td>35.32</td>\n",
326 |        "      <td>...</td>\n",
327 |        "      <td>26.97</td>\n",
328 |        "      <td>10.69</td>\n",
329 |        "      <td>24.55</td>\n",
330 |        "      <td>58.89</td>\n",
331 |        "      <td>46.50</td>\n",
332 |        "      <td>61.88</td>\n",
333 |        "      <td>56.19</td>\n",
334 |        "      <td>31.27</td>\n",
335 |        "      <td>46.32</td>\n",
336 |        "      <td>58.57</td>\n",
337 |        "    </tr>\n",
338 |        "    <tr>\n",
339 |        "      <th>2006-01-05</th>\n",
340 |        "      <td>41.53</td>\n",
341 |        "      <td>10.63</td>\n",
342 |        "      <td>47.65</td>\n",
343 |        "      <td>52.50</td>\n",
344 |        "      <td>70.33</td>\n",
345 |        "      <td>59.27</td>\n",
346 |        "      <td>18.35</td>\n",
347 |        "      <td>58.19</td>\n",
348 |        "      <td>24.41</td>\n",
349 |        "      <td>35.23</td>\n",
350 |        "      <td>...</td>\n",
351 |        "      <td>26.99</td>\n",
352 |        "      <td>10.76</td>\n",
353 |        "      <td>24.58</td>\n",
354 |        "      <td>58.70</td>\n",
355 |        "      <td>46.95</td>\n",
356 |        "      <td>61.69</td>\n",
357 |        "      <td>55.98</td>\n",
358 |        "      <td>31.63</td>\n",
359 |        "      <td>45.69</td>\n",
360 |        "      <td>58.28</td>\n",
361 |        "    </tr>\n",
362 |        "    <tr>\n",
363 |        "      <th>2006-01-06</th>\n",
364 |        "      <td>43.21</td>\n",
365 |        "      <td>10.90</td>\n",
366 |        "      <td>47.87</td>\n",
367 |        "      <td>52.68</td>\n",
368 |        "      <td>69.35</td>\n",
369 |        "      <td>60.45</td>\n",
370 |        "      <td>18.77</td>\n",
371 |        "      <td>59.25</td>\n",
372 |        "      <td>24.74</td>\n",
373 |        "      <td>35.47</td>\n",
374 |        "      <td>...</td>\n",
375 |        "      <td>26.91</td>\n",
376 |        "      <td>10.72</td>\n",
377 |        "      <td>24.85</td>\n",
378 |        "      <td>58.64</td>\n",
379 |        "      <td>47.21</td>\n",
380 |        "      <td>62.90</td>\n",
381 |        "      <td>56.16</td>\n",
382 |        "      <td>31.35</td>\n",
383 |        "      <td>45.88</td>\n",
384 |        "      <td>59.43</td>\n",
385 |        "    </tr>\n",
386 |        "    <tr>\n",
387 |        "      <th>2006-01-09</th>\n",
388 |        "      <td>43.42</td>\n",
389 |        "      <td>10.86</td>\n",
390 |        "      <td>47.08</td>\n",
391 |        "      <td>53.99</td>\n",
392 |        "      <td>68.77</td>\n",
393 |        "      <td>61.55</td>\n",
394 |        "      <td>19.06</td>\n",
395 |        "      <td>58.95</td>\n",
396 |        "      <td>25.00</td>\n",
397 |        "      <td>35.38</td>\n",
398 |        "      <td>...</td>\n",
399 |        "      <td>26.86</td>\n",
400 |        "      <td>10.88</td>\n",
401 |        "      <td>24.85</td>\n",
402 |        "      <td>59.08</td>\n",
403 |        "      <td>47.23</td>\n",
404 |        "      <td>61.40</td>\n",
405 |        "      <td>56.80</td>\n",
406 |        "      <td>31.48</td>\n",
407 |        "      <td>45.71</td>\n",
408 |        "      <td>59.40</td>\n",
409 |        "    </tr>\n",
410 |        "  </tbody>\n",
411 |        "</table>\n",
412 |        "<p>5 rows × 31 columns</p>\n",
413 |        "</div>"
414 |       ],
415 |       "text/plain": [
416 |        "Name         AABA   AAPL   AMZN    AXP     BA    CAT   CSCO    CVX    DIS  \\\n",
417 |        "Date                                                                        \n",
418 |        "2006-01-03  40.91  10.68  47.58  52.58  70.44  57.80  17.45  59.08  24.40   \n",
419 |        "2006-01-04  40.97  10.71  47.25  51.95  71.17  59.27  17.85  58.91  23.99   \n",
420 |        "2006-01-05  41.53  10.63  47.65  52.50  70.33  59.27  18.35  58.19  24.41   \n",
421 |        "2006-01-06  43.21  10.90  47.87  52.68  69.35  60.45  18.77  59.25  24.74   \n",
422 |        "2006-01-09  43.42  10.86  47.08  53.99  68.77  61.55  19.06  58.95  25.00   \n",
423 |        "\n",
424 |        "Name           GE  ...   MSFT    NKE    PFE     PG    TRV    UNH    UTX  \\\n",
425 |        "Date               ...                                                    \n",
426 |        "2006-01-03  35.37  ...  26.84  10.74  23.78  58.78  45.99  61.73  56.53   \n",
427 |        "2006-01-04  35.32  ...  26.97  10.69  24.55  58.89  46.50  61.88  56.19   \n",
428 |        "2006-01-05  35.23  ...  26.99  10.76  24.58  58.70  46.95  61.69  55.98   \n",
429 |        "2006-01-06  35.47  ...  26.91  10.72  24.85  58.64  47.21  62.90  56.16   \n",
430 |        "2006-01-09  35.38  ...  26.86  10.88  24.85  59.08  47.23  61.40  56.80   \n",
431 |        "\n",
432 |        "Name           VZ    WMT    XOM  \n",
433 |        "Date                             \n",
434 |        "2006-01-03  30.38  46.23  58.47  \n",
435 |        "2006-01-04  31.27  46.32  58.57  \n",
436 |        "2006-01-05  31.63  45.69  58.28  \n",
437 |        "2006-01-06  31.35  45.88  59.43  \n",
438 |        "2006-01-09  31.48  45.71  59.40  \n",
439 |        "\n",
440 |        "[5 rows x 31 columns]"
441 |       ]
442 |      },
443 |      "execution_count": 9,
444 |      "metadata": {},
445 |      "output_type": "execute_result"
446 |     }
447 |    ],
448 |    "source": [
449 |     "stocks = pd.read_csv(\"../data/dowjones/all_stocks_2006-01-01_to_2018-01-01.csv\", index_col='Date',parse_dates=['Date'])\n",
450 |     "stocks = stocks[[\"Close\", \"Name\"]]\n",
451 |     "stocks = stocks.pivot_table(values='Close', index=stocks.index, columns='Name', aggfunc='first')\n",
452 |     "stocks.head()"
453 |    ]
454 |   },
455 |   {
456 |    "cell_type": "code",
457 |    "execution_count": 10,
458 |    "metadata": {
459 |     "ExecuteTime": {
460 |      "end_time": "2019-03-10T19:04:15.779895Z",
461 |      "start_time": "2019-03-10T19:04:15.166478Z"
462 |     }
463 |    },
464 |    "outputs": [
465 |     {
466 |      "data": {
467 |       "text/html": [
468 |        "<div>\n",
469 |        "<style scoped>\n",
470 |        "    .dataframe tbody tr th:only-of-type {\n",
471 |        "        vertical-align: middle;\n",
472 |        "    }\n",
473 |        "\n",
474 |        "    .dataframe tbody tr th {\n",
475 |        "        vertical-align: top;\n",
476 |        "    }\n",
477 |        "\n",
478 |        "    .dataframe thead th {\n",
479 |        "        text-align: right;\n",
480 |        "    }\n",
481 |        "</style>\n",
482 |        "<table border=\"1\" class=\"dataframe\">\n",
483 |        "  <thead>\n",
484 |        "    <tr style=\"text-align: right;\">\n",
485 |        "      <th>Name</th>\n",
486 |        "      <th>AABA</th>\n",
487 |        "      <th>AAPL</th>\n",
488 |        "      <th>AMZN</th>\n",
489 |        "      <th>AXP</th>\n",
490 |        "      <th>BA</th>\n",
491 |        "      <th>CAT</th>\n",
492 |        "      <th>CSCO</th>\n",
493 |        "      <th>CVX</th>\n",
494 |        "      <th>DIS</th>\n",
495 |        "      <th>GE</th>\n",
496 |        "      <th>...</th>\n",
497 |        "      <th>MSFT</th>\n",
498 |        "      <th>NKE</th>\n",
499 |        "      <th>PFE</th>\n",
500 |        "      <th>PG</th>\n",
501 |        "      <th>TRV</th>\n",
502 |        "      <th>UNH</th>\n",
503 |        "      <th>UTX</th>\n",
504 |        "      <th>VZ</th>\n",
505 |        "      <th>WMT</th>\n",
506 |        "      <th>XOM</th>\n",
507 |        "    </tr>\n",
508 |        "    <tr>\n",
509 |        "      <th>Date</th>\n",
510 |        "      <th></th>\n",
511 |        "      <th></th>\n",
512 |        "      <th></th>\n",
513 |        "      <th></th>\n",
514 |        "      <th></th>\n",
515 |        "      <th></th>\n",
516 |        "      <th></th>\n",
517 |        "      <th></th>\n",
518 |        "      <th></th>\n",
519 |        "      <th></th>\n",
520 |        "      <th></th>\n",
521 |        "      <th></th>\n",
522 |        "      <th></th>\n",
523 |        "      <th></th>\n",
524 |        "      <th></th>\n",
525 |        "      <th></th>\n",
526 |        "      <th></th>\n",
527 |        "      <th></th>\n",
528 |        "      <th></th>\n",
529 |        "      <th></th>\n",
530 |        "      <th></th>\n",
531 |        "    </tr>\n",
532 |        "  </thead>\n",
533 |        "  <tbody>\n",
534 |        "    <tr>\n",
535 |        "      <th>2008-12-26</th>\n",
536 |        "      <td>0.001623</td>\n",
537 |        "      <td>0.009053</td>\n",
538 |        "      <td>0.006610</td>\n",
539 |        "      <td>-0.003339</td>\n",
540 |        "      <td>0.010219</td>\n",
541 |        "      <td>0.019327</td>\n",
542 |        "      <td>-0.004893</td>\n",
543 |        "      <td>0.010485</td>\n",
544 |        "      <td>0.008640</td>\n",
545 |        "      <td>-0.008690</td>\n",
546 |        "      <td>...</td>\n",
547 |        "      <td>-0.002087</td>\n",
548 |        "      <td>0.013029</td>\n",
549 |        "      <td>0.005291</td>\n",
550 |        "      <td>0.002318</td>\n",
551 |        "      <td>0.019443</td>\n",
552 |        "      <td>0.013097</td>\n",
553 |        "      <td>-0.000391</td>\n",
554 |        "      <td>0.010350</td>\n",
555 |        "      <td>-0.001623</td>\n",
556 |        "      <td>0.018606</td>\n",
557 |        "    </tr>\n",
558 |        "    <tr>\n",
559 |        "      <th>2008-12-29</th>\n",
560 |        "      <td>-0.037277</td>\n",
561 |        "      <td>0.008972</td>\n",
562 |        "      <td>-0.045964</td>\n",
563 |        "      <td>-0.011725</td>\n",
564 |        "      <td>-0.013323</td>\n",
565 |        "      <td>-0.008895</td>\n",
566 |        "      <td>-0.015980</td>\n",
567 |        "      <td>0.017058</td>\n",
568 |        "      <td>-0.032011</td>\n",
569 |        "      <td>-0.019411</td>\n",
570 |        "      <td>...</td>\n",
571 |        "      <td>-0.008887</td>\n",
572 |        "      <td>-0.016077</td>\n",
573 |        "      <td>0.011111</td>\n",
574 |        "      <td>-0.005452</td>\n",
575 |        "      <td>0.026136</td>\n",
576 |        "      <td>-0.027757</td>\n",
577 |        "      <td>0.003912</td>\n",
578 |        "      <td>-0.001506</td>\n",
579 |        "      <td>-0.004336</td>\n",
580 |        "      <td>0.010753</td>\n",
581 |        "    </tr>\n",
582 |        "    <tr>\n",
583 |        "      <th>2008-12-30</th>\n",
584 |        "      <td>0.007576</td>\n",
585 |        "      <td>-0.003234</td>\n",
586 |        "      <td>0.027530</td>\n",
587 |        "      <td>0.016949</td>\n",
588 |        "      <td>0.031508</td>\n",
589 |        "      <td>0.031176</td>\n",
590 |        "      <td>0.013741</td>\n",
591 |        "      <td>0.025577</td>\n",
592 |        "      <td>0.047042</td>\n",
593 |        "      <td>0.010217</td>\n",
594 |        "      <td>...</td>\n",
595 |        "      <td>0.020042</td>\n",
596 |        "      <td>0.014706</td>\n",
597 |        "      <td>0.026605</td>\n",
598 |        "      <td>0.015282</td>\n",
599 |        "      <td>0.030978</td>\n",
600 |        "      <td>0.057489</td>\n",
601 |        "      <td>0.033314</td>\n",
602 |        "      <td>0.002716</td>\n",
603 |        "      <td>-0.001089</td>\n",
604 |        "      <td>0.007306</td>\n",
605 |        "    </tr>\n",
606 |        "    <tr>\n",
607 |        "      <th>2008-12-31</th>\n",
608 |        "      <td>0.019215</td>\n",
609 |        "      <td>-0.011354</td>\n",
610 |        "      <td>0.010244</td>\n",
611 |        "      <td>0.030556</td>\n",
612 |        "      <td>0.034424</td>\n",
613 |        "      <td>0.023133</td>\n",
614 |        "      <td>0.004313</td>\n",
615 |        "      <td>0.008040</td>\n",
616 |        "      <td>0.009342</td>\n",
617 |        "      <td>0.024020</td>\n",
618 |        "      <td>...</td>\n",
619 |        "      <td>0.005171</td>\n",
620 |        "      <td>0.026570</td>\n",
621 |        "      <td>-0.002254</td>\n",
622 |        "      <td>0.011453</td>\n",
623 |        "      <td>0.006009</td>\n",
624 |        "      <td>-0.016272</td>\n",
625 |        "      <td>0.010558</td>\n",
626 |        "      <td>0.020163</td>\n",
627 |        "      <td>0.018347</td>\n",
628 |        "      <td>0.015778</td>\n",
629 |        "    </tr>\n",
630 |        "    <tr>\n",
631 |        "      <th>2009-01-02</th>\n",
632 |        "      <td>0.053279</td>\n",
633 |        "      <td>0.063167</td>\n",
634 |        "      <td>0.060062</td>\n",
635 |        "      <td>0.042049</td>\n",
636 |        "      <td>0.060464</td>\n",
637 |        "      <td>0.050146</td>\n",
638 |        "      <td>0.040491</td>\n",
639 |        "      <td>0.034473</td>\n",
640 |        "      <td>0.054209</td>\n",
641 |        "      <td>0.053704</td>\n",
642 |        "      <td>...</td>\n",
643 |        "      <td>0.045782</td>\n",
644 |        "      <td>0.040000</td>\n",
645 |        "      <td>0.031621</td>\n",
646 |        "      <td>0.015852</td>\n",
647 |        "      <td>0.000000</td>\n",
648 |        "      <td>0.037218</td>\n",
649 |        "      <td>0.025187</td>\n",
650 |        "      <td>0.021829</td>\n",
651 |        "      <td>0.019979</td>\n",
652 |        "      <td>0.022673</td>\n",
653 |        "    </tr>\n",
654 |        "  </tbody>\n",
655 |        "</table>\n",
656 |        "<p>5 rows × 31 columns</p>\n",
657 |        "</div>"
658 |       ],
659 |       "text/plain": [
660 |        "Name            AABA      AAPL      AMZN       AXP        BA       CAT  \\\n",
661 |        "Date                                                                     \n",
662 |        "2008-12-26  0.001623  0.009053  0.006610 -0.003339  0.010219  0.019327   \n",
663 |        "2008-12-29 -0.037277  0.008972 -0.045964 -0.011725 -0.013323 -0.008895   \n",
664 |        "2008-12-30  0.007576 -0.003234  0.027530  0.016949  0.031508  0.031176   \n",
665 |        "2008-12-31  0.019215 -0.011354  0.010244  0.030556  0.034424  0.023133   \n",
666 |        "2009-01-02  0.053279  0.063167  0.060062  0.042049  0.060464  0.050146   \n",
667 |        "\n",
668 |        "Name            CSCO       CVX       DIS        GE  ...      MSFT       NKE  \\\n",
669 |        "Date                                                ...                       \n",
670 |        "2008-12-26 -0.004893  0.010485  0.008640 -0.008690  ... -0.002087  0.013029   \n",
671 |        "2008-12-29 -0.015980  0.017058 -0.032011 -0.019411  ... -0.008887 -0.016077   \n",
672 |        "2008-12-30  0.013741  0.025577  0.047042  0.010217  ...  0.020042  0.014706   \n",
673 |        "2008-12-31  0.004313  0.008040  0.009342  0.024020  ...  0.005171  0.026570   \n",
674 |        "2009-01-02  0.040491  0.034473  0.054209  0.053704  ...  0.045782  0.040000   \n",
675 |        "\n",
676 |        "Name             PFE        PG       TRV       UNH       UTX        VZ  \\\n",
677 |        "Date                                                                     \n",
678 |        "2008-12-26  0.005291  0.002318  0.019443  0.013097 -0.000391  0.010350   \n",
679 |        "2008-12-29  0.011111 -0.005452  0.026136 -0.027757  0.003912 -0.001506   \n",
680 |        "2008-12-30  0.026605  0.015282  0.030978  0.057489  0.033314  0.002716   \n",
681 |        "2008-12-31 -0.002254  0.011453  0.006009 -0.016272  0.010558  0.020163   \n",
682 |        "2009-01-02  0.031621  0.015852  0.000000  0.037218  0.025187  0.021829   \n",
683 |        "\n",
684 |        "Name             WMT       XOM  \n",
685 |        "Date                            \n",
686 |        "2008-12-26 -0.001623  0.018606  \n",
687 |        "2008-12-29 -0.004336  0.010753  \n",
688 |        "2008-12-30 -0.001089  0.007306  \n",
689 |        "2008-12-31  0.018347  0.015778  \n",
690 |        "2009-01-02  0.019979  0.022673  \n",
691 |        "\n",
692 |        "[5 rows x 31 columns]"
693 |       ]
694 |      },
695 |      "execution_count": 10,
696 |      "metadata": {},
697 |      "output_type": "execute_result"
698 |     }
699 |    ],
700 |    "source": [
701 |     "real_returns = (stocks - stocks.shift(1)) / stocks.shift(1)\n",
702 |     "real_returns = real_returns.dropna()\n",
703 |     "real_returns[750:3000].head()"
704 |    ]
705 |   },
706 |   {
707 |    "cell_type": "code",
708 |    "execution_count": 11,
709 |    "metadata": {
710 |     "ExecuteTime": {
711 |      "end_time": "2019-03-10T19:04:15.827807Z",
712 |      "start_time": "2019-03-10T19:04:15.781310Z"
713 |     }
714 |    },
715 |    "outputs": [
716 |     {
717 |      "data": {
718 |       "text/plain": [
719 |        "(2250, 31)"
720 |       ]
721 |      },
722 |      "execution_count": 11,
723 |      "metadata": {},
724 |      "output_type": "execute_result"
725 |     }
726 |    ],
727 |    "source": [
728 |     "all_positions = np.concatenate(positions,axis=0)\n",
729 |     "all_positions.shape"
730 |    ]
731 |   },
732 |   {
733 |    "cell_type": "code",
734 |    "execution_count": 12,
735 |    "metadata": {
736 |     "ExecuteTime": {
737 |      "end_time": "2019-03-10T19:04:19.673728Z",
738 |      "start_time": "2019-03-10T19:04:15.829485Z"
739 |     }
740 |    },
741 |    "outputs": [
742 |     {
743 |      "name": "stdout",
744 |      "output_type": "stream",
745 |      "text": [
746 |       "rebalance = 0.416847796589599\n",
747 |       "rebalance1 = 0.7287858977877059\n",
748 |       "rebalance = 0.988530751320795\n",
749 |       "rebalance1 = 1.058599060576066\n",
750 |       "rebalance = 2.2003725835255565\n",
751 |       "rebalance1 = 1.5484746988471223\n",
752 |       "rebalance = 1.0373938949183026\n",
753 |       "rebalance1 = 1.0532690812815328\n",
754 |       "rebalance = 1.2411430431165373\n",
755 |       "rebalance1 = 1.144256999889448\n",
756 |       "rebalance = 1.1235350642658526\n",
757 |       "rebalance1 = 1.0831576083988692\n",
758 |       "rebalance = 0.9623747025919739\n",
759 |       "rebalance1 = 0.9998904990603397\n",
760 |       "rebalance = 0.9177365598653195\n",
761 |       "rebalance1 = 0.9738202513502956\n",
762 |       "rebalance = 0.9114912909403768\n",
763 |       "rebalance1 = 0.9685717723658719\n",
764 |       "rebalance = 0.9048209816604091\n",
765 |       "rebalance1 = 0.963766259664476\n",
766 |       "rebalance = 0.8799231762933988\n",
767 |       "rebalance1 = 0.9489119605846448\n",
768 |       "rebalance = 0.8327474649310889\n",
769 |       "rebalance1 = 0.9221604112598187\n",
770 |       "rebalance = 0.8459709771506109\n",
771 |       "rebalance1 = 0.9285075023608181\n",
772 |       "rebalance = 0.8591237981217743\n",
773 |       "rebalance1 = 0.9348027933880991\n",
774 |       "rebalance = 0.8302393252717782\n",
775 |       "rebalance1 = 0.9183123644921956\n"
776 |      ]
777 |     }
778 |    ],
779 |    "source": [
780 |     "for k in range(1, 16):\n",
781 |     "    probabilities = pd.DataFrame(data=all_positions, index=real_returns[750:3000].index, columns=real_returns.columns)\n",
782 |     "    position = probabilities.apply(lambda x: long_short_postion(x, k), axis=1, result_type='broadcast')\n",
783 |     "    final_returns = real_returns[750:3000].mul(position)\n",
784 |     "#     no_rebalance = (final_returns + 1).product().sum()/(2 * k)\n",
785 |     "    rebalance = (1 + final_returns.sum(axis = 1)/k).product()\n",
786 |     "    rebalance1 = (1 + final_returns.sum(axis = 1)/(2 * k)).product()\n",
787 |     "#     print(f\"no rebalance = {no_rebalance}\")\n",
788 |     "    print(f\"rebalance = {rebalance}\")\n",
789 |     "    print(f\"rebalance1 = {rebalance1}\")"
790 |    ]
791 |   }
792 |  ],
793 |  "metadata": {
794 |   "kernelspec": {
795 |    "display_name": "projet_S5",
796 |    "language": "python",
797 |    "name": "projet_s5"
798 |   },
799 |   "language_info": {
800 |    "codemirror_mode": {
801 |     "name": "ipython",
802 |     "version": 3
803 |    },
804 |    "file_extension": ".py",
805 |    "mimetype": "text/x-python",
806 |    "name": "python",
807 |    "nbconvert_exporter": "python",
808 |    "pygments_lexer": "ipython3",
809 |    "version": "3.6.7"
810 |   },
811 |   "toc": {
812 |    "base_numbering": 1,
813 |    "nav_menu": {},
814 |    "number_sections": true,
815 |    "sideBar": true,
816 |    "skip_h1_title": false,
817 |    "title_cell": "Table of Contents",
818 |    "title_sidebar": "Contents",
819 |    "toc_cell": false,
820 |    "toc_position": {},
821 |    "toc_section_display": true,
822 |    "toc_window_display": false
823 |   }
824 |  },
825 |  "nbformat": 4,
826 |  "nbformat_minor": 2
827 | }
828 | 


--------------------------------------------------------------------------------
/notebook/[Official] Backtesting LSTM - Absolute Return.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "ExecuteTime": {
  8 |      "end_time": "2019-03-10T18:50:02.855975Z",
  9 |      "start_time": "2019-03-10T18:50:01.880636Z"
 10 |     },
 11 |     "scrolled": true
 12 |    },
 13 |    "outputs": [],
 14 |    "source": [
 15 |     "# List all devices\n",
 16 |     "from tensorflow.python.client import device_lib\n",
 17 |     "# print(device_lib.list_local_devices())"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 2,
 23 |    "metadata": {
 24 |     "ExecuteTime": {
 25 |      "end_time": "2019-03-10T18:50:02.896741Z",
 26 |      "start_time": "2019-03-10T18:50:02.857968Z"
 27 |     },
 28 |     "scrolled": true
 29 |    },
 30 |    "outputs": [
 31 |     {
 32 |      "name": "stderr",
 33 |      "output_type": "stream",
 34 |      "text": [
 35 |       "Using TensorFlow backend.\n"
 36 |      ]
 37 |     },
 38 |     {
 39 |      "data": {
 40 |       "text/plain": [
 41 |        "[]"
 42 |       ]
 43 |      },
 44 |      "execution_count": 2,
 45 |      "metadata": {},
 46 |      "output_type": "execute_result"
 47 |     }
 48 |    ],
 49 |    "source": [
 50 |     "# Check available GPUs\n",
 51 |     "from keras import backend as K\n",
 52 |     "K.tensorflow_backend._get_available_gpus()"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 3,
 58 |    "metadata": {
 59 |     "ExecuteTime": {
 60 |      "end_time": "2019-03-10T18:50:02.929988Z",
 61 |      "start_time": "2019-03-10T18:50:02.898478Z"
 62 |     }
 63 |    },
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "import os\n",
 67 |     "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\";\n",
 68 |     "# The GPU ID to use, usually either \"0\" or \"1\".\n",
 69 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\";  "
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 4,
 75 |    "metadata": {
 76 |     "ExecuteTime": {
 77 |      "end_time": "2019-03-10T18:50:03.195356Z",
 78 |      "start_time": "2019-03-10T18:50:02.933022Z"
 79 |     }
 80 |    },
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "# Importing the libraries\n",
 84 |     "import numpy as np\n",
 85 |     "import pandas as pd\n",
 86 |     "from keras.models import Sequential\n",
 87 |     "from keras.layers import Dense, LSTM, Dropout, Reshape, Lambda\n",
 88 |     "from keras.preprocessing.sequence import TimeseriesGenerator\n",
 89 |     "from keras.callbacks import EarlyStopping, ModelCheckpoint\n",
 90 |     "from keras.activations import softmax\n",
 91 |     "from keras.optimizers import SGD\n",
 92 |     "from keras.models import load_model\n",
 93 |     "from keras.utils import to_categorical\n",
 94 |     "import math\n",
 95 |     "import pickle\n",
 96 |     "from sklearn.preprocessing import StandardScaler\n"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": 5,
102 |    "metadata": {
103 |     "ExecuteTime": {
104 |      "end_time": "2019-03-10T18:50:03.262046Z",
105 |      "start_time": "2019-03-10T18:50:03.196828Z"
106 |     }
107 |    },
108 |    "outputs": [],
109 |    "source": [
110 |     "index = \"dowjones\"\n",
111 |     "# index = \"frankfurt\"\n",
112 |     "with open(f\"../data/{index}_calculated/absolute_periods750_250_240.txt\", \"rb\") as fp:   # Unpickling\n",
113 |     "    dataset = pickle.load(fp)"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 6,
119 |    "metadata": {
120 |     "ExecuteTime": {
121 |      "end_time": "2019-03-10T18:50:03.293144Z",
122 |      "start_time": "2019-03-10T18:50:03.263450Z"
123 |     }
124 |    },
125 |    "outputs": [],
126 |    "source": [
127 |     "timestep = 240"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": 7,
133 |    "metadata": {
134 |     "ExecuteTime": {
135 |      "end_time": "2019-03-10T18:50:03.377283Z",
136 |      "start_time": "2019-03-10T18:50:03.294391Z"
137 |     }
138 |    },
139 |    "outputs": [],
140 |    "source": [
141 |     "def long_short_postion(returns, k):\n",
142 |     "    position_ = np.copy(returns)\n",
143 |     "    short = np.argpartition(position_, k)[:k]\n",
144 |     "    neutral = np.argpartition(position_, len(position_) - k)[:(len(position_) - k)]\n",
145 |     "    position_[:] = 1\n",
146 |     "    position_[neutral] = 0\n",
147 |     "    position_[short] = -1\n",
148 |     "    return position_"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": 8,
154 |    "metadata": {
155 |     "ExecuteTime": {
156 |      "end_time": "2019-03-10T18:50:21.861882Z",
157 |      "start_time": "2019-03-10T18:50:03.379109Z"
158 |     }
159 |    },
160 |    "outputs": [],
161 |    "source": [
162 |     "positions = []\n",
163 |     "for i in range(len(dataset[0])):\n",
164 |     "    model_period = f\"../model/LSTM/{index}2_absolute__period{i}.h5\"\n",
165 |     "    regressor = load_model(model_period, custom_objects={\"softmax\": softmax})\n",
166 |     "    x_train = dataset[0][i][0].values\n",
167 |     "    scaler = StandardScaler().fit(x_train)\n",
168 |     "    \n",
169 |     "    x_test = scaler.transform(dataset[1][i][0])\n",
170 |     "    y_test = to_categorical(dataset[1][i][1].values, 2)\n",
171 |     "#     print(f\"Period {i}\")\n",
172 |     "#     print(f\"x test shape: {x_test.shape}\")\n",
173 |     "#     print(f\"y test shape: {y_test.shape}\")\n",
174 |     "#     print(f\"x_final shape: {x_final.shape}\")\n",
175 |     "#     print(f\"y_final shape: {y_final.shape}\")\n",
176 |     "    x_series = [x_test[i:i + timestep, :]\n",
177 |     "                for i in range(x_test.shape[0] - timestep)]\n",
178 |     "    y_series = [y_test[i + timestep]\n",
179 |     "                for i in range(y_test.shape[0] - timestep)]\n",
180 |     "    x_final = np.array(x_series)\n",
181 |     "    y_final = np.array(y_series)\n",
182 |     "\n",
183 |     "\n",
184 |     "\n",
185 |     "    predicted =  regressor.predict(x_final)\n",
186 |     "    label = predicted > 0.5\n",
187 |     "    label = label * 1 # Convert boolean to int\n",
188 |     "#     print((sum(y_test[:, :, 1] == label[:, :, 1])/(y_test.size/2)).sum())\n",
189 |     "    positions.append(predicted[:, :, 1])"
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "code",
194 |    "execution_count": 9,
195 |    "metadata": {
196 |     "ExecuteTime": {
197 |      "end_time": "2019-03-10T18:50:22.146192Z",
198 |      "start_time": "2019-03-10T18:50:21.864031Z"
199 |     },
200 |     "scrolled": true
201 |    },
202 |    "outputs": [
203 |     {
204 |      "data": {
205 |       "text/html": [
206 |        "<div>\n",
207 |        "<style scoped>\n",
208 |        "    .dataframe tbody tr th:only-of-type {\n",
209 |        "        vertical-align: middle;\n",
210 |        "    }\n",
211 |        "\n",
212 |        "    .dataframe tbody tr th {\n",
213 |        "        vertical-align: top;\n",
214 |        "    }\n",
215 |        "\n",
216 |        "    .dataframe thead th {\n",
217 |        "        text-align: right;\n",
218 |        "    }\n",
219 |        "</style>\n",
220 |        "<table border=\"1\" class=\"dataframe\">\n",
221 |        "  <thead>\n",
222 |        "    <tr style=\"text-align: right;\">\n",
223 |        "      <th>Name</th>\n",
224 |        "      <th>AABA</th>\n",
225 |        "      <th>AAPL</th>\n",
226 |        "      <th>AMZN</th>\n",
227 |        "      <th>AXP</th>\n",
228 |        "      <th>BA</th>\n",
229 |        "      <th>CAT</th>\n",
230 |        "      <th>CSCO</th>\n",
231 |        "      <th>CVX</th>\n",
232 |        "      <th>DIS</th>\n",
233 |        "      <th>GE</th>\n",
234 |        "      <th>...</th>\n",
235 |        "      <th>MSFT</th>\n",
236 |        "      <th>NKE</th>\n",
237 |        "      <th>PFE</th>\n",
238 |        "      <th>PG</th>\n",
239 |        "      <th>TRV</th>\n",
240 |        "      <th>UNH</th>\n",
241 |        "      <th>UTX</th>\n",
242 |        "      <th>VZ</th>\n",
243 |        "      <th>WMT</th>\n",
244 |        "      <th>XOM</th>\n",
245 |        "    </tr>\n",
246 |        "    <tr>\n",
247 |        "      <th>Date</th>\n",
248 |        "      <th></th>\n",
249 |        "      <th></th>\n",
250 |        "      <th></th>\n",
251 |        "      <th></th>\n",
252 |        "      <th></th>\n",
253 |        "      <th></th>\n",
254 |        "      <th></th>\n",
255 |        "      <th></th>\n",
256 |        "      <th></th>\n",
257 |        "      <th></th>\n",
258 |        "      <th></th>\n",
259 |        "      <th></th>\n",
260 |        "      <th></th>\n",
261 |        "      <th></th>\n",
262 |        "      <th></th>\n",
263 |        "      <th></th>\n",
264 |        "      <th></th>\n",
265 |        "      <th></th>\n",
266 |        "      <th></th>\n",
267 |        "      <th></th>\n",
268 |        "      <th></th>\n",
269 |        "    </tr>\n",
270 |        "  </thead>\n",
271 |        "  <tbody>\n",
272 |        "    <tr>\n",
273 |        "      <th>2006-01-03</th>\n",
274 |        "      <td>40.91</td>\n",
275 |        "      <td>10.68</td>\n",
276 |        "      <td>47.58</td>\n",
277 |        "      <td>52.58</td>\n",
278 |        "      <td>70.44</td>\n",
279 |        "      <td>57.80</td>\n",
280 |        "      <td>17.45</td>\n",
281 |        "      <td>59.08</td>\n",
282 |        "      <td>24.40</td>\n",
283 |        "      <td>35.37</td>\n",
284 |        "      <td>...</td>\n",
285 |        "      <td>26.84</td>\n",
286 |        "      <td>10.74</td>\n",
287 |        "      <td>23.78</td>\n",
288 |        "      <td>58.78</td>\n",
289 |        "      <td>45.99</td>\n",
290 |        "      <td>61.73</td>\n",
291 |        "      <td>56.53</td>\n",
292 |        "      <td>30.38</td>\n",
293 |        "      <td>46.23</td>\n",
294 |        "      <td>58.47</td>\n",
295 |        "    </tr>\n",
296 |        "    <tr>\n",
297 |        "      <th>2006-01-04</th>\n",
298 |        "      <td>40.97</td>\n",
299 |        "      <td>10.71</td>\n",
300 |        "      <td>47.25</td>\n",
301 |        "      <td>51.95</td>\n",
302 |        "      <td>71.17</td>\n",
303 |        "      <td>59.27</td>\n",
304 |        "      <td>17.85</td>\n",
305 |        "      <td>58.91</td>\n",
306 |        "      <td>23.99</td>\n",
307 |        "      <td>35.32</td>\n",
308 |        "      <td>...</td>\n",
309 |        "      <td>26.97</td>\n",
310 |        "      <td>10.69</td>\n",
311 |        "      <td>24.55</td>\n",
312 |        "      <td>58.89</td>\n",
313 |        "      <td>46.50</td>\n",
314 |        "      <td>61.88</td>\n",
315 |        "      <td>56.19</td>\n",
316 |        "      <td>31.27</td>\n",
317 |        "      <td>46.32</td>\n",
318 |        "      <td>58.57</td>\n",
319 |        "    </tr>\n",
320 |        "    <tr>\n",
321 |        "      <th>2006-01-05</th>\n",
322 |        "      <td>41.53</td>\n",
323 |        "      <td>10.63</td>\n",
324 |        "      <td>47.65</td>\n",
325 |        "      <td>52.50</td>\n",
326 |        "      <td>70.33</td>\n",
327 |        "      <td>59.27</td>\n",
328 |        "      <td>18.35</td>\n",
329 |        "      <td>58.19</td>\n",
330 |        "      <td>24.41</td>\n",
331 |        "      <td>35.23</td>\n",
332 |        "      <td>...</td>\n",
333 |        "      <td>26.99</td>\n",
334 |        "      <td>10.76</td>\n",
335 |        "      <td>24.58</td>\n",
336 |        "      <td>58.70</td>\n",
337 |        "      <td>46.95</td>\n",
338 |        "      <td>61.69</td>\n",
339 |        "      <td>55.98</td>\n",
340 |        "      <td>31.63</td>\n",
341 |        "      <td>45.69</td>\n",
342 |        "      <td>58.28</td>\n",
343 |        "    </tr>\n",
344 |        "    <tr>\n",
345 |        "      <th>2006-01-06</th>\n",
346 |        "      <td>43.21</td>\n",
347 |        "      <td>10.90</td>\n",
348 |        "      <td>47.87</td>\n",
349 |        "      <td>52.68</td>\n",
350 |        "      <td>69.35</td>\n",
351 |        "      <td>60.45</td>\n",
352 |        "      <td>18.77</td>\n",
353 |        "      <td>59.25</td>\n",
354 |        "      <td>24.74</td>\n",
355 |        "      <td>35.47</td>\n",
356 |        "      <td>...</td>\n",
357 |        "      <td>26.91</td>\n",
358 |        "      <td>10.72</td>\n",
359 |        "      <td>24.85</td>\n",
360 |        "      <td>58.64</td>\n",
361 |        "      <td>47.21</td>\n",
362 |        "      <td>62.90</td>\n",
363 |        "      <td>56.16</td>\n",
364 |        "      <td>31.35</td>\n",
365 |        "      <td>45.88</td>\n",
366 |        "      <td>59.43</td>\n",
367 |        "    </tr>\n",
368 |        "    <tr>\n",
369 |        "      <th>2006-01-09</th>\n",
370 |        "      <td>43.42</td>\n",
371 |        "      <td>10.86</td>\n",
372 |        "      <td>47.08</td>\n",
373 |        "      <td>53.99</td>\n",
374 |        "      <td>68.77</td>\n",
375 |        "      <td>61.55</td>\n",
376 |        "      <td>19.06</td>\n",
377 |        "      <td>58.95</td>\n",
378 |        "      <td>25.00</td>\n",
379 |        "      <td>35.38</td>\n",
380 |        "      <td>...</td>\n",
381 |        "      <td>26.86</td>\n",
382 |        "      <td>10.88</td>\n",
383 |        "      <td>24.85</td>\n",
384 |        "      <td>59.08</td>\n",
385 |        "      <td>47.23</td>\n",
386 |        "      <td>61.40</td>\n",
387 |        "      <td>56.80</td>\n",
388 |        "      <td>31.48</td>\n",
389 |        "      <td>45.71</td>\n",
390 |        "      <td>59.40</td>\n",
391 |        "    </tr>\n",
392 |        "  </tbody>\n",
393 |        "</table>\n",
394 |        "<p>5 rows × 31 columns</p>\n",
395 |        "</div>"
396 |       ],
397 |       "text/plain": [
398 |        "Name         AABA   AAPL   AMZN    AXP     BA    CAT   CSCO    CVX    DIS  \\\n",
399 |        "Date                                                                        \n",
400 |        "2006-01-03  40.91  10.68  47.58  52.58  70.44  57.80  17.45  59.08  24.40   \n",
401 |        "2006-01-04  40.97  10.71  47.25  51.95  71.17  59.27  17.85  58.91  23.99   \n",
402 |        "2006-01-05  41.53  10.63  47.65  52.50  70.33  59.27  18.35  58.19  24.41   \n",
403 |        "2006-01-06  43.21  10.90  47.87  52.68  69.35  60.45  18.77  59.25  24.74   \n",
404 |        "2006-01-09  43.42  10.86  47.08  53.99  68.77  61.55  19.06  58.95  25.00   \n",
405 |        "\n",
406 |        "Name           GE  ...   MSFT    NKE    PFE     PG    TRV    UNH    UTX  \\\n",
407 |        "Date               ...                                                    \n",
408 |        "2006-01-03  35.37  ...  26.84  10.74  23.78  58.78  45.99  61.73  56.53   \n",
409 |        "2006-01-04  35.32  ...  26.97  10.69  24.55  58.89  46.50  61.88  56.19   \n",
410 |        "2006-01-05  35.23  ...  26.99  10.76  24.58  58.70  46.95  61.69  55.98   \n",
411 |        "2006-01-06  35.47  ...  26.91  10.72  24.85  58.64  47.21  62.90  56.16   \n",
412 |        "2006-01-09  35.38  ...  26.86  10.88  24.85  59.08  47.23  61.40  56.80   \n",
413 |        "\n",
414 |        "Name           VZ    WMT    XOM  \n",
415 |        "Date                             \n",
416 |        "2006-01-03  30.38  46.23  58.47  \n",
417 |        "2006-01-04  31.27  46.32  58.57  \n",
418 |        "2006-01-05  31.63  45.69  58.28  \n",
419 |        "2006-01-06  31.35  45.88  59.43  \n",
420 |        "2006-01-09  31.48  45.71  59.40  \n",
421 |        "\n",
422 |        "[5 rows x 31 columns]"
423 |       ]
424 |      },
425 |      "execution_count": 9,
426 |      "metadata": {},
427 |      "output_type": "execute_result"
428 |     }
429 |    ],
430 |    "source": [
431 |     "stocks = pd.read_csv(\"../data/dowjones/all_stocks_2006-01-01_to_2018-01-01.csv\", index_col='Date',parse_dates=['Date'])\n",
432 |     "stocks = stocks[[\"Close\", \"Name\"]]\n",
433 |     "stocks = stocks.pivot_table(values='Close', index=stocks.index, columns='Name', aggfunc='first')\n",
434 |     "stocks.head()"
435 |    ]
436 |   },
437 |   {
438 |    "cell_type": "code",
439 |    "execution_count": 10,
440 |    "metadata": {
441 |     "ExecuteTime": {
442 |      "end_time": "2019-03-10T18:50:22.196696Z",
443 |      "start_time": "2019-03-10T18:50:22.149149Z"
444 |     }
445 |    },
446 |    "outputs": [
447 |     {
448 |      "data": {
449 |       "text/html": [
450 |        "<div>\n",
451 |        "<style scoped>\n",
452 |        "    .dataframe tbody tr th:only-of-type {\n",
453 |        "        vertical-align: middle;\n",
454 |        "    }\n",
455 |        "\n",
456 |        "    .dataframe tbody tr th {\n",
457 |        "        vertical-align: top;\n",
458 |        "    }\n",
459 |        "\n",
460 |        "    .dataframe thead th {\n",
461 |        "        text-align: right;\n",
462 |        "    }\n",
463 |        "</style>\n",
464 |        "<table border=\"1\" class=\"dataframe\">\n",
465 |        "  <thead>\n",
466 |        "    <tr style=\"text-align: right;\">\n",
467 |        "      <th>Name</th>\n",
468 |        "      <th>AABA</th>\n",
469 |        "      <th>AAPL</th>\n",
470 |        "      <th>AMZN</th>\n",
471 |        "      <th>AXP</th>\n",
472 |        "      <th>BA</th>\n",
473 |        "      <th>CAT</th>\n",
474 |        "      <th>CSCO</th>\n",
475 |        "      <th>CVX</th>\n",
476 |        "      <th>DIS</th>\n",
477 |        "      <th>GE</th>\n",
478 |        "      <th>...</th>\n",
479 |        "      <th>MSFT</th>\n",
480 |        "      <th>NKE</th>\n",
481 |        "      <th>PFE</th>\n",
482 |        "      <th>PG</th>\n",
483 |        "      <th>TRV</th>\n",
484 |        "      <th>UNH</th>\n",
485 |        "      <th>UTX</th>\n",
486 |        "      <th>VZ</th>\n",
487 |        "      <th>WMT</th>\n",
488 |        "      <th>XOM</th>\n",
489 |        "    </tr>\n",
490 |        "    <tr>\n",
491 |        "      <th>Date</th>\n",
492 |        "      <th></th>\n",
493 |        "      <th></th>\n",
494 |        "      <th></th>\n",
495 |        "      <th></th>\n",
496 |        "      <th></th>\n",
497 |        "      <th></th>\n",
498 |        "      <th></th>\n",
499 |        "      <th></th>\n",
500 |        "      <th></th>\n",
501 |        "      <th></th>\n",
502 |        "      <th></th>\n",
503 |        "      <th></th>\n",
504 |        "      <th></th>\n",
505 |        "      <th></th>\n",
506 |        "      <th></th>\n",
507 |        "      <th></th>\n",
508 |        "      <th></th>\n",
509 |        "      <th></th>\n",
510 |        "      <th></th>\n",
511 |        "      <th></th>\n",
512 |        "      <th></th>\n",
513 |        "    </tr>\n",
514 |        "  </thead>\n",
515 |        "  <tbody>\n",
516 |        "    <tr>\n",
517 |        "      <th>2008-12-26</th>\n",
518 |        "      <td>0.001623</td>\n",
519 |        "      <td>0.009053</td>\n",
520 |        "      <td>0.006610</td>\n",
521 |        "      <td>-0.003339</td>\n",
522 |        "      <td>0.010219</td>\n",
523 |        "      <td>0.019327</td>\n",
524 |        "      <td>-0.004893</td>\n",
525 |        "      <td>0.010485</td>\n",
526 |        "      <td>0.008640</td>\n",
527 |        "      <td>-0.008690</td>\n",
528 |        "      <td>...</td>\n",
529 |        "      <td>-0.002087</td>\n",
530 |        "      <td>0.013029</td>\n",
531 |        "      <td>0.005291</td>\n",
532 |        "      <td>0.002318</td>\n",
533 |        "      <td>0.019443</td>\n",
534 |        "      <td>0.013097</td>\n",
535 |        "      <td>-0.000391</td>\n",
536 |        "      <td>0.010350</td>\n",
537 |        "      <td>-0.001623</td>\n",
538 |        "      <td>0.018606</td>\n",
539 |        "    </tr>\n",
540 |        "    <tr>\n",
541 |        "      <th>2008-12-29</th>\n",
542 |        "      <td>-0.037277</td>\n",
543 |        "      <td>0.008972</td>\n",
544 |        "      <td>-0.045964</td>\n",
545 |        "      <td>-0.011725</td>\n",
546 |        "      <td>-0.013323</td>\n",
547 |        "      <td>-0.008895</td>\n",
548 |        "      <td>-0.015980</td>\n",
549 |        "      <td>0.017058</td>\n",
550 |        "      <td>-0.032011</td>\n",
551 |        "      <td>-0.019411</td>\n",
552 |        "      <td>...</td>\n",
553 |        "      <td>-0.008887</td>\n",
554 |        "      <td>-0.016077</td>\n",
555 |        "      <td>0.011111</td>\n",
556 |        "      <td>-0.005452</td>\n",
557 |        "      <td>0.026136</td>\n",
558 |        "      <td>-0.027757</td>\n",
559 |        "      <td>0.003912</td>\n",
560 |        "      <td>-0.001506</td>\n",
561 |        "      <td>-0.004336</td>\n",
562 |        "      <td>0.010753</td>\n",
563 |        "    </tr>\n",
564 |        "    <tr>\n",
565 |        "      <th>2008-12-30</th>\n",
566 |        "      <td>0.007576</td>\n",
567 |        "      <td>-0.003234</td>\n",
568 |        "      <td>0.027530</td>\n",
569 |        "      <td>0.016949</td>\n",
570 |        "      <td>0.031508</td>\n",
571 |        "      <td>0.031176</td>\n",
572 |        "      <td>0.013741</td>\n",
573 |        "      <td>0.025577</td>\n",
574 |        "      <td>0.047042</td>\n",
575 |        "      <td>0.010217</td>\n",
576 |        "      <td>...</td>\n",
577 |        "      <td>0.020042</td>\n",
578 |        "      <td>0.014706</td>\n",
579 |        "      <td>0.026605</td>\n",
580 |        "      <td>0.015282</td>\n",
581 |        "      <td>0.030978</td>\n",
582 |        "      <td>0.057489</td>\n",
583 |        "      <td>0.033314</td>\n",
584 |        "      <td>0.002716</td>\n",
585 |        "      <td>-0.001089</td>\n",
586 |        "      <td>0.007306</td>\n",
587 |        "    </tr>\n",
588 |        "    <tr>\n",
589 |        "      <th>2008-12-31</th>\n",
590 |        "      <td>0.019215</td>\n",
591 |        "      <td>-0.011354</td>\n",
592 |        "      <td>0.010244</td>\n",
593 |        "      <td>0.030556</td>\n",
594 |        "      <td>0.034424</td>\n",
595 |        "      <td>0.023133</td>\n",
596 |        "      <td>0.004313</td>\n",
597 |        "      <td>0.008040</td>\n",
598 |        "      <td>0.009342</td>\n",
599 |        "      <td>0.024020</td>\n",
600 |        "      <td>...</td>\n",
601 |        "      <td>0.005171</td>\n",
602 |        "      <td>0.026570</td>\n",
603 |        "      <td>-0.002254</td>\n",
604 |        "      <td>0.011453</td>\n",
605 |        "      <td>0.006009</td>\n",
606 |        "      <td>-0.016272</td>\n",
607 |        "      <td>0.010558</td>\n",
608 |        "      <td>0.020163</td>\n",
609 |        "      <td>0.018347</td>\n",
610 |        "      <td>0.015778</td>\n",
611 |        "    </tr>\n",
612 |        "    <tr>\n",
613 |        "      <th>2009-01-02</th>\n",
614 |        "      <td>0.053279</td>\n",
615 |        "      <td>0.063167</td>\n",
616 |        "      <td>0.060062</td>\n",
617 |        "      <td>0.042049</td>\n",
618 |        "      <td>0.060464</td>\n",
619 |        "      <td>0.050146</td>\n",
620 |        "      <td>0.040491</td>\n",
621 |        "      <td>0.034473</td>\n",
622 |        "      <td>0.054209</td>\n",
623 |        "      <td>0.053704</td>\n",
624 |        "      <td>...</td>\n",
625 |        "      <td>0.045782</td>\n",
626 |        "      <td>0.040000</td>\n",
627 |        "      <td>0.031621</td>\n",
628 |        "      <td>0.015852</td>\n",
629 |        "      <td>0.000000</td>\n",
630 |        "      <td>0.037218</td>\n",
631 |        "      <td>0.025187</td>\n",
632 |        "      <td>0.021829</td>\n",
633 |        "      <td>0.019979</td>\n",
634 |        "      <td>0.022673</td>\n",
635 |        "    </tr>\n",
636 |        "  </tbody>\n",
637 |        "</table>\n",
638 |        "<p>5 rows × 31 columns</p>\n",
639 |        "</div>"
640 |       ],
641 |       "text/plain": [
642 |        "Name            AABA      AAPL      AMZN       AXP        BA       CAT  \\\n",
643 |        "Date                                                                     \n",
644 |        "2008-12-26  0.001623  0.009053  0.006610 -0.003339  0.010219  0.019327   \n",
645 |        "2008-12-29 -0.037277  0.008972 -0.045964 -0.011725 -0.013323 -0.008895   \n",
646 |        "2008-12-30  0.007576 -0.003234  0.027530  0.016949  0.031508  0.031176   \n",
647 |        "2008-12-31  0.019215 -0.011354  0.010244  0.030556  0.034424  0.023133   \n",
648 |        "2009-01-02  0.053279  0.063167  0.060062  0.042049  0.060464  0.050146   \n",
649 |        "\n",
650 |        "Name            CSCO       CVX       DIS        GE  ...      MSFT       NKE  \\\n",
651 |        "Date                                                ...                       \n",
652 |        "2008-12-26 -0.004893  0.010485  0.008640 -0.008690  ... -0.002087  0.013029   \n",
653 |        "2008-12-29 -0.015980  0.017058 -0.032011 -0.019411  ... -0.008887 -0.016077   \n",
654 |        "2008-12-30  0.013741  0.025577  0.047042  0.010217  ...  0.020042  0.014706   \n",
655 |        "2008-12-31  0.004313  0.008040  0.009342  0.024020  ...  0.005171  0.026570   \n",
656 |        "2009-01-02  0.040491  0.034473  0.054209  0.053704  ...  0.045782  0.040000   \n",
657 |        "\n",
658 |        "Name             PFE        PG       TRV       UNH       UTX        VZ  \\\n",
659 |        "Date                                                                     \n",
660 |        "2008-12-26  0.005291  0.002318  0.019443  0.013097 -0.000391  0.010350   \n",
661 |        "2008-12-29  0.011111 -0.005452  0.026136 -0.027757  0.003912 -0.001506   \n",
662 |        "2008-12-30  0.026605  0.015282  0.030978  0.057489  0.033314  0.002716   \n",
663 |        "2008-12-31 -0.002254  0.011453  0.006009 -0.016272  0.010558  0.020163   \n",
664 |        "2009-01-02  0.031621  0.015852  0.000000  0.037218  0.025187  0.021829   \n",
665 |        "\n",
666 |        "Name             WMT       XOM  \n",
667 |        "Date                            \n",
668 |        "2008-12-26 -0.001623  0.018606  \n",
669 |        "2008-12-29 -0.004336  0.010753  \n",
670 |        "2008-12-30 -0.001089  0.007306  \n",
671 |        "2008-12-31  0.018347  0.015778  \n",
672 |        "2009-01-02  0.019979  0.022673  \n",
673 |        "\n",
674 |        "[5 rows x 31 columns]"
675 |       ]
676 |      },
677 |      "execution_count": 10,
678 |      "metadata": {},
679 |      "output_type": "execute_result"
680 |     }
681 |    ],
682 |    "source": [
683 |     "real_returns = (stocks - stocks.shift(1)) / stocks.shift(1)\n",
684 |     "real_returns = real_returns.dropna()\n",
685 |     "real_returns[750:3000].head()"
686 |    ]
687 |   },
688 |   {
689 |    "cell_type": "code",
690 |    "execution_count": 11,
691 |    "metadata": {
692 |     "ExecuteTime": {
693 |      "end_time": "2019-03-10T18:50:22.232861Z",
694 |      "start_time": "2019-03-10T18:50:22.199243Z"
695 |     }
696 |    },
697 |    "outputs": [
698 |     {
699 |      "data": {
700 |       "text/plain": [
701 |        "(2250, 31)"
702 |       ]
703 |      },
704 |      "execution_count": 11,
705 |      "metadata": {},
706 |      "output_type": "execute_result"
707 |     }
708 |    ],
709 |    "source": [
710 |     "all_positions = np.concatenate(positions,axis=0)\n",
711 |     "all_positions.shape"
712 |    ]
713 |   },
714 |   {
715 |    "cell_type": "code",
716 |    "execution_count": 12,
717 |    "metadata": {
718 |     "ExecuteTime": {
719 |      "end_time": "2019-03-10T18:50:26.373525Z",
720 |      "start_time": "2019-03-10T18:50:22.234334Z"
721 |     }
722 |    },
723 |    "outputs": [
724 |     {
725 |      "name": "stdout",
726 |      "output_type": "stream",
727 |      "text": [
728 |       "rebalance = 0.36855354986190514\n",
729 |       "rebalance1 = 0.6621851939596246\n",
730 |       "rebalance = 0.4080995445308407\n",
731 |       "rebalance1 = 0.6678730157955822\n",
732 |       "rebalance = 0.5921330183114122\n",
733 |       "rebalance1 = 0.7894692748857888\n",
734 |       "rebalance = 0.5970559403035915\n",
735 |       "rebalance1 = 0.7879871997916339\n",
736 |       "rebalance = 0.7232641469124205\n",
737 |       "rebalance1 = 0.8639078907198249\n",
738 |       "rebalance = 0.8128304975543116\n",
739 |       "rebalance1 = 0.9135528001160027\n",
740 |       "rebalance = 0.785759152130262\n",
741 |       "rebalance1 = 0.8967744833065788\n",
742 |       "rebalance = 0.7776428519233565\n",
743 |       "rebalance1 = 0.8909891094888095\n",
744 |       "rebalance = 0.7535956852175906\n",
745 |       "rebalance1 = 0.8763090744146368\n",
746 |       "rebalance = 0.8567053574970215\n",
747 |       "rebalance1 = 0.9335921800003164\n",
748 |       "rebalance = 0.8181464485682514\n",
749 |       "rebalance1 = 0.9115270819091531\n",
750 |       "rebalance = 0.8744274366140588\n",
751 |       "rebalance1 = 0.9416789642005597\n",
752 |       "rebalance = 0.9681492772013753\n",
753 |       "rebalance1 = 0.9901472574490726\n",
754 |       "rebalance = 0.9247589275299702\n",
755 |       "rebalance1 = 0.9670643327244407\n",
756 |       "rebalance = 0.9597354807771837\n",
757 |       "rebalance1 = 0.9848117044643548\n"
758 |      ]
759 |     }
760 |    ],
761 |    "source": [
762 |     "for k in range(1, 16):\n",
763 |     "    probabilities = pd.DataFrame(data=all_positions, index=real_returns[750:3000].index, columns=real_returns.columns)\n",
764 |     "    position = probabilities.apply(lambda x: long_short_postion(x, k), axis=1, result_type='broadcast')\n",
765 |     "    final_returns = real_returns[750:3000].mul(position)\n",
766 |     "#     no_rebalance = (final_returns + 1).product().sum()/(2 * k)\n",
767 |     "    rebalance = (1 + final_returns.sum(axis = 1)/k).product()\n",
768 |     "    rebalance1 = (1 + final_returns.sum(axis = 1)/(2 * k)).product()\n",
769 |     "#     print(f\"no rebalance = {no_rebalance}\")\n",
770 |     "    print(f\"rebalance = {rebalance}\")\n",
771 |     "    print(f\"rebalance1 = {rebalance1}\")"
772 |    ]
773 |   }
774 |  ],
775 |  "metadata": {
776 |   "kernelspec": {
777 |    "display_name": "projet_S5",
778 |    "language": "python",
779 |    "name": "projet_s5"
780 |   },
781 |   "language_info": {
782 |    "codemirror_mode": {
783 |     "name": "ipython",
784 |     "version": 3
785 |    },
786 |    "file_extension": ".py",
787 |    "mimetype": "text/x-python",
788 |    "name": "python",
789 |    "nbconvert_exporter": "python",
790 |    "pygments_lexer": "ipython3",
791 |    "version": "3.6.7"
792 |   },
793 |   "toc": {
794 |    "base_numbering": 1,
795 |    "nav_menu": {},
796 |    "number_sections": true,
797 |    "sideBar": true,
798 |    "skip_h1_title": false,
799 |    "title_cell": "Table of Contents",
800 |    "title_sidebar": "Contents",
801 |    "toc_cell": false,
802 |    "toc_position": {},
803 |    "toc_section_display": true,
804 |    "toc_window_display": false
805 |   }
806 |  },
807 |  "nbformat": 4,
808 |  "nbformat_minor": 2
809 | }
810 | 


--------------------------------------------------------------------------------
/notebook/[Official] Backtesting LSTM.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "ExecuteTime": {
  8 |      "end_time": "2019-03-13T22:04:54.889499Z",
  9 |      "start_time": "2019-03-13T22:04:53.977846Z"
 10 |     },
 11 |     "scrolled": true
 12 |    },
 13 |    "outputs": [],
 14 |    "source": [
 15 |     "# List all devices\n",
 16 |     "from tensorflow.python.client import device_lib\n",
 17 |     "# print(device_lib.list_local_devices())"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 2,
 23 |    "metadata": {
 24 |     "ExecuteTime": {
 25 |      "end_time": "2019-03-13T22:04:54.930443Z",
 26 |      "start_time": "2019-03-13T22:04:54.890979Z"
 27 |     },
 28 |     "scrolled": true
 29 |    },
 30 |    "outputs": [
 31 |     {
 32 |      "name": "stderr",
 33 |      "output_type": "stream",
 34 |      "text": [
 35 |       "Using TensorFlow backend.\n"
 36 |      ]
 37 |     },
 38 |     {
 39 |      "data": {
 40 |       "text/plain": [
 41 |        "[]"
 42 |       ]
 43 |      },
 44 |      "execution_count": 2,
 45 |      "metadata": {},
 46 |      "output_type": "execute_result"
 47 |     }
 48 |    ],
 49 |    "source": [
 50 |     "# Check available GPUs\n",
 51 |     "from keras import backend as K\n",
 52 |     "K.tensorflow_backend._get_available_gpus()"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 3,
 58 |    "metadata": {
 59 |     "ExecuteTime": {
 60 |      "end_time": "2019-03-13T22:04:54.962585Z",
 61 |      "start_time": "2019-03-13T22:04:54.931895Z"
 62 |     }
 63 |    },
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "import os\n",
 67 |     "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\";\n",
 68 |     "# The GPU ID to use, usually either \"0\" or \"1\"\n",
 69 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\";  "
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 4,
 75 |    "metadata": {
 76 |     "ExecuteTime": {
 77 |      "end_time": "2019-03-13T22:04:55.245739Z",
 78 |      "start_time": "2019-03-13T22:04:54.964457Z"
 79 |     }
 80 |    },
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "# Importing the libraries\n",
 84 |     "import numpy as np\n",
 85 |     "import pandas as pd\n",
 86 |     "from keras.models import Sequential\n",
 87 |     "from keras.layers import Dense, LSTM, Dropout, Reshape, Lambda\n",
 88 |     "from keras.preprocessing.sequence import TimeseriesGenerator\n",
 89 |     "from keras.callbacks import EarlyStopping, ModelCheckpoint\n",
 90 |     "from keras.activations import softmax\n",
 91 |     "from keras.optimizers import SGD\n",
 92 |     "from keras.models import load_model\n",
 93 |     "from keras.utils import to_categorical\n",
 94 |     "import math\n",
 95 |     "import pickle\n",
 96 |     "from sklearn.preprocessing import StandardScaler\n"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": 5,
102 |    "metadata": {
103 |     "ExecuteTime": {
104 |      "end_time": "2019-03-13T22:04:55.298260Z",
105 |      "start_time": "2019-03-13T22:04:55.247176Z"
106 |     }
107 |    },
108 |    "outputs": [],
109 |    "source": [
110 |     "index = \"dowjones\"\n",
111 |     "# index = \"frankfurt\"\n",
112 |     "with open(f\"../data/{index}_calculated/periods750_250_240.txt\", \"rb\") as fp:   # Unpickling\n",
113 |     "    dataset = pickle.load(fp)"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 6,
119 |    "metadata": {
120 |     "ExecuteTime": {
121 |      "end_time": "2019-03-13T22:04:55.337362Z",
122 |      "start_time": "2019-03-13T22:04:55.299503Z"
123 |     }
124 |    },
125 |    "outputs": [],
126 |    "source": [
127 |     "timestep = 240"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": 7,
133 |    "metadata": {
134 |     "ExecuteTime": {
135 |      "end_time": "2019-03-13T22:04:55.406816Z",
136 |      "start_time": "2019-03-13T22:04:55.340316Z"
137 |     }
138 |    },
139 |    "outputs": [],
140 |    "source": [
141 |     "def long_short_postion(returns, k):\n",
142 |     "    position_ = np.copy(returns)\n",
143 |     "    short = np.argpartition(position_, k)[:k]\n",
144 |     "    neutral = np.argpartition(position_, len(position_) - k)[:(len(position_) - k)]\n",
145 |     "    position_[:] = 1\n",
146 |     "    position_[neutral] = 0\n",
147 |     "    position_[short] = -1\n",
148 |     "    return position_"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": 8,
154 |    "metadata": {
155 |     "ExecuteTime": {
156 |      "end_time": "2019-03-13T22:04:56.521521Z",
157 |      "start_time": "2019-03-13T22:04:55.411702Z"
158 |     }
159 |    },
160 |    "outputs": [
161 |     {
162 |      "ename": "ValueError",
163 |      "evalue": "Error when checking input: expected lstm_10_input to have shape (240, 1) but got array with shape (240, 31)",
164 |      "output_type": "error",
165 |      "traceback": [
166 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
167 |       "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
168 |       "\u001b[0;32m<ipython-input-8-3986ac14611f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     22\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 24\u001b[0;31m     \u001b[0mpredicted\u001b[0m \u001b[0;34m=\u001b[0m  \u001b[0mregressor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_final\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     25\u001b[0m     \u001b[0mlabel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpredicted\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0.5\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     26\u001b[0m     \u001b[0mlabel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlabel\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0;31m# Convert boolean to int\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
169 |       "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, x, batch_size, verbose, steps)\u001b[0m\n\u001b[1;32m   1145\u001b[0m                              'argument.')\n\u001b[1;32m   1146\u001b[0m         \u001b[0;31m# Validate user data.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1147\u001b[0;31m         \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_standardize_user_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1148\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstateful\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1149\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
170 |       "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36m_standardize_user_data\u001b[0;34m(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)\u001b[0m\n\u001b[1;32m    747\u001b[0m             \u001b[0mfeed_input_shapes\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    748\u001b[0m             \u001b[0mcheck_batch_axis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m  \u001b[0;31m# Don't enforce the batch size.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 749\u001b[0;31m             exception_prefix='input')\n\u001b[0m\u001b[1;32m    750\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    751\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0my\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
171 |       "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/engine/training_utils.py\u001b[0m in \u001b[0;36mstandardize_input_data\u001b[0;34m(data, names, shapes, check_batch_axis, exception_prefix)\u001b[0m\n\u001b[1;32m    135\u001b[0m                             \u001b[0;34m': expected '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' to have shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    136\u001b[0m                             \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' but got array with shape '\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 137\u001b[0;31m                             str(data_shape))\n\u001b[0m\u001b[1;32m    138\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    139\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
172 |       "\u001b[0;31mValueError\u001b[0m: Error when checking input: expected lstm_10_input to have shape (240, 1) but got array with shape (240, 31)"
173 |      ]
174 |     }
175 |    ],
176 |    "source": [
177 |     "positions = []\n",
178 |     "for i in range(len(dataset[0])):\n",
179 |     "    model_period = f\"../model/LSTM/{index}2_period{i}.h5\"\n",
180 |     "    regressor = load_model(model_period, custom_objects={\"softmax\": softmax})\n",
181 |     "    x_train = dataset[0][i][0].values\n",
182 |     "    scaler = StandardScaler().fit(x_train)\n",
183 |     "    \n",
184 |     "    x_test = scaler.transform(dataset[1][i][0])\n",
185 |     "    y_test = to_categorical(dataset[1][i][1].values, 2)\n",
186 |     "#     print(f\"Period {i}\")\n",
187 |     "#     print(f\"x test shape: {x_test.shape}\")\n",
188 |     "#     print(f\"y test shape: {y_test.shape}\")\n",
189 |     "#     print(f\"x_final shape: {x_final.shape}\")\n",
190 |     "#     print(f\"y_final shape: {y_final.shape}\")\n",
191 |     "    x_series = [x_test[i:i + timestep, :]\n",
192 |     "                for i in range(x_test.shape[0] - timestep)]\n",
193 |     "    y_series = [y_test[i + timestep]\n",
194 |     "                for i in range(y_test.shape[0] - timestep)]\n",
195 |     "    x_final = np.array(x_series)\n",
196 |     "    y_final = np.array(y_series)\n",
197 |     "\n",
198 |     "\n",
199 |     "\n",
200 |     "    predicted =  regressor.predict(x_final)\n",
201 |     "    label = predicted > 0.5\n",
202 |     "    label = label * 1 # Convert boolean to int\n",
203 |     "#     print((sum(y_test[:, :, 1] == label[:, :, 1])/(y_test.size/2)).sum())\n",
204 |     "    positions.append(predicted[:, :, 1])"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "code",
209 |    "execution_count": null,
210 |    "metadata": {
211 |     "ExecuteTime": {
212 |      "end_time": "2019-03-13T22:04:56.524199Z",
213 |      "start_time": "2019-03-13T22:04:53.997Z"
214 |     }
215 |    },
216 |    "outputs": [],
217 |    "source": [
218 |     "stocks = pd.read_csv(\"../data/dowjones/all_stocks_2006-01-01_to_2018-01-01.csv\", index_col='Date',parse_dates=['Date'])\n",
219 |     "stocks = stocks[[\"Close\", \"Name\"]]\n",
220 |     "stocks = stocks.pivot_table(values='Close', index=stocks.index, columns='Name', aggfunc='first')\n",
221 |     "stocks.head()"
222 |    ]
223 |   },
224 |   {
225 |    "cell_type": "code",
226 |    "execution_count": null,
227 |    "metadata": {
228 |     "ExecuteTime": {
229 |      "end_time": "2019-03-13T22:04:56.525496Z",
230 |      "start_time": "2019-03-13T22:04:53.999Z"
231 |     }
232 |    },
233 |    "outputs": [],
234 |    "source": [
235 |     "real_returns = (stocks - stocks.shift(1)) / stocks.shift(1)\n",
236 |     "real_returns = real_returns.dropna()\n",
237 |     "real_returns[750:3000].head()"
238 |    ]
239 |   },
240 |   {
241 |    "cell_type": "code",
242 |    "execution_count": null,
243 |    "metadata": {
244 |     "ExecuteTime": {
245 |      "end_time": "2019-03-13T22:04:56.526851Z",
246 |      "start_time": "2019-03-13T22:04:54.000Z"
247 |     }
248 |    },
249 |    "outputs": [],
250 |    "source": [
251 |     "all_positions = np.concatenate(positions,axis=0)\n",
252 |     "all_positions.shape"
253 |    ]
254 |   },
255 |   {
256 |    "cell_type": "code",
257 |    "execution_count": null,
258 |    "metadata": {
259 |     "ExecuteTime": {
260 |      "end_time": "2019-03-13T22:04:56.528322Z",
261 |      "start_time": "2019-03-13T22:04:54.002Z"
262 |     }
263 |    },
264 |    "outputs": [],
265 |    "source": [
266 |     "for k in range(1, 16):\n",
267 |     "    probabilities = pd.DataFrame(data=all_positions, index=real_returns[750:3000].index, columns=real_returns.columns)\n",
268 |     "    position = probabilities.apply(lambda x: long_short_postion(x, k), axis=1, result_type='broadcast')\n",
269 |     "    final_returns = real_returns[750:3000].mul(position)\n",
270 |     "#     no_rebalance = (final_returns + 1).product().sum()/(2 * k)\n",
271 |     "#     rebalance = (1 + final_returns.sum(axis = 1)/k).product()\n",
272 |     "    rebalance1 = (1 + final_returns.sum(axis = 1)/(2 * k)).product()\n",
273 |     "#     print(f\"no rebalance = {no_rebalance}\")\n",
274 |     "#     print(f\"rebalance = {rebalance}\")\n",
275 |     "    print(f\"rebalance = {rebalance1}\")"
276 |    ]
277 |   }
278 |  ],
279 |  "metadata": {
280 |   "kernelspec": {
281 |    "display_name": "projet_S5",
282 |    "language": "python",
283 |    "name": "projet_s5"
284 |   },
285 |   "language_info": {
286 |    "codemirror_mode": {
287 |     "name": "ipython",
288 |     "version": 3
289 |    },
290 |    "file_extension": ".py",
291 |    "mimetype": "text/x-python",
292 |    "name": "python",
293 |    "nbconvert_exporter": "python",
294 |    "pygments_lexer": "ipython3",
295 |    "version": "3.6.7"
296 |   },
297 |   "toc": {
298 |    "base_numbering": 1,
299 |    "nav_menu": {},
300 |    "number_sections": true,
301 |    "sideBar": true,
302 |    "skip_h1_title": false,
303 |    "title_cell": "Table of Contents",
304 |    "title_sidebar": "Contents",
305 |    "toc_cell": false,
306 |    "toc_position": {},
307 |    "toc_section_display": true,
308 |    "toc_window_display": false
309 |   }
310 |  },
311 |  "nbformat": 4,
312 |  "nbformat_minor": 2
313 | }
314 | 


--------------------------------------------------------------------------------
/notebook/[Official] Benchmark Machine Learning.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "ExecuteTime": {
  8 |      "end_time": "2019-03-07T12:08:14.069749Z",
  9 |      "start_time": "2019-03-07T12:08:13.085083Z"
 10 |     }
 11 |    },
 12 |    "outputs": [
 13 |     {
 14 |      "name": "stderr",
 15 |      "output_type": "stream",
 16 |      "text": [
 17 |       "Using TensorFlow backend.\n"
 18 |      ]
 19 |     }
 20 |    ],
 21 |    "source": [
 22 |     "# Importing the libraries\n",
 23 |     "import numpy as np\n",
 24 |     "import pandas as pd\n",
 25 |     "from keras.models import Sequential\n",
 26 |     "from keras.layers import Dense, LSTM, Dropout, Reshape, Lambda\n",
 27 |     "from keras.preprocessing.sequence import TimeseriesGenerator\n",
 28 |     "from keras.callbacks import EarlyStopping, ModelCheckpoint\n",
 29 |     "from keras.activations import softmax\n",
 30 |     "from keras.optimizers import SGD\n",
 31 |     "import math\n",
 32 |     "import pickle"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": 2,
 38 |    "metadata": {
 39 |     "ExecuteTime": {
 40 |      "end_time": "2019-03-07T12:08:15.158729Z",
 41 |      "start_time": "2019-03-07T12:08:14.071377Z"
 42 |     }
 43 |    },
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "with open(\"../data/dowjones_calculated/periods.txt\", \"rb\") as fp:   # Unpickling\n",
 47 |     "    dataset = pickle.load(fp)"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": 3,
 53 |    "metadata": {
 54 |     "ExecuteTime": {
 55 |      "end_time": "2019-03-07T12:08:15.660764Z",
 56 |      "start_time": "2019-03-07T12:08:15.165391Z"
 57 |     }
 58 |    },
 59 |    "outputs": [],
 60 |    "source": [
 61 |     "X_train = dataset[0][0][0]\n",
 62 |     "meanX = X_train.mean(axis=1)\n",
 63 |     "stdX = X_train.std(axis = 1)\n",
 64 |     "X_train = X_train.sub(meanX, axis=0)\n",
 65 |     "X_train = X_train.div(stdX, axis = 0)\n",
 66 |     "X_train = X_train.values\n",
 67 |     "\n",
 68 |     "y_train = dataset[0][0][1].values\n",
 69 |     "\n",
 70 |     "X_test = dataset[1][0][0]\n",
 71 |     "meanX = X_test.mean(axis=1)\n",
 72 |     "stdX = X_test.std(axis = 1)\n",
 73 |     "X_test = X_test.sub(meanX, axis=0)\n",
 74 |     "X_test = X_test.div(stdX, axis = 0)\n",
 75 |     "X_test = X_test.values\n",
 76 |     "\n",
 77 |     "y_test = dataset[1][0][1].values"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 10,
 83 |    "metadata": {
 84 |     "ExecuteTime": {
 85 |      "end_time": "2019-03-07T12:26:28.894255Z",
 86 |      "start_time": "2019-03-07T12:26:28.887726Z"
 87 |     }
 88 |    },
 89 |    "outputs": [],
 90 |    "source": [
 91 |     "y_train = y_train * 1.0\n",
 92 |     "y_test = y_test * 1.0"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": 4,
 98 |    "metadata": {
 99 |     "ExecuteTime": {
100 |      "end_time": "2019-03-07T10:26:07.602938Z",
101 |      "start_time": "2019-03-07T10:26:07.565938Z"
102 |     }
103 |    },
104 |    "outputs": [],
105 |    "source": [
106 |     "def get_one_hot(targets, nb_classes):\n",
107 |     "    res = np.eye(nb_classes)[np.array(targets).reshape(-1)]\n",
108 |     "    return res.reshape(list(targets.shape)+[nb_classes])\n",
109 |     "# y_train = get_one_hot(y_train, 2)\n",
110 |     "# y_test = get_one_hot(y_test, 2)"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": 5,
116 |    "metadata": {
117 |     "ExecuteTime": {
118 |      "end_time": "2019-03-07T10:26:07.693204Z",
119 |      "start_time": "2019-03-07T10:26:07.605116Z"
120 |     }
121 |    },
122 |    "outputs": [
123 |     {
124 |      "name": "stdout",
125 |      "output_type": "stream",
126 |      "text": [
127 |       "x train shape: (750, 31)\n",
128 |       "y train shape: (750, 31)\n",
129 |       "x test shape: (490, 31)\n",
130 |       "y test shape: (490, 31)\n"
131 |      ]
132 |     }
133 |    ],
134 |    "source": [
135 |     "print(f\"x train shape: {X_train.shape}\")\n",
136 |     "print(f\"y train shape: {y_train.shape}\")\n",
137 |     "print(f\"x test shape: {X_test.shape}\")\n",
138 |     "print(f\"y test shape: {y_test.shape}\")\n",
139 |     "# print(f\"predicted shape: {predicted.shape}\")"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 6,
145 |    "metadata": {
146 |     "ExecuteTime": {
147 |      "end_time": "2019-03-07T10:26:07.755153Z",
148 |      "start_time": "2019-03-07T10:26:07.697677Z"
149 |     }
150 |    },
151 |    "outputs": [],
152 |    "source": [
153 |     "timestep = 10"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": 7,
159 |    "metadata": {
160 |     "ExecuteTime": {
161 |      "end_time": "2019-03-07T10:26:07.825818Z",
162 |      "start_time": "2019-03-07T10:26:07.756645Z"
163 |     }
164 |    },
165 |    "outputs": [],
166 |    "source": [
167 |     "data = X_train\n",
168 |     "targets = y_train\n",
169 |     "\n",
170 |     "train_gen = TimeseriesGenerator(data, targets,\n",
171 |     "                               length=timestep, sampling_rate=1,\n",
172 |     "                               batch_size=(X_train.shape[0] - timestep))"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "code",
177 |    "execution_count": 8,
178 |    "metadata": {
179 |     "ExecuteTime": {
180 |      "end_time": "2019-03-07T10:26:07.892629Z",
181 |      "start_time": "2019-03-07T10:26:07.831831Z"
182 |     }
183 |    },
184 |    "outputs": [],
185 |    "source": [
186 |     "data = X_test\n",
187 |     "targets = y_test\n",
188 |     "\n",
189 |     "test_gen = TimeseriesGenerator(data, targets,\n",
190 |     "                               length=timestep, sampling_rate=1,\n",
191 |     "                               batch_size=(X_test.shape[0] - timestep))"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": 9,
197 |    "metadata": {
198 |     "ExecuteTime": {
199 |      "end_time": "2019-03-07T10:26:08.001932Z",
200 |      "start_time": "2019-03-07T10:26:07.897006Z"
201 |     }
202 |    },
203 |    "outputs": [],
204 |    "source": [
205 |     "X_train = train_gen[0][0]\n",
206 |     "y_train = train_gen[0][1]\n",
207 |     "X_test = test_gen[0][0]\n",
208 |     "y_test = test_gen[0][1]"
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "code",
213 |    "execution_count": 10,
214 |    "metadata": {
215 |     "ExecuteTime": {
216 |      "end_time": "2019-03-07T10:26:08.029448Z",
217 |      "start_time": "2019-03-07T10:26:08.003458Z"
218 |     }
219 |    },
220 |    "outputs": [
221 |     {
222 |      "name": "stdout",
223 |      "output_type": "stream",
224 |      "text": [
225 |       "x train shape: (740, 10, 31)\n",
226 |       "y train shape: (740, 31)\n",
227 |       "x test shape: (480, 10, 31)\n",
228 |       "y test shape: (480, 31)\n"
229 |      ]
230 |     }
231 |    ],
232 |    "source": [
233 |     "print(f\"x train shape: {X_train.shape}\")\n",
234 |     "print(f\"y train shape: {y_train.shape}\")\n",
235 |     "print(f\"x test shape: {X_test.shape}\")\n",
236 |     "print(f\"y test shape: {y_test.shape}\")"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "code",
241 |    "execution_count": 11,
242 |    "metadata": {
243 |     "ExecuteTime": {
244 |      "end_time": "2019-03-07T10:26:08.094488Z",
245 |      "start_time": "2019-03-07T10:26:08.033868Z"
246 |     }
247 |    },
248 |    "outputs": [],
249 |    "source": [
250 |     "X_train = X_train.transpose((0,2,1))\n",
251 |     "X_train = np.reshape(X_train, (X_train.shape[0] * X_train.shape[1], timestep))\n",
252 |     "y_train = np.reshape(y_train, (y_train.shape[0] * y_train.shape[1], 1))\n",
253 |     "\n",
254 |     "X_test = X_test.transpose((0,2,1))\n",
255 |     "X_test = np.reshape(X_test, (X_test.shape[0] * X_test.shape[1], timestep))\n",
256 |     "y_test = np.reshape(y_test, (y_test.shape[0] * y_test.shape[1], 1))"
257 |    ]
258 |   },
259 |   {
260 |    "cell_type": "code",
261 |    "execution_count": 12,
262 |    "metadata": {
263 |     "ExecuteTime": {
264 |      "end_time": "2019-03-07T10:26:08.175919Z",
265 |      "start_time": "2019-03-07T10:26:08.095912Z"
266 |     }
267 |    },
268 |    "outputs": [
269 |     {
270 |      "name": "stdout",
271 |      "output_type": "stream",
272 |      "text": [
273 |       "x train shape: (22940, 10)\n",
274 |       "y train shape: (22940, 1)\n",
275 |       "x test shape: (14880, 10)\n",
276 |       "y test shape: (14880, 1)\n"
277 |      ]
278 |     }
279 |    ],
280 |    "source": [
281 |     "print(f\"x train shape: {X_train.shape}\")\n",
282 |     "print(f\"y train shape: {y_train.shape}\")\n",
283 |     "print(f\"x test shape: {X_test.shape}\")\n",
284 |     "print(f\"y test shape: {y_test.shape}\")"
285 |    ]
286 |   },
287 |   {
288 |    "cell_type": "code",
289 |    "execution_count": 12,
290 |    "metadata": {
291 |     "ExecuteTime": {
292 |      "end_time": "2019-03-07T12:32:10.282439Z",
293 |      "start_time": "2019-03-07T12:32:06.645476Z"
294 |     }
295 |    },
296 |    "outputs": [],
297 |    "source": [
298 |     "from sklearn.ensemble import RandomForestClassifier\n",
299 |     "from sklearn.ensemble import RandomForestRegressor"
300 |    ]
301 |   },
302 |   {
303 |    "cell_type": "code",
304 |    "execution_count": 14,
305 |    "metadata": {
306 |     "ExecuteTime": {
307 |      "end_time": "2019-03-07T10:26:09.762268Z",
308 |      "start_time": "2019-03-07T10:26:08.516032Z"
309 |     }
310 |    },
311 |    "outputs": [
312 |     {
313 |      "name": "stderr",
314 |      "output_type": "stream",
315 |      "text": [
316 |       "/home/tqa/anaconda3/envs/projet_S5/lib/python3.6/site-packages/sklearn/ensemble/forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
317 |       "  \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n",
318 |       "/home/tqa/anaconda3/envs/projet_S5/lib/python3.6/site-packages/ipykernel_launcher.py:6: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
319 |       "  \n"
320 |      ]
321 |     },
322 |     {
323 |      "data": {
324 |       "text/plain": [
325 |        "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
326 |        "            max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
327 |        "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
328 |        "            min_samples_leaf=1, min_samples_split=2,\n",
329 |        "            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=2,\n",
330 |        "            oob_score=False, random_state=0, verbose=0, warm_start=False)"
331 |       ]
332 |      },
333 |      "execution_count": 14,
334 |      "metadata": {},
335 |      "output_type": "execute_result"
336 |     }
337 |    ],
338 |    "source": [
339 |     "# Create a random forest Classifier. By convention, clf means 'Classifier'\n",
340 |     "clf = RandomForestClassifier(n_jobs=2, random_state=0)\n",
341 |     "\n",
342 |     "# Train the Classifier to take the training features and learn how they relate\n",
343 |     "# to the training y (the target labels)\n",
344 |     "clf.fit(X_train, y_train)"
345 |    ]
346 |   },
347 |   {
348 |    "cell_type": "code",
349 |    "execution_count": 13,
350 |    "metadata": {
351 |     "ExecuteTime": {
352 |      "end_time": "2019-03-07T12:32:25.714297Z",
353 |      "start_time": "2019-03-07T12:32:12.218136Z"
354 |     }
355 |    },
356 |    "outputs": [
357 |     {
358 |      "data": {
359 |       "text/plain": [
360 |        "RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n",
361 |        "           max_features='auto', max_leaf_nodes=None,\n",
362 |        "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
363 |        "           min_samples_leaf=1, min_samples_split=2,\n",
364 |        "           min_weight_fraction_leaf=0.0, n_estimators=1000, n_jobs=None,\n",
365 |        "           oob_score=False, random_state=42, verbose=0, warm_start=False)"
366 |       ]
367 |      },
368 |      "execution_count": 13,
369 |      "metadata": {},
370 |      "output_type": "execute_result"
371 |     }
372 |    ],
373 |    "source": [
374 |     "rf = RandomForestRegressor(n_estimators = 1000, random_state = 42)\n",
375 |     "\n",
376 |     "rf.fit(X_train, y_train)"
377 |    ]
378 |   },
379 |   {
380 |    "cell_type": "code",
381 |    "execution_count": 15,
382 |    "metadata": {
383 |     "ExecuteTime": {
384 |      "end_time": "2019-03-07T10:26:10.749119Z",
385 |      "start_time": "2019-03-07T10:26:09.764213Z"
386 |     }
387 |    },
388 |    "outputs": [],
389 |    "source": [
390 |     "from xgboost import XGBClassifier"
391 |    ]
392 |   },
393 |   {
394 |    "cell_type": "code",
395 |    "execution_count": 16,
396 |    "metadata": {
397 |     "ExecuteTime": {
398 |      "end_time": "2019-03-07T10:26:12.714370Z",
399 |      "start_time": "2019-03-07T10:26:10.751271Z"
400 |     },
401 |     "scrolled": true
402 |    },
403 |    "outputs": [
404 |     {
405 |      "name": "stderr",
406 |      "output_type": "stream",
407 |      "text": [
408 |       "/home/tqa/anaconda3/envs/projet_S5/lib/python3.6/site-packages/sklearn/preprocessing/label.py:219: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
409 |       "  y = column_or_1d(y, warn=True)\n",
410 |       "/home/tqa/anaconda3/envs/projet_S5/lib/python3.6/site-packages/sklearn/preprocessing/label.py:252: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
411 |       "  y = column_or_1d(y, warn=True)\n"
412 |      ]
413 |     },
414 |     {
415 |      "data": {
416 |       "text/plain": [
417 |        "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n",
418 |        "       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,\n",
419 |        "       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,\n",
420 |        "       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,\n",
421 |        "       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n",
422 |        "       silent=True, subsample=1)"
423 |       ]
424 |      },
425 |      "execution_count": 16,
426 |      "metadata": {},
427 |      "output_type": "execute_result"
428 |     }
429 |    ],
430 |    "source": [
431 |     "# fit model on training data\n",
432 |     "model = XGBClassifier()\n",
433 |     "model.fit(X_train, y_train)"
434 |    ]
435 |   },
436 |   {
437 |    "cell_type": "code",
438 |    "execution_count": 17,
439 |    "metadata": {
440 |     "ExecuteTime": {
441 |      "end_time": "2019-03-07T10:26:12.821883Z",
442 |      "start_time": "2019-03-07T10:26:12.715817Z"
443 |     }
444 |    },
445 |    "outputs": [],
446 |    "source": [
447 |     "predict = clf.predict(X_test)"
448 |    ]
449 |   },
450 |   {
451 |    "cell_type": "code",
452 |    "execution_count": 18,
453 |    "metadata": {
454 |     "ExecuteTime": {
455 |      "end_time": "2019-03-07T10:26:12.894092Z",
456 |      "start_time": "2019-03-07T10:26:12.834796Z"
457 |     }
458 |    },
459 |    "outputs": [],
460 |    "source": [
461 |     "predict = model.predict(X_test)"
462 |    ]
463 |   },
464 |   {
465 |    "cell_type": "code",
466 |    "execution_count": 15,
467 |    "metadata": {
468 |     "ExecuteTime": {
469 |      "end_time": "2019-03-07T12:35:55.590386Z",
470 |      "start_time": "2019-03-07T12:35:55.456304Z"
471 |     }
472 |    },
473 |    "outputs": [],
474 |    "source": [
475 |     "predict = rf.predict(X_test)"
476 |    ]
477 |   },
478 |   {
479 |    "cell_type": "code",
480 |    "execution_count": 19,
481 |    "metadata": {
482 |     "ExecuteTime": {
483 |      "end_time": "2019-03-07T12:38:03.267099Z",
484 |      "start_time": "2019-03-07T12:38:03.258752Z"
485 |     }
486 |    },
487 |    "outputs": [],
488 |    "source": [
489 |     "b = predict.tolist()"
490 |    ]
491 |   },
492 |   {
493 |    "cell_type": "code",
494 |    "execution_count": 20,
495 |    "metadata": {
496 |     "ExecuteTime": {
497 |      "end_time": "2019-03-07T10:26:13.150956Z",
498 |      "start_time": "2019-03-07T10:26:12.914204Z"
499 |     }
500 |    },
501 |    "outputs": [],
502 |    "source": [
503 |     "a = y_test.flatten().tolist()"
504 |    ]
505 |   },
506 |   {
507 |    "cell_type": "code",
508 |    "execution_count": 21,
509 |    "metadata": {
510 |     "ExecuteTime": {
511 |      "end_time": "2019-03-07T10:26:13.808747Z",
512 |      "start_time": "2019-03-07T10:26:13.158687Z"
513 |     }
514 |    },
515 |    "outputs": [
516 |     {
517 |      "data": {
518 |       "text/plain": [
519 |        "0.5420698924731183"
520 |       ]
521 |      },
522 |      "execution_count": 21,
523 |      "metadata": {},
524 |      "output_type": "execute_result"
525 |     }
526 |    ],
527 |    "source": [
528 |     "sum(i == j for i,j in zip(a,b))/len(a)"
529 |    ]
530 |   },
531 |   {
532 |    "cell_type": "code",
533 |    "execution_count": 21,
534 |    "metadata": {
535 |     "ExecuteTime": {
536 |      "end_time": "2019-03-07T12:38:19.654303Z",
537 |      "start_time": "2019-03-07T12:38:19.650975Z"
538 |     }
539 |    },
540 |    "outputs": [],
541 |    "source": [
542 |     "c = np.array(b)"
543 |    ]
544 |   },
545 |   {
546 |    "cell_type": "code",
547 |    "execution_count": 23,
548 |    "metadata": {
549 |     "ExecuteTime": {
550 |      "end_time": "2019-03-07T12:38:24.506514Z",
551 |      "start_time": "2019-03-07T12:38:24.486118Z"
552 |     }
553 |    },
554 |    "outputs": [
555 |     {
556 |      "ename": "ValueError",
557 |      "evalue": "cannot reshape array of size 15190 into shape (15,31)",
558 |      "output_type": "error",
559 |      "traceback": [
560 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
561 |       "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
562 |       "\u001b[0;32m<ipython-input-23-351865053696>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0md\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m//\u001b[0m\u001b[0;36m31\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m31\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
563 |       "\u001b[0;31mValueError\u001b[0m: cannot reshape array of size 15190 into shape (15,31)"
564 |      ]
565 |     }
566 |    ],
567 |    "source": [
568 |     "d = c.reshape(c.shape[0]//31, 31)"
569 |    ]
570 |   },
571 |   {
572 |    "cell_type": "code",
573 |    "execution_count": 25,
574 |    "metadata": {
575 |     "ExecuteTime": {
576 |      "end_time": "2019-03-07T10:26:16.351638Z",
577 |      "start_time": "2019-03-07T10:26:15.767304Z"
578 |     }
579 |    },
580 |    "outputs": [],
581 |    "source": [
582 |     "e = y_test.reshape(y_test.shape[0]//31, 31)"
583 |    ]
584 |   },
585 |   {
586 |    "cell_type": "code",
587 |    "execution_count": 29,
588 |    "metadata": {
589 |     "ExecuteTime": {
590 |      "end_time": "2019-03-07T10:26:17.631733Z",
591 |      "start_time": "2019-03-07T10:26:17.554547Z"
592 |     }
593 |    },
594 |    "outputs": [
595 |     {
596 |      "data": {
597 |       "text/plain": [
598 |        "0.5420698924731183"
599 |       ]
600 |      },
601 |      "execution_count": 29,
602 |      "metadata": {},
603 |      "output_type": "execute_result"
604 |     }
605 |    ],
606 |    "source": [
607 |     "sum(sum(d == e))/d.size"
608 |    ]
609 |   }
610 |  ],
611 |  "metadata": {
612 |   "kernelspec": {
613 |    "display_name": "projet_S5",
614 |    "language": "python",
615 |    "name": "projet_s5"
616 |   },
617 |   "language_info": {
618 |    "codemirror_mode": {
619 |     "name": "ipython",
620 |     "version": 3
621 |    },
622 |    "file_extension": ".py",
623 |    "mimetype": "text/x-python",
624 |    "name": "python",
625 |    "nbconvert_exporter": "python",
626 |    "pygments_lexer": "ipython3",
627 |    "version": "3.6.7"
628 |   },
629 |   "toc": {
630 |    "base_numbering": 1,
631 |    "nav_menu": {},
632 |    "number_sections": true,
633 |    "sideBar": true,
634 |    "skip_h1_title": false,
635 |    "title_cell": "Table of Contents",
636 |    "title_sidebar": "Contents",
637 |    "toc_cell": false,
638 |    "toc_position": {},
639 |    "toc_section_display": true,
640 |    "toc_window_display": false
641 |   }
642 |  },
643 |  "nbformat": 4,
644 |  "nbformat_minor": 2
645 | }
646 | 


--------------------------------------------------------------------------------
/notebook/[Official] Long Short Term Memory.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "ExecuteTime": {
  8 |      "end_time": "2019-03-10T11:29:30.263351Z",
  9 |      "start_time": "2019-03-10T11:29:29.331896Z"
 10 |     },
 11 |     "scrolled": true
 12 |    },
 13 |    "outputs": [],
 14 |    "source": [
 15 |     "# List all devices\n",
 16 |     "from tensorflow.python.client import device_lib\n",
 17 |     "# print(device_lib.list_local_devices())"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 2,
 23 |    "metadata": {
 24 |     "ExecuteTime": {
 25 |      "end_time": "2019-03-10T11:29:30.600350Z",
 26 |      "start_time": "2019-03-10T11:29:30.265493Z"
 27 |     },
 28 |     "scrolled": true
 29 |    },
 30 |    "outputs": [
 31 |     {
 32 |      "name": "stderr",
 33 |      "output_type": "stream",
 34 |      "text": [
 35 |       "Using TensorFlow backend.\n"
 36 |      ]
 37 |     },
 38 |     {
 39 |      "data": {
 40 |       "text/plain": [
 41 |        "['/job:localhost/replica:0/task:0/device:GPU:0']"
 42 |       ]
 43 |      },
 44 |      "execution_count": 2,
 45 |      "metadata": {},
 46 |      "output_type": "execute_result"
 47 |     }
 48 |    ],
 49 |    "source": [
 50 |     "# Check available GPUs\n",
 51 |     "from keras import backend as K\n",
 52 |     "K.tensorflow_backend._get_available_gpus()"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 3,
 58 |    "metadata": {
 59 |     "ExecuteTime": {
 60 |      "end_time": "2019-03-10T11:29:30.625708Z",
 61 |      "start_time": "2019-03-10T11:29:30.601959Z"
 62 |     }
 63 |    },
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "import os\n",
 67 |     "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\";\n",
 68 |     "# The GPU id to use, usually either \"0\" or \"1\";\n",
 69 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\";  "
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 4,
 75 |    "metadata": {
 76 |     "ExecuteTime": {
 77 |      "end_time": "2019-03-10T11:29:31.251738Z",
 78 |      "start_time": "2019-03-10T11:29:30.628052Z"
 79 |     }
 80 |    },
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "# Importing the libraries\n",
 84 |     "import numpy as np\n",
 85 |     "import pandas as pd\n",
 86 |     "from keras.models import Sequential\n",
 87 |     "from keras.layers import Dense, LSTM, Dropout, Reshape, Lambda, GRU, BatchNormalization, Bidirectional\n",
 88 |     "from keras.preprocessing.sequence import TimeseriesGenerator\n",
 89 |     "from keras.callbacks import EarlyStopping, ModelCheckpoint\n",
 90 |     "from keras.activations import softmax\n",
 91 |     "from keras.optimizers import SGD, RMSprop\n",
 92 |     "import math\n",
 93 |     "import pickle\n",
 94 |     "import matplotlib.pyplot as plt\n",
 95 |     "from keras.utils import to_categorical\n",
 96 |     "from sklearn.preprocessing import StandardScaler"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": 5,
102 |    "metadata": {
103 |     "ExecuteTime": {
104 |      "end_time": "2019-03-10T11:29:31.271692Z",
105 |      "start_time": "2019-03-10T11:29:31.253924Z"
106 |     }
107 |    },
108 |    "outputs": [],
109 |    "source": [
110 |     "index = \"dowjones\"\n",
111 |     "index = \"frankfurt\"\n",
112 |     "with open(f\"../data/{index}_calculated/periods750_250_240.txt\", \"rb\") as fp:   # Unpickling\n",
113 |     "    dataset = pickle.load(fp)"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 6,
119 |    "metadata": {
120 |     "ExecuteTime": {
121 |      "end_time": "2019-03-10T11:29:31.326427Z",
122 |      "start_time": "2019-03-10T11:29:31.273077Z"
123 |     }
124 |    },
125 |    "outputs": [],
126 |    "source": [
127 |     "def normalize_data(df):\n",
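128 |     "    \"\"\"Standardize each row of a DataFrame (subtract the row mean, divide by the row std) and return the result as a NumPy array.\"\"\"\n",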
129 |     "    mean = df.mean(axis=1)\n",
130 |     "    std = df.std(axis=1)\n",
131 |     "    df = df.sub(mean, axis=0)\n",
132 |     "    df = df.div(std, axis=0)\n",
133 |     "    df = df.values\n",
134 |     "    return df\n",
135 |     "def get_one_hot(targets, nb_classes):\n",
136 |     "    res = np.eye(nb_classes)[np.array(targets).reshape(-1)]\n",
137 |     "    return res.reshape(list(targets.shape)+[nb_classes])"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": 7,
143 |    "metadata": {
144 |     "ExecuteTime": {
145 |      "end_time": "2019-03-10T11:29:31.390751Z",
146 |      "start_time": "2019-03-10T11:29:31.332033Z"
147 |     }
148 |    },
149 |    "outputs": [],
150 |    "source": [
151 |     "i = 7\n",
152 |     "timestep = 240"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 8,
158 |    "metadata": {
159 |     "ExecuteTime": {
160 |      "end_time": "2019-03-10T11:29:31.491879Z",
161 |      "start_time": "2019-03-10T11:29:31.392404Z"
162 |     }
163 |    },
164 |    "outputs": [],
165 |    "source": [
166 |     "# x_train = dataset[0][i][0]['AMZN'].values * 1000\n",
167 |     "# y_train = dataset[0][i][1]['AMZN'].values * 1.0\n",
168 |     "# x_test = dataset[1][i][0]['AMZN'].values * 1000\n",
169 |     "# y_test = dataset[1][i][1]['AMZN'].values * 1.0\n",
170 |     "\n",
171 |     "# x_train = dataset[0][i][0].values\n",
172 |     "# x_train = (x_train - x_train.mean())/x_train.std()\n",
173 |     "# y_train = dataset[0][i][1].values * 1.0\n",
174 |     "# x_test = dataset[1][i][0].values\n",
175 |     "# x_test = (x_test - x_test.mean())/x_test.std()\n",
176 |     "# y_test = dataset[1][i][1].values * 1.0\n",
177 |     "\n",
178 |     "# x_train = dataset[0][i][0].values * 1000\n",
179 |     "# x_test = dataset[1][i][0].values * 1000\n",
180 |     "\n",
181 |     "x_train = dataset[0][i][0].values\n",
182 |     "x_test = dataset[1][i][0].values\n",
183 |     "\n",
184 |     "scaler = StandardScaler().fit(x_train)\n",
185 |     "\n",
186 |     "x_train = scaler.transform(x_train)\n",
187 |     "x_test = scaler.transform(x_test)\n",
188 |     "\n",
189 |     "# x_train = normalize_data(dataset[0][i][0])\n",
190 |     "# x_test = normalize_data(dataset[1][i][0])\n",
191 |     "\n",
192 |     "# y_train = get_one_hot(dataset[0][i][1].values, 2) * 1.0\n",
193 |     "# y_test = get_one_hot(dataset[1][i][1].values, 2) * 1.0\n",
194 |     "y_train = to_categorical(dataset[0][i][1].values, 2)\n",
195 |     "y_test = to_categorical(dataset[1][i][1].values, 2)"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "code",
200 |    "execution_count": 9,
201 |    "metadata": {
202 |     "ExecuteTime": {
203 |      "end_time": "2019-03-10T11:29:31.548816Z",
204 |      "start_time": "2019-03-10T11:29:31.494231Z"
205 |     }
206 |    },
207 |    "outputs": [
208 |     {
209 |      "name": "stdout",
210 |      "output_type": "stream",
211 |      "text": [
212 |       "x train shape: (750, 62)\n",
213 |       "y train shape: (750, 62, 2)\n",
214 |       "x test shape: (490, 62)\n",
215 |       "y test shape: (490, 62, 2)\n"
216 |      ]
217 |     }
218 |    ],
219 |    "source": [
220 |     "print(f\"x train shape: {x_train.shape}\")\n",
221 |     "print(f\"y train shape: {y_train.shape}\")\n",
222 |     "print(f\"x test shape: {x_test.shape}\")\n",
223 |     "print(f\"y test shape: {y_test.shape}\")"
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "code",
228 |    "execution_count": 10,
229 |    "metadata": {
230 |     "ExecuteTime": {
231 |      "end_time": "2019-03-10T11:29:31.710273Z",
232 |      "start_time": "2019-03-10T11:29:31.553791Z"
233 |     }
234 |    },
235 |    "outputs": [
236 |     {
237 |      "name": "stdout",
238 |      "output_type": "stream",
239 |      "text": [
240 |       "x shape: (31620, 240)\n",
241 |       "y shape: (31620, 2)\n"
242 |      ]
243 |     }
244 |    ],
245 |    "source": [
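246 |     "# Nested comprehension: the second range (j, the columns) is the inner loop, so for each window start i every column is emitted before i advances\n",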
247 |     "# x_series = [x_train[i:i+240] for i in range(750 - 240)]\n",
248 |     "# y_series = [y_train[i+240] for i in range(750 - 240)]\n",
249 |     "x_series = [x_train[i:i+timestep, j] for i in range(x_train.shape[0] - timestep) for j in range(x_train.shape[1])]\n",
250 |     "y_series = [y_train[i+timestep, j] for i in range(y_train.shape[0] - timestep) for j in range(y_train.shape[1])]\n",
251 |     "x = np.array(x_series)\n",
252 |     "y = np.array(y_series)\n",
253 |     "print(f\"x shape: {x.shape}\")\n",
254 |     "print(f\"y shape: {y.shape}\")"
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "code",
259 |    "execution_count": 11,
260 |    "metadata": {
261 |     "ExecuteTime": {
262 |      "end_time": "2019-03-10T11:29:31.714291Z",
263 |      "start_time": "2019-03-10T11:29:31.711668Z"
264 |     }
265 |    },
266 |    "outputs": [
267 |     {
268 |      "name": "stdout",
269 |      "output_type": "stream",
270 |      "text": [
271 |       "x shape: (31620, 240, 1)\n"
272 |      ]
273 |     }
274 |    ],
275 |    "source": [
276 |     "x = np.reshape(x, (x.shape[0], x.shape[1], 1))\n",
277 |     "print(f\"x shape: {x.shape}\")\n"
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "code",
282 |    "execution_count": 12,
283 |    "metadata": {
284 |     "ExecuteTime": {
285 |      "end_time": "2019-03-10T11:29:32.910595Z",
286 |      "start_time": "2019-03-10T11:29:31.715759Z"
287 |     }
288 |    },
289 |    "outputs": [
290 |     {
291 |      "name": "stdout",
292 |      "output_type": "stream",
293 |      "text": [
294 |       "_________________________________________________________________\n",
295 |       "Layer (type)                 Output Shape              Param #   \n",
296 |       "=================================================================\n",
297 |       "lstm_1 (LSTM)                (None, 240, 25)           2700      \n",
298 |       "_________________________________________________________________\n",
299 |       "lstm_2 (LSTM)                (None, 240, 100)          50400     \n",
300 |       "_________________________________________________________________\n",
301 |       "lstm_3 (LSTM)                (None, 240, 100)          80400     \n",
302 |       "_________________________________________________________________\n",
303 |       "lstm_4 (LSTM)                (None, 100)               80400     \n",
304 |       "_________________________________________________________________\n",
305 |       "dense_1 (Dense)              (None, 2)                 202       \n",
306 |       "=================================================================\n",
307 |       "Total params: 214,102\n",
308 |       "Trainable params: 214,102\n",
309 |       "Non-trainable params: 0\n",
310 |       "_________________________________________________________________\n"
311 |      ]
312 |     }
313 |    ],
314 |    "source": [
315 |     "dropout_rate = 0.1\n",
316 |     "# expected input data shape: (batch_size, timesteps, data_dim)\n",
317 |     "regressor = Sequential()\n",
318 |     "\n",
319 |     "# regressor.add(Bidirectional(LSTM(units=25, input_shape=(timestep, 1), dropout=dropout_rate)))\n",
320 |     "regressor.add(LSTM(units=25, input_shape=(timestep, 1), return_sequences = True,dropout=dropout_rate))\n",
321 |     "regressor.add(LSTM(units=100, return_sequences = True,dropout=dropout_rate))\n",
322 |     "regressor.add(LSTM(units=100, return_sequences = True,dropout=dropout_rate))\n",
323 |     "regressor.add(LSTM(units=100, input_shape=(timestep, 1), dropout=dropout_rate))\n",
324 |     "# regressor.add(LSTM(units=25, batch_input_shape=(527, timestep, 1), dropout=dropout_rate, stateful=False))\n",
325 |     "# regressor.add(LSTM(units=25, batch_input_shape=(527, timestep, 1), dropout=dropout_rate))\n",
326 |     "# regressor.add(LSTM(units=25, return_sequences = True,dropout=dropout_rate, stateful=False))\n",
327 |     "# regressor.add(LSTM(units=25, return_sequences = True,dropout=dropout_rate, stateful=False))\n",
328 |     "# regressor.add(LSTM(units=25, dropout=dropout_rate, stateful=False))\n",
329 |     "# regressor.add(LSTM(units=25, input_shape=(timestep, 1), dropout=dropout_rate))\n",
330 |     "# regressor.add(GRU(units=25, input_shape=(timestep, 1), dropout=dropout_rate))\n",
331 |     "# regressor.add(Dense(100, input_shape=(timestep, ), activation='relu'))\n",
332 |     "# regressor.add(Dense(100, activation='relu'))\n",
333 |     "regressor.add(Dense(2, activation='softmax'))\n",
334 |     "regressor.compile(loss='binary_crossentropy',\n",
335 |     "                  optimizer='rmsprop',\n",
336 |     "                  metrics=['accuracy'])\n",
337 |     "regressor.summary()"
338 |    ]
339 |   },
340 |   {
341 |    "cell_type": "code",
342 |    "execution_count": 13,
343 |    "metadata": {
344 |     "ExecuteTime": {
345 |      "end_time": "2019-03-10T11:36:27.480440Z",
346 |      "start_time": "2019-03-10T11:29:32.912147Z"
347 |     },
348 |     "scrolled": true
349 |    },
350 |    "outputs": [
351 |     {
352 |      "name": "stdout",
353 |      "output_type": "stream",
354 |      "text": [
355 |       "Train on 25296 samples, validate on 6324 samples\n",
356 |       "Epoch 1/1000\n",
357 |       "25296/25296 [==============================] - 52s 2ms/step - loss: 0.6930 - acc: 0.5104 - val_loss: 0.6935 - val_acc: 0.5016\n",
358 |       "Epoch 2/1000\n",
359 |       "25296/25296 [==============================] - 51s 2ms/step - loss: 0.6928 - acc: 0.5145 - val_loss: 0.6933 - val_acc: 0.5016\n",
360 |       "Epoch 3/1000\n",
361 |       "25296/25296 [==============================] - 51s 2ms/step - loss: 0.6929 - acc: 0.5144 - val_loss: 0.6934 - val_acc: 0.5016\n",
362 |       "Epoch 4/1000\n",
363 |       "25296/25296 [==============================] - 51s 2ms/step - loss: 0.6927 - acc: 0.5142 - val_loss: 0.6932 - val_acc: 0.5022\n",
364 |       "Epoch 5/1000\n",
365 |       "25296/25296 [==============================] - 51s 2ms/step - loss: 0.6928 - acc: 0.5147 - val_loss: 0.6932 - val_acc: 0.5019\n",
366 |       "Epoch 6/1000\n",
367 |       "25296/25296 [==============================] - 51s 2ms/step - loss: 0.6928 - acc: 0.5146 - val_loss: 0.6937 - val_acc: 0.5016\n",
368 |       "Epoch 7/1000\n",
369 |       "25296/25296 [==============================] - 51s 2ms/step - loss: 0.6928 - acc: 0.5140 - val_loss: 0.6936 - val_acc: 0.5016\n",
370 |       "Epoch 8/1000\n",
371 |       "25296/25296 [==============================] - 51s 2ms/step - loss: 0.6928 - acc: 0.5136 - val_loss: 0.6932 - val_acc: 0.5016\n",
372 |       "Epoch 9/1000\n",
373 |       " 1581/25296 [>.............................] - ETA: 47s - loss: 0.6928 - acc: 0.5155"
374 |      ]
375 |     },
376 |     {
377 |      "ename": "KeyboardInterrupt",
378 |      "evalue": "",
379 |      "output_type": "error",
380 |      "traceback": [
381 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
382 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
383 |       "\u001b[0;32m<ipython-input-13-5ce52589fe96>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0;31m#              ModelCheckpoint(filepath='../model/LSTM/best_model.h5', monitor='val_acc', save_best_only=True)])\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m result = regressor.fit(x, y, epochs=1000,batch_size=527, validation_split=0.2, callbacks = [EarlyStopping(monitor='val_loss', mode='min', patience=200),\n\u001b[0;32m----> 4\u001b[0;31m              ModelCheckpoint(filepath='../model/LSTM/best_model.h5', monitor='val_acc', save_best_only=True)])\n\u001b[0m\u001b[1;32m      5\u001b[0m \u001b[0;31m# regressor.fit(x, y, epochs=1000,batch_size=500, validation_split=0.2, callbacks = [EarlyStopping(monitor='val_loss', mode='min', patience=20),\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m \u001b[0;31m#              ModelCheckpoint(filepath='../model/LSTM/best_model.h5', monitor='val_acc', save_best_only=True)])\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
384 |       "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)\u001b[0m\n\u001b[1;32m   1037\u001b[0m                                         \u001b[0minitial_epoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minitial_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1038\u001b[0m                                         \u001b[0msteps_per_epoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msteps_per_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1039\u001b[0;31m                                         validation_steps=validation_steps)\n\u001b[0m\u001b[1;32m   1040\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1041\u001b[0m     def evaluate(self, x=None, y=None,\n",
385 |       "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/engine/training_arrays.py\u001b[0m in \u001b[0;36mfit_loop\u001b[0;34m(model, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)\u001b[0m\n\u001b[1;32m    197\u001b[0m                     \u001b[0mins_batch\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mins_batch\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 199\u001b[0;31m                 \u001b[0mouts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mins_batch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    200\u001b[0m                 \u001b[0mouts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mto_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mouts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    201\u001b[0m                 \u001b[0;32mfor\u001b[0m \u001b[0ml\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mo\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout_labels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mouts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
386 |       "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m   2713\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_legacy_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2714\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2715\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2716\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2717\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mpy_any\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mis_tensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
387 |       "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m   2673\u001b[0m             \u001b[0mfetched\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_callable_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0marray_vals\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_metadata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2674\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2675\u001b[0;31m             \u001b[0mfetched\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_callable_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0marray_vals\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2676\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mfetched\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2677\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
388 |       "\u001b[0;32m~/anaconda3/envs/projet_S5/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1437\u001b[0m           ret = tf_session.TF_SessionRunCallable(\n\u001b[1;32m   1438\u001b[0m               \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_session\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_handle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1439\u001b[0;31m               run_metadata_ptr)\n\u001b[0m\u001b[1;32m   1440\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1441\u001b[0m           \u001b[0mproto_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
389 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
390 |      ]
391 |     }
392 |    ],
393 |    "source": [
394 |     "# result = regressor.fit(x, y, epochs=1000,batch_size=527, validation_split=0.2, shuffle=False, callbacks = [EarlyStopping(monitor='val_loss', mode='min', patience=200),\n",
395 |     "#              ModelCheckpoint(filepath='../model/LSTM/best_model.h5', monitor='val_acc', save_best_only=True)])\n",
396 |     "result = regressor.fit(x, y, epochs=1000,batch_size=527, validation_split=0.2, callbacks = [EarlyStopping(monitor='val_loss', mode='min', patience=200),\n",
397 |     "             ModelCheckpoint(filepath='../model/LSTM/best_model.h5', monitor='val_acc', save_best_only=True)])\n",
398 |     "# regressor.fit(x, y, epochs=1000,batch_size=500, validation_split=0.2, callbacks = [EarlyStopping(monitor='val_loss', mode='min', patience=20),\n",
399 |     "#              ModelCheckpoint(filepath='../model/LSTM/best_model.h5', monitor='val_acc', save_best_only=True)])\n"
400 |    ]
401 |   },
402 |   {
403 |    "cell_type": "code",
404 |    "execution_count": null,
405 |    "metadata": {
406 |     "ExecuteTime": {
407 |      "end_time": "2019-03-10T11:36:27.483294Z",
408 |      "start_time": "2019-03-10T11:29:29.366Z"
409 |     }
410 |    },
411 |    "outputs": [],
412 |    "source": [
413 |     "plt.plot(result.history[\"val_acc\"])\n",
414 |     "plt.plot(result.history[\"acc\"])"
415 |    ]
416 |   },
417 |   {
418 |    "cell_type": "code",
419 |    "execution_count": null,
420 |    "metadata": {
421 |     "ExecuteTime": {
422 |      "end_time": "2019-03-10T11:36:27.484346Z",
423 |      "start_time": "2019-03-10T11:29:29.368Z"
424 |     }
425 |    },
426 |    "outputs": [],
427 |    "source": [
428 |     "plt.plot(result.history[\"val_loss\"])\n",
429 |     "plt.plot(result.history[\"loss\"])"
430 |    ]
431 |   }
432 |  ],
433 |  "metadata": {
434 |   "kernelspec": {
435 |    "display_name": "projet_S5",
436 |    "language": "python",
437 |    "name": "projet_s5"
438 |   },
439 |   "language_info": {
440 |    "codemirror_mode": {
441 |     "name": "ipython",
442 |     "version": 3
443 |    },
444 |    "file_extension": ".py",
445 |    "mimetype": "text/x-python",
446 |    "name": "python",
447 |    "nbconvert_exporter": "python",
448 |    "pygments_lexer": "ipython3",
449 |    "version": "3.6.8"
450 |   },
451 |   "toc": {
452 |    "base_numbering": 1,
453 |    "nav_menu": {},
454 |    "number_sections": true,
455 |    "sideBar": true,
456 |    "skip_h1_title": false,
457 |    "title_cell": "Table of Contents",
458 |    "title_sidebar": "Contents",
459 |    "toc_cell": false,
460 |    "toc_position": {},
461 |    "toc_section_display": true,
462 |    "toc_window_display": false
463 |   }
464 |  },
465 |  "nbformat": 4,
466 |  "nbformat_minor": 2
467 | }
468 | 


--------------------------------------------------------------------------------
/notebook/[Official] [Trading Algorithm - Baseline 1] Equally weighted portfolio.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "ExecuteTime": {
  8 |      "end_time": "2019-03-07T10:37:49.257364Z",
  9 |      "start_time": "2019-03-07T10:37:49.057116Z"
 10 |     }
 11 |    },
 12 |    "outputs": [],
 13 |    "source": [
 14 |     "import numpy as np\n",
 15 |     "import pandas as pd\n"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 2,
 21 |    "metadata": {
 22 |     "ExecuteTime": {
 23 |      "end_time": "2019-03-07T10:37:49.847010Z",
 24 |      "start_time": "2019-03-07T10:37:49.259067Z"
 25 |     }
 26 |    },
 27 |    "outputs": [
 28 |     {
 29 |      "data": {
 30 |       "text/html": [
 31 |        "<div>\n",
 32 |        "<style scoped>\n",
 33 |        "    .dataframe tbody tr th:only-of-type {\n",
 34 |        "        vertical-align: middle;\n",
 35 |        "    }\n",
 36 |        "\n",
 37 |        "    .dataframe tbody tr th {\n",
 38 |        "        vertical-align: top;\n",
 39 |        "    }\n",
 40 |        "\n",
 41 |        "    .dataframe thead th {\n",
 42 |        "        text-align: right;\n",
 43 |        "    }\n",
 44 |        "</style>\n",
 45 |        "<table border=\"1\" class=\"dataframe\">\n",
 46 |        "  <thead>\n",
 47 |        "    <tr style=\"text-align: right;\">\n",
 48 |        "      <th>Name</th>\n",
 49 |        "      <th>AABA</th>\n",
 50 |        "      <th>AAPL</th>\n",
 51 |        "      <th>AMZN</th>\n",
 52 |        "      <th>AXP</th>\n",
 53 |        "      <th>BA</th>\n",
 54 |        "      <th>CAT</th>\n",
 55 |        "      <th>CSCO</th>\n",
 56 |        "      <th>CVX</th>\n",
 57 |        "      <th>DIS</th>\n",
 58 |        "      <th>GE</th>\n",
 59 |        "      <th>...</th>\n",
 60 |        "      <th>MSFT</th>\n",
 61 |        "      <th>NKE</th>\n",
 62 |        "      <th>PFE</th>\n",
 63 |        "      <th>PG</th>\n",
 64 |        "      <th>TRV</th>\n",
 65 |        "      <th>UNH</th>\n",
 66 |        "      <th>UTX</th>\n",
 67 |        "      <th>VZ</th>\n",
 68 |        "      <th>WMT</th>\n",
 69 |        "      <th>XOM</th>\n",
 70 |        "    </tr>\n",
 71 |        "    <tr>\n",
 72 |        "      <th>Date</th>\n",
 73 |        "      <th></th>\n",
 74 |        "      <th></th>\n",
 75 |        "      <th></th>\n",
 76 |        "      <th></th>\n",
 77 |        "      <th></th>\n",
 78 |        "      <th></th>\n",
 79 |        "      <th></th>\n",
 80 |        "      <th></th>\n",
 81 |        "      <th></th>\n",
 82 |        "      <th></th>\n",
 83 |        "      <th></th>\n",
 84 |        "      <th></th>\n",
 85 |        "      <th></th>\n",
 86 |        "      <th></th>\n",
 87 |        "      <th></th>\n",
 88 |        "      <th></th>\n",
 89 |        "      <th></th>\n",
 90 |        "      <th></th>\n",
 91 |        "      <th></th>\n",
 92 |        "      <th></th>\n",
 93 |        "      <th></th>\n",
 94 |        "    </tr>\n",
 95 |        "  </thead>\n",
 96 |        "  <tbody>\n",
 97 |        "    <tr>\n",
 98 |        "      <th>2006-01-03</th>\n",
 99 |        "      <td>40.91</td>\n",
100 |        "      <td>10.68</td>\n",
101 |        "      <td>47.58</td>\n",
102 |        "      <td>52.58</td>\n",
103 |        "      <td>70.44</td>\n",
104 |        "      <td>57.80</td>\n",
105 |        "      <td>17.45</td>\n",
106 |        "      <td>59.08</td>\n",
107 |        "      <td>24.40</td>\n",
108 |        "      <td>35.37</td>\n",
109 |        "      <td>...</td>\n",
110 |        "      <td>26.84</td>\n",
111 |        "      <td>10.74</td>\n",
112 |        "      <td>23.78</td>\n",
113 |        "      <td>58.78</td>\n",
114 |        "      <td>45.99</td>\n",
115 |        "      <td>61.73</td>\n",
116 |        "      <td>56.53</td>\n",
117 |        "      <td>30.38</td>\n",
118 |        "      <td>46.23</td>\n",
119 |        "      <td>58.47</td>\n",
120 |        "    </tr>\n",
121 |        "    <tr>\n",
122 |        "      <th>2006-01-04</th>\n",
123 |        "      <td>40.97</td>\n",
124 |        "      <td>10.71</td>\n",
125 |        "      <td>47.25</td>\n",
126 |        "      <td>51.95</td>\n",
127 |        "      <td>71.17</td>\n",
128 |        "      <td>59.27</td>\n",
129 |        "      <td>17.85</td>\n",
130 |        "      <td>58.91</td>\n",
131 |        "      <td>23.99</td>\n",
132 |        "      <td>35.32</td>\n",
133 |        "      <td>...</td>\n",
134 |        "      <td>26.97</td>\n",
135 |        "      <td>10.69</td>\n",
136 |        "      <td>24.55</td>\n",
137 |        "      <td>58.89</td>\n",
138 |        "      <td>46.50</td>\n",
139 |        "      <td>61.88</td>\n",
140 |        "      <td>56.19</td>\n",
141 |        "      <td>31.27</td>\n",
142 |        "      <td>46.32</td>\n",
143 |        "      <td>58.57</td>\n",
144 |        "    </tr>\n",
145 |        "    <tr>\n",
146 |        "      <th>2006-01-05</th>\n",
147 |        "      <td>41.53</td>\n",
148 |        "      <td>10.63</td>\n",
149 |        "      <td>47.65</td>\n",
150 |        "      <td>52.50</td>\n",
151 |        "      <td>70.33</td>\n",
152 |        "      <td>59.27</td>\n",
153 |        "      <td>18.35</td>\n",
154 |        "      <td>58.19</td>\n",
155 |        "      <td>24.41</td>\n",
156 |        "      <td>35.23</td>\n",
157 |        "      <td>...</td>\n",
158 |        "      <td>26.99</td>\n",
159 |        "      <td>10.76</td>\n",
160 |        "      <td>24.58</td>\n",
161 |        "      <td>58.70</td>\n",
162 |        "      <td>46.95</td>\n",
163 |        "      <td>61.69</td>\n",
164 |        "      <td>55.98</td>\n",
165 |        "      <td>31.63</td>\n",
166 |        "      <td>45.69</td>\n",
167 |        "      <td>58.28</td>\n",
168 |        "    </tr>\n",
169 |        "    <tr>\n",
170 |        "      <th>2006-01-06</th>\n",
171 |        "      <td>43.21</td>\n",
172 |        "      <td>10.90</td>\n",
173 |        "      <td>47.87</td>\n",
174 |        "      <td>52.68</td>\n",
175 |        "      <td>69.35</td>\n",
176 |        "      <td>60.45</td>\n",
177 |        "      <td>18.77</td>\n",
178 |        "      <td>59.25</td>\n",
179 |        "      <td>24.74</td>\n",
180 |        "      <td>35.47</td>\n",
181 |        "      <td>...</td>\n",
182 |        "      <td>26.91</td>\n",
183 |        "      <td>10.72</td>\n",
184 |        "      <td>24.85</td>\n",
185 |        "      <td>58.64</td>\n",
186 |        "      <td>47.21</td>\n",
187 |        "      <td>62.90</td>\n",
188 |        "      <td>56.16</td>\n",
189 |        "      <td>31.35</td>\n",
190 |        "      <td>45.88</td>\n",
191 |        "      <td>59.43</td>\n",
192 |        "    </tr>\n",
193 |        "    <tr>\n",
194 |        "      <th>2006-01-09</th>\n",
195 |        "      <td>43.42</td>\n",
196 |        "      <td>10.86</td>\n",
197 |        "      <td>47.08</td>\n",
198 |        "      <td>53.99</td>\n",
199 |        "      <td>68.77</td>\n",
200 |        "      <td>61.55</td>\n",
201 |        "      <td>19.06</td>\n",
202 |        "      <td>58.95</td>\n",
203 |        "      <td>25.00</td>\n",
204 |        "      <td>35.38</td>\n",
205 |        "      <td>...</td>\n",
206 |        "      <td>26.86</td>\n",
207 |        "      <td>10.88</td>\n",
208 |        "      <td>24.85</td>\n",
209 |        "      <td>59.08</td>\n",
210 |        "      <td>47.23</td>\n",
211 |        "      <td>61.40</td>\n",
212 |        "      <td>56.80</td>\n",
213 |        "      <td>31.48</td>\n",
214 |        "      <td>45.71</td>\n",
215 |        "      <td>59.40</td>\n",
216 |        "    </tr>\n",
217 |        "  </tbody>\n",
218 |        "</table>\n",
219 |        "<p>5 rows × 31 columns</p>\n",
220 |        "</div>"
221 |       ],
222 |       "text/plain": [
223 |        "Name         AABA   AAPL   AMZN    AXP     BA    CAT   CSCO    CVX    DIS  \\\n",
224 |        "Date                                                                        \n",
225 |        "2006-01-03  40.91  10.68  47.58  52.58  70.44  57.80  17.45  59.08  24.40   \n",
226 |        "2006-01-04  40.97  10.71  47.25  51.95  71.17  59.27  17.85  58.91  23.99   \n",
227 |        "2006-01-05  41.53  10.63  47.65  52.50  70.33  59.27  18.35  58.19  24.41   \n",
228 |        "2006-01-06  43.21  10.90  47.87  52.68  69.35  60.45  18.77  59.25  24.74   \n",
229 |        "2006-01-09  43.42  10.86  47.08  53.99  68.77  61.55  19.06  58.95  25.00   \n",
230 |        "\n",
231 |        "Name           GE  ...     MSFT    NKE    PFE     PG    TRV    UNH    UTX  \\\n",
232 |        "Date               ...                                                      \n",
233 |        "2006-01-03  35.37  ...    26.84  10.74  23.78  58.78  45.99  61.73  56.53   \n",
234 |        "2006-01-04  35.32  ...    26.97  10.69  24.55  58.89  46.50  61.88  56.19   \n",
235 |        "2006-01-05  35.23  ...    26.99  10.76  24.58  58.70  46.95  61.69  55.98   \n",
236 |        "2006-01-06  35.47  ...    26.91  10.72  24.85  58.64  47.21  62.90  56.16   \n",
237 |        "2006-01-09  35.38  ...    26.86  10.88  24.85  59.08  47.23  61.40  56.80   \n",
238 |        "\n",
239 |        "Name           VZ    WMT    XOM  \n",
240 |        "Date                             \n",
241 |        "2006-01-03  30.38  46.23  58.47  \n",
242 |        "2006-01-04  31.27  46.32  58.57  \n",
243 |        "2006-01-05  31.63  45.69  58.28  \n",
244 |        "2006-01-06  31.35  45.88  59.43  \n",
245 |        "2006-01-09  31.48  45.71  59.40  \n",
246 |        "\n",
247 |        "[5 rows x 31 columns]"
248 |       ]
249 |      },
250 |      "execution_count": 2,
251 |      "metadata": {},
252 |      "output_type": "execute_result"
253 |     }
254 |    ],
255 |    "source": [
256 |     "stocks = pd.read_csv(\"../data/dowjones/all_stocks_2006-01-01_to_2018-01-01.csv\", index_col='Date',parse_dates=['Date'])\n",
257 |     "stocks = stocks[[\"Close\", \"Name\"]]\n",
258 |     "stocks = stocks.pivot_table(values='Close', index=stocks.index, columns='Name', aggfunc='first')\n",
259 |     "stocks.head()"
260 |    ]
261 |   },
262 |   {
263 |    "cell_type": "code",
264 |    "execution_count": 3,
265 |    "metadata": {
266 |     "ExecuteTime": {
267 |      "end_time": "2019-03-07T10:37:49.909612Z",
268 |      "start_time": "2019-03-07T10:37:49.848989Z"
269 |     },
270 |     "scrolled": true
271 |    },
272 |    "outputs": [
273 |     {
274 |      "data": {
275 |       "text/html": [
276 |        "<div>\n",
277 |        "<style scoped>\n",
278 |        "    .dataframe tbody tr th:only-of-type {\n",
279 |        "        vertical-align: middle;\n",
280 |        "    }\n",
281 |        "\n",
282 |        "    .dataframe tbody tr th {\n",
283 |        "        vertical-align: top;\n",
284 |        "    }\n",
285 |        "\n",
286 |        "    .dataframe thead th {\n",
287 |        "        text-align: right;\n",
288 |        "    }\n",
289 |        "</style>\n",
290 |        "<table border=\"1\" class=\"dataframe\">\n",
291 |        "  <thead>\n",
292 |        "    <tr style=\"text-align: right;\">\n",
293 |        "      <th>Name</th>\n",
294 |        "      <th>AABA</th>\n",
295 |        "      <th>AAPL</th>\n",
296 |        "      <th>AMZN</th>\n",
297 |        "      <th>AXP</th>\n",
298 |        "      <th>BA</th>\n",
299 |        "      <th>CAT</th>\n",
300 |        "      <th>CSCO</th>\n",
301 |        "      <th>CVX</th>\n",
302 |        "      <th>DIS</th>\n",
303 |        "      <th>GE</th>\n",
304 |        "      <th>...</th>\n",
305 |        "      <th>MSFT</th>\n",
306 |        "      <th>NKE</th>\n",
307 |        "      <th>PFE</th>\n",
308 |        "      <th>PG</th>\n",
309 |        "      <th>TRV</th>\n",
310 |        "      <th>UNH</th>\n",
311 |        "      <th>UTX</th>\n",
312 |        "      <th>VZ</th>\n",
313 |        "      <th>WMT</th>\n",
314 |        "      <th>XOM</th>\n",
315 |        "    </tr>\n",
316 |        "    <tr>\n",
317 |        "      <th>Date</th>\n",
318 |        "      <th></th>\n",
319 |        "      <th></th>\n",
320 |        "      <th></th>\n",
321 |        "      <th></th>\n",
322 |        "      <th></th>\n",
323 |        "      <th></th>\n",
324 |        "      <th></th>\n",
325 |        "      <th></th>\n",
326 |        "      <th></th>\n",
327 |        "      <th></th>\n",
328 |        "      <th></th>\n",
329 |        "      <th></th>\n",
330 |        "      <th></th>\n",
331 |        "      <th></th>\n",
332 |        "      <th></th>\n",
333 |        "      <th></th>\n",
334 |        "      <th></th>\n",
335 |        "      <th></th>\n",
336 |        "      <th></th>\n",
337 |        "      <th></th>\n",
338 |        "      <th></th>\n",
339 |        "    </tr>\n",
340 |        "  </thead>\n",
341 |        "  <tbody>\n",
342 |        "    <tr>\n",
343 |        "      <th>2006-01-04</th>\n",
344 |        "      <td>1.001467</td>\n",
345 |        "      <td>1.002809</td>\n",
346 |        "      <td>0.993064</td>\n",
347 |        "      <td>0.988018</td>\n",
348 |        "      <td>1.010363</td>\n",
349 |        "      <td>1.025433</td>\n",
350 |        "      <td>1.022923</td>\n",
351 |        "      <td>0.997123</td>\n",
352 |        "      <td>0.983197</td>\n",
353 |        "      <td>0.998586</td>\n",
354 |        "      <td>...</td>\n",
355 |        "      <td>1.004844</td>\n",
356 |        "      <td>0.995345</td>\n",
357 |        "      <td>1.032380</td>\n",
358 |        "      <td>1.001871</td>\n",
359 |        "      <td>1.011089</td>\n",
360 |        "      <td>1.002430</td>\n",
361 |        "      <td>0.993985</td>\n",
362 |        "      <td>1.029296</td>\n",
363 |        "      <td>1.001947</td>\n",
364 |        "      <td>1.001710</td>\n",
365 |        "    </tr>\n",
366 |        "    <tr>\n",
367 |        "      <th>2006-01-05</th>\n",
368 |        "      <td>1.013669</td>\n",
369 |        "      <td>0.992530</td>\n",
370 |        "      <td>1.008466</td>\n",
371 |        "      <td>1.010587</td>\n",
372 |        "      <td>0.988197</td>\n",
373 |        "      <td>1.000000</td>\n",
374 |        "      <td>1.028011</td>\n",
375 |        "      <td>0.987778</td>\n",
376 |        "      <td>1.017507</td>\n",
377 |        "      <td>0.997452</td>\n",
378 |        "      <td>...</td>\n",
379 |        "      <td>1.000742</td>\n",
380 |        "      <td>1.006548</td>\n",
381 |        "      <td>1.001222</td>\n",
382 |        "      <td>0.996774</td>\n",
383 |        "      <td>1.009677</td>\n",
384 |        "      <td>0.996930</td>\n",
385 |        "      <td>0.996263</td>\n",
386 |        "      <td>1.011513</td>\n",
387 |        "      <td>0.986399</td>\n",
388 |        "      <td>0.995049</td>\n",
389 |        "    </tr>\n",
390 |        "    <tr>\n",
391 |        "      <th>2006-01-06</th>\n",
392 |        "      <td>1.040453</td>\n",
393 |        "      <td>1.025400</td>\n",
394 |        "      <td>1.004617</td>\n",
395 |        "      <td>1.003429</td>\n",
396 |        "      <td>0.986066</td>\n",
397 |        "      <td>1.019909</td>\n",
398 |        "      <td>1.022888</td>\n",
399 |        "      <td>1.018216</td>\n",
400 |        "      <td>1.013519</td>\n",
401 |        "      <td>1.006812</td>\n",
402 |        "      <td>...</td>\n",
403 |        "      <td>0.997036</td>\n",
404 |        "      <td>0.996283</td>\n",
405 |        "      <td>1.010985</td>\n",
406 |        "      <td>0.998978</td>\n",
407 |        "      <td>1.005538</td>\n",
408 |        "      <td>1.019614</td>\n",
409 |        "      <td>1.003215</td>\n",
410 |        "      <td>0.991148</td>\n",
411 |        "      <td>1.004158</td>\n",
412 |        "      <td>1.019732</td>\n",
413 |        "    </tr>\n",
414 |        "    <tr>\n",
415 |        "      <th>2006-01-09</th>\n",
416 |        "      <td>1.004860</td>\n",
417 |        "      <td>0.996330</td>\n",
418 |        "      <td>0.983497</td>\n",
419 |        "      <td>1.024867</td>\n",
420 |        "      <td>0.991637</td>\n",
421 |        "      <td>1.018197</td>\n",
422 |        "      <td>1.015450</td>\n",
423 |        "      <td>0.994937</td>\n",
424 |        "      <td>1.010509</td>\n",
425 |        "      <td>0.997463</td>\n",
426 |        "      <td>...</td>\n",
427 |        "      <td>0.998142</td>\n",
428 |        "      <td>1.014925</td>\n",
429 |        "      <td>1.000000</td>\n",
430 |        "      <td>1.007503</td>\n",
431 |        "      <td>1.000424</td>\n",
432 |        "      <td>0.976153</td>\n",
433 |        "      <td>1.011396</td>\n",
434 |        "      <td>1.004147</td>\n",
435 |        "      <td>0.996295</td>\n",
436 |        "      <td>0.999495</td>\n",
437 |        "    </tr>\n",
438 |        "    <tr>\n",
439 |        "      <th>2006-01-10</th>\n",
440 |        "      <td>0.989866</td>\n",
441 |        "      <td>1.063536</td>\n",
442 |        "      <td>0.969626</td>\n",
443 |        "      <td>0.997036</td>\n",
444 |        "      <td>1.004799</td>\n",
445 |        "      <td>0.995938</td>\n",
446 |        "      <td>0.995278</td>\n",
447 |        "      <td>1.004919</td>\n",
448 |        "      <td>1.012800</td>\n",
449 |        "      <td>0.994630</td>\n",
450 |        "      <td>...</td>\n",
451 |        "      <td>1.005212</td>\n",
452 |        "      <td>1.001838</td>\n",
453 |        "      <td>0.983501</td>\n",
454 |        "      <td>0.996107</td>\n",
455 |        "      <td>0.992801</td>\n",
456 |        "      <td>1.019707</td>\n",
457 |        "      <td>1.000704</td>\n",
458 |        "      <td>1.004130</td>\n",
459 |        "      <td>1.003282</td>\n",
460 |        "      <td>1.007744</td>\n",
461 |        "    </tr>\n",
462 |        "  </tbody>\n",
463 |        "</table>\n",
464 |        "<p>5 rows × 31 columns</p>\n",
465 |        "</div>"
466 |       ],
467 |       "text/plain": [
468 |        "Name            AABA      AAPL      AMZN       AXP        BA       CAT  \\\n",
469 |        "Date                                                                     \n",
470 |        "2006-01-04  1.001467  1.002809  0.993064  0.988018  1.010363  1.025433   \n",
471 |        "2006-01-05  1.013669  0.992530  1.008466  1.010587  0.988197  1.000000   \n",
472 |        "2006-01-06  1.040453  1.025400  1.004617  1.003429  0.986066  1.019909   \n",
473 |        "2006-01-09  1.004860  0.996330  0.983497  1.024867  0.991637  1.018197   \n",
474 |        "2006-01-10  0.989866  1.063536  0.969626  0.997036  1.004799  0.995938   \n",
475 |        "\n",
476 |        "Name            CSCO       CVX       DIS        GE    ...         MSFT  \\\n",
477 |        "Date                                                  ...                \n",
478 |        "2006-01-04  1.022923  0.997123  0.983197  0.998586    ...     1.004844   \n",
479 |        "2006-01-05  1.028011  0.987778  1.017507  0.997452    ...     1.000742   \n",
480 |        "2006-01-06  1.022888  1.018216  1.013519  1.006812    ...     0.997036   \n",
481 |        "2006-01-09  1.015450  0.994937  1.010509  0.997463    ...     0.998142   \n",
482 |        "2006-01-10  0.995278  1.004919  1.012800  0.994630    ...     1.005212   \n",
483 |        "\n",
484 |        "Name             NKE       PFE        PG       TRV       UNH       UTX  \\\n",
485 |        "Date                                                                     \n",
486 |        "2006-01-04  0.995345  1.032380  1.001871  1.011089  1.002430  0.993985   \n",
487 |        "2006-01-05  1.006548  1.001222  0.996774  1.009677  0.996930  0.996263   \n",
488 |        "2006-01-06  0.996283  1.010985  0.998978  1.005538  1.019614  1.003215   \n",
489 |        "2006-01-09  1.014925  1.000000  1.007503  1.000424  0.976153  1.011396   \n",
490 |        "2006-01-10  1.001838  0.983501  0.996107  0.992801  1.019707  1.000704   \n",
491 |        "\n",
492 |        "Name              VZ       WMT       XOM  \n",
493 |        "Date                                      \n",
494 |        "2006-01-04  1.029296  1.001947  1.001710  \n",
495 |        "2006-01-05  1.011513  0.986399  0.995049  \n",
496 |        "2006-01-06  0.991148  1.004158  1.019732  \n",
497 |        "2006-01-09  1.004147  0.996295  0.999495  \n",
498 |        "2006-01-10  1.004130  1.003282  1.007744  \n",
499 |        "\n",
500 |        "[5 rows x 31 columns]"
501 |       ]
502 |      },
503 |      "execution_count": 3,
504 |      "metadata": {},
505 |      "output_type": "execute_result"
506 |     }
507 |    ],
508 |    "source": [
509 |     "real_returns = stocks / stocks.shift(1)\n",
510 |     "real_returns = real_returns.dropna()\n",
511 |     "real_returns.head()"
512 |    ]
513 |   },
514 |   {
515 |    "cell_type": "markdown",
516 |    "metadata": {},
517 |    "source": [
518 |     "Construct an equally weighted portfolio at the beginning, no modification"
519 |    ]
520 |   },
521 |   {
522 |    "cell_type": "code",
523 |    "execution_count": 6,
524 |    "metadata": {
525 |     "ExecuteTime": {
526 |      "end_time": "2019-03-07T10:38:05.942847Z",
527 |      "start_time": "2019-03-07T10:38:05.923858Z"
528 |     }
529 |    },
530 |    "outputs": [
531 |     {
532 |      "data": {
533 |       "text/plain": [
534 |        "Name\n",
535 |        "AABA      1.703652\n",
536 |        "AAPL     15.553472\n",
537 |        "AMZN     24.930303\n",
538 |        "AXP       1.857342\n",
539 |        "BA        4.167905\n",
540 |        "CAT       2.612698\n",
541 |        "CSCO      2.204494\n",
542 |        "CVX       2.084414\n",
543 |        "DIS       4.404010\n",
544 |        "GE        0.499659\n",
545 |        "GOOGL     4.782169\n",
546 |        "GS        1.907548\n",
547 |        "HD        4.535423\n",
548 |        "IBM       1.844540\n",
549 |        "INTC      1.776532\n",
550 |        "JNJ       2.264994\n",
551 |        "JPM       2.656100\n",
552 |        "KO        2.294732\n",
553 |        "MCD       5.036862\n",
554 |        "MMM       2.895334\n",
555 |        "MRK       1.708513\n",
556 |        "MSFT      3.174303\n",
557 |        "NKE       5.736704\n",
558 |        "PFE       1.550533\n",
559 |        "PG        1.553785\n",
560 |        "TRV       2.982429\n",
561 |        "UNH       3.429493\n",
562 |        "UTX       2.132306\n",
563 |        "VZ        1.701935\n",
564 |        "WMT       2.106818\n",
565 |        "XOM       1.406517\n",
566 |        "dtype: float64"
567 |       ]
568 |      },
569 |      "execution_count": 6,
570 |      "metadata": {},
571 |      "output_type": "execute_result"
572 |     }
573 |    ],
574 |    "source": [
575 |     "real_returns.product()"
576 |    ]
577 |   },
578 |   {
579 |    "cell_type": "code",
580 |    "execution_count": 4,
581 |    "metadata": {
582 |     "ExecuteTime": {
583 |      "end_time": "2019-03-07T10:37:49.946281Z",
584 |      "start_time": "2019-03-07T10:37:49.911687Z"
585 |     }
586 |    },
587 |    "outputs": [
588 |     {
589 |      "data": {
590 |       "text/plain": [
591 |        "3.790178028501573"
592 |       ]
593 |      },
594 |      "execution_count": 4,
595 |      "metadata": {},
596 |      "output_type": "execute_result"
597 |     }
598 |    ],
599 |    "source": [
600 |     "real_returns.product().mean()"
601 |    ]
602 |   },
603 |   {
604 |    "cell_type": "markdown",
605 |    "metadata": {},
606 |    "source": [
607 |     "Equally weighted portfolio"
608 |    ]
609 |   },
610 |   {
611 |    "cell_type": "code",
612 |    "execution_count": 5,
613 |    "metadata": {
614 |     "ExecuteTime": {
615 |      "end_time": "2019-03-07T10:37:50.013158Z",
616 |      "start_time": "2019-03-07T10:37:49.949700Z"
617 |     }
618 |    },
619 |    "outputs": [
620 |     {
621 |      "data": {
622 |       "text/plain": [
623 |        "3.5255573996595255"
624 |       ]
625 |      },
626 |      "execution_count": 5,
627 |      "metadata": {},
628 |      "output_type": "execute_result"
629 |     }
630 |    ],
631 |    "source": [
632 |     "real_returns.mean(axis = 1).product()"
633 |    ]
634 |   }
635 |  ],
636 |  "metadata": {
637 |   "kernelspec": {
638 |    "display_name": "projet_S5",
639 |    "language": "python",
640 |    "name": "projet_s5"
641 |   },
642 |   "language_info": {
643 |    "codemirror_mode": {
644 |     "name": "ipython",
645 |     "version": 3
646 |    },
647 |    "file_extension": ".py",
648 |    "mimetype": "text/x-python",
649 |    "name": "python",
650 |    "nbconvert_exporter": "python",
651 |    "pygments_lexer": "ipython3",
652 |    "version": "3.6.8"
653 |   },
654 |   "toc": {
655 |    "base_numbering": 1,
656 |    "nav_menu": {},
657 |    "number_sections": true,
658 |    "sideBar": true,
659 |    "skip_h1_title": false,
660 |    "title_cell": "Table of Contents",
661 |    "title_sidebar": "Contents",
662 |    "toc_cell": false,
663 |    "toc_position": {},
664 |    "toc_section_display": true,
665 |    "toc_window_display": false
666 |   }
667 |  },
668 |  "nbformat": 4,
669 |  "nbformat_minor": 2
670 | }
671 | 


--------------------------------------------------------------------------------
/notebook/best_model.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tqa236/LSTM_algo_trading/ddef49af393069df2ec1dbd3843fed79e65ba141/notebook/best_model.h5


--------------------------------------------------------------------------------
/src/calculate_returns.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """Calculate returns and labels."""
 4 | 
 5 | import argparse
 6 | 
 7 | import pandas as pd
 8 | 
 9 | from utils import (calculate_absolute_class, calculate_class,
10 |                    calculate_log_returns, calculate_returns)
11 | 
12 | 
def main():
    """Compute returns, log returns and their class labels for one dataset.

    Reads ``../data/{index}_calculated/stocks.csv`` and writes six CSV files
    (``returns1``, ``labels1``, ``absolute_labels1``, ``log_returns1``,
    ``log_labels1`` and ``absolute_log_labels1``) into the same directory.
    No command-line options are read.

    Returns:
        0 once every file has been written.
    """
    # Name of the dataset to process; every path below derives from it so
    # the input and the outputs cannot drift apart.
    index = "frankfurt"
    data_dir = f"../data/{index}_calculated"

    dataset = pd.read_csv(f"{data_dir}/stocks.csv",
                          index_col='Date',
                          parse_dates=['Date'])
    returns = calculate_returns(dataset)
    log_returns = calculate_log_returns(dataset)
    labels = calculate_class(returns)
    # File stem -> frame, listed in the order the files are written.
    outputs = {
        "returns": returns,
        "labels": labels,
        "absolute_labels": calculate_absolute_class(returns),
        "log_returns": log_returns,
        "log_labels": calculate_class(log_returns),
        "absolute_log_labels": calculate_absolute_class(log_returns),
    }
    print(f"Returns shape: {returns.shape}")
    print(f"Labels shape: {labels.shape}")
    for stem, frame in outputs.items():
        frame.to_csv(f"{data_dir}/{stem}1.csv")

    print("Done.")
    return 0


if __name__ == "__main__":
    main()
55 | 


--------------------------------------------------------------------------------
/src/divide_period.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """Divide the data into period."""
 4 | 
 5 | import argparse
 6 | import pickle
 7 | 
 8 | import pandas as pd
 9 | 
10 | 
def divide_period(returns, labels, train_length=750, test_length=250,
                  timesteps=240):
    """Divide the data into rolling train/test periods.

    Period ``i`` trains on rows ``[test_length * i, test_length * i +
    train_length)`` and is tested on the ``test_length`` rows that follow.
    Each test slice starts ``timesteps`` rows before the end of its training
    window, so it overlaps the tail of the training data.

    Args:
        returns: Sliceable sequence of inputs (for example a DataFrame).
        labels: Sliceable sequence of targets; its length determines how
            many periods are produced.
        train_length: Number of rows in each training slice.
        test_length: Number of new rows covered by each test slice; also
            the step between consecutive periods.
        timesteps: Number of training rows prepended to each test slice.

    Returns:
        ``(trains, tests)``: two lists holding one ``(returns, labels)``
        slice pair per period. Both are empty when the data is too short
        for a single period.

    Raises:
        ValueError: If ``test_length`` is not positive or ``timesteps``
            exceeds ``train_length``.
    """
    if test_length <= 0:
        raise ValueError(f"test_length must be positive, got {test_length}")
    if timesteps > train_length:
        # A negative slice start would silently wrap to the end of the data.
        raise ValueError(
            f"timesteps ({timesteps}) must not exceed train_length "
            f"({train_length})")

    num_period = (len(labels) - train_length) // test_length
    trains = []
    tests = []
    for i in range(num_period):
        train_start = test_length * i
        train_end = train_start + train_length
        test_start = train_end - timesteps
        test_end = train_end + test_length
        trains.append((returns[train_start:train_end],
                       labels[train_start:train_end]))
        tests.append((returns[test_start:test_end],
                      labels[test_start:test_end]))

    return (trains, tests)
25 | 
26 | 
def main():
    """Split the calculated returns/labels into periods and pickle them.

    The returns and labels CSV files are read with ``Date`` as a parsed
    index, split with ``divide_period`` and the resulting
    ``(trains, tests)`` tuple is pickled to the ``--outdir`` file.

    Returns:
        0 once the periods file has been written.
    """
    train_length = 750
    test_length = 250
    timesteps = 240
    # Dataset whose calculated files are used as argument defaults.
    index = "frankfurt"
    parser = argparse.ArgumentParser(
        description="Parse arguments for models.")
    parser.add_argument("--returns", help="Returns CSV file.",
                        default=f"../data/{index}_calculated/returns1.csv")
    parser.add_argument(
        "--labels", help="Labels CSV file.",
        default=f"../data/{index}_calculated/labels1.csv")
    parser.add_argument('--outdir',
                        help='Output file for the pickled periods.',
                        default=f"../data/{index}_calculated/periods"
                        f"{train_length}_{test_length}_{timesteps}.txt")

    args = parser.parse_args()
    returns = pd.read_csv(args.returns, index_col='Date',
                          parse_dates=['Date'])
    labels = pd.read_csv(args.labels, index_col='Date',
                         parse_dates=['Date'])

    periods = divide_period(
        returns, labels, train_length, test_length, timesteps)
    trains, tests = periods
    # Report the first period only; indexing fails if no period was built.
    print("Training set")
    print(f"Returns shape for 1 period: {trains[0][0].shape}")
    print(f"Labels shape for 1 period: {trains[0][1].shape}")
    print("Test set")
    print(f"Returns shape for 1 period: {tests[0][0].shape}")
    print(f"Labels shape for 1 period: {tests[0][1].shape}")

    with open(args.outdir, "wb") as file:
        pickle.dump(periods, file)
    print("Done.")
    return 0


if __name__ == "__main__":
    main()
68 | 


--------------------------------------------------------------------------------
/src/make_dataframe.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """Calculate returns and labels."""
 4 | 
 5 | import argparse
 6 | 
 7 | import pandas as pd
 8 | 
 9 | 
def main():
    """Assemble one table of closing prices for long-lived tickers.

    Tickers listed in the ``--indir`` metadata file are kept when their
    ``from_date`` is before 2001-01-01 and their ``to_date`` is after
    2018-01-01. The ``Close`` column of each kept ticker is added to a
    frame indexed by the dates of the ``AAD_X`` file, columns with any
    missing value are dropped and the result is written to ``--outdir``.

    Returns:
        0 once the CSV file has been written.
    """
    first_day = '2001-01-01'
    last_day = '2018-01-01'

    parser = argparse.ArgumentParser(
        description="Parse arguments for models.")
    parser.add_argument(
        "--indir", help="Dataset directory.",
        default="../data/frankfurt/FSE_metadata.csv")
    parser.add_argument('--outdir', help='Model directory.',
                        default="../data/frankfurt_calculated/stocks.csv")
    args = parser.parse_args()

    tickers = pd.read_csv(args.indir)
    listed_early = tickers["from_date"] < "2001-01-01"
    listed_late = tickers["to_date"] > "2018-01-01"
    tickers = tickers[listed_early & listed_late]

    # The dates of this one file define the rows of the combined table.
    reference = pd.read_csv('../data/frankfurt/stocks/AAD_X.csv',
                            index_col='Date', parse_dates=['Date'])
    stocks = pd.DataFrame(index=reference.index).loc[first_day:last_day]

    for ticker in tickers.code:
        prices = pd.read_csv(f'../data/frankfurt/stocks/{ticker}.csv',
                             index_col='Date', parse_dates=['Date'])
        closes = prices["Close"]
        stocks[ticker] = closes.loc[first_day:last_day]

    stocks = stocks.dropna(axis=1)
    print(f"Stocks shape: {stocks.shape}")
    stocks.to_csv(args.outdir)

    print("Done.")
    return 0


if __name__ == "__main__":
    main()
43 | 


--------------------------------------------------------------------------------
/src/make_dataset.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """Make dataset."""
 4 | import pandas as pd
 5 | 
 6 | import quandl
 7 | 
 8 | 
 9 | def download_data(metadata_df, api=None):
10 |     """Download data from Quandl."""
11 |     for ticker in metadata_df["code"]:
12 |         print(ticker)
13 |         # try:
14 |         symbol = "FSE/" + ticker
15 |         quandl.ApiConfig.api_key = api
16 |         mydata = quandl.get(symbol)
17 |         mydata.to_csv("../data/frankfurt/stocks_tmp/" + ticker + ".csv")
18 |         # except:
19 |         #     pass
20 | 
21 | 
def main():
    """Load the ticker metadata and the API key, then start the download.

    The key is the first cell of the header-less file
    ``../data/personal_data/quandl_API.txt``.

    Returns:
        0 after ``download_data`` has finished.
    """
    ticker_table = pd.read_csv("../data/frankfurt/FSE_metadata.csv")
    key_table = pd.read_csv("../data/personal_data/quandl_API.txt",
                            header=None)
    api_key = key_table[0][0]
    download_data(ticker_table, api_key)
    return 0


if __name__ == "__main__":
    main()
33 | 


--------------------------------------------------------------------------------
/src/random_forest.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """Train an test a random forest model."""
 4 | 
 5 | import argparse
 6 | import pickle
 7 | 
 8 | import numpy as np
 9 | from sklearn.ensemble import RandomForestClassifier
10 | 
11 | from utils import generate_time_series_sample, normalize_data
12 | 
13 | 
def _flatten_samples(x_data, y_data, timestep):
    """Reshape windowed samples into one row per (sample, column) pair.

    Returns the flattened inputs, the flattened targets and the number of
    columns found on the second axis after the transpose.
    """
    x_data = x_data.transpose((0, 2, 1))
    n_columns = x_data.shape[1]
    x_flat = np.reshape(x_data, (x_data.shape[0] * n_columns, timestep))
    y_flat = np.reshape(y_data, (y_data.shape[0] * y_data.shape[1]))
    return x_flat, y_flat, n_columns


def train(dataset, model_name, timestep=20):
    """Train and evaluate one random forest classifier per period.

    For every period the train and test returns are normalised, cut into
    samples by ``generate_time_series_sample`` and flattened so that each
    (sample, column) pair is one row of ``timestep`` values. A forest is
    fitted on the training rows and its test predictions are compared with
    the test labels; two accuracies are printed per period and the shape of
    the concatenated predictions is printed at the end.

    Args:
        dataset: ``(trains, tests)`` pair; each item lists one
            ``(returns, labels)`` pair per period, both exposing
            ``.values``.
        model_name: Unused; kept so existing callers keep working.
        timestep: Number of values in each input row.
    """
    positions = []
    for i, train_period in enumerate(dataset[0]):
        test_period = dataset[1][i]

        x_train, y_train = generate_time_series_sample(
            normalize_data(train_period[0]),
            train_period[1].values, timestep)
        x_test, y_test = generate_time_series_sample(
            normalize_data(test_period[0]),
            test_period[1].values, timestep)

        x_train, y_train, _ = _flatten_samples(x_train, y_train, timestep)
        # The column count comes from the data instead of a fixed 31, so
        # datasets with a different number of tickers reshape correctly.
        x_test, y_test, n_columns = _flatten_samples(
            x_test, y_test, timestep)
        print(f"x train shape: {x_train.shape}")
        print(f"y train shape: {y_train.shape}")
        print(f"x test shape: {x_test.shape}")
        print(f"y test shape: {y_test.shape}")

        clf = RandomForestClassifier(n_jobs=2, random_state=0, max_depth=5)
        clf.fit(x_train, y_train)
        # Predict once; both accuracy windows below slice the same result.
        predicted = clf.predict(x_test).reshape(-1, n_columns)
        actual = test_period[1].values

        # Last 250 rows -- presumably the test_length used when the periods
        # were built; TODO confirm against divide_period.
        predict = predicted[-250:]
        result = np.mean(predict == actual[-250:, :])

        # The 50 rows right before that window, reported for comparison.
        result1 = np.mean(predicted[-300:-250] == actual[-300:-250, :])

        positions.append(predict)
        print(result)
        print(result1)
    all_positions = np.concatenate(positions, axis=0)
    print(all_positions.shape)
59 | 
60 | 
def main():
    """Load the pickled periods and run ``train`` with 10 time steps.

    Returns:
        0 once training has finished.
    """
    parser = argparse.ArgumentParser(
        description="Parse arguments for models.")
    parser.add_argument("--dataset", help="Dataset directory.",
                        default="../data/dowjones_calculated/periods.txt")
    parser.add_argument('--outdir', help='Model directory.',
                        default='../model/LSTM/my_model1')
    options = parser.parse_args()

    # NOTE: pickle executes code on load; only open files you created.
    with open(options.dataset, "rb") as handle:
        periods = pickle.load(handle)
    train(periods, options.outdir, 10)

    print("Done.")
    return 0


if __name__ == "__main__":
    main()
81 | 


--------------------------------------------------------------------------------
/src/random_strategy.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """Create a random strategy to pick the stocks."""
 4 | 
 5 | import argparse
 6 | import time
 7 | 
 8 | import numpy as np
 9 | import pandas as pd
10 | 
11 | from utils import calculate_returns, long_short_postion
12 | 
13 | 
def generate_random_strategy(returns):
    """Generate a random probability that a stock will beat the market.

    Args:
        returns: DataFrame whose shape, index and columns are reused; its
            values are ignored and left untouched.

    Returns:
        A new DataFrame labelled like ``returns`` and filled with
        independent uniform draws from ``[0, 1)``.
    """
    # One vectorised draw replaces a Python-level rand() call per cell.
    draws = np.random.rand(*returns.shape)
    return pd.DataFrame(draws, index=returns.index, columns=returns.columns)
21 | 
22 | 
def calculate_random_returns(returns, k=10, start=750, end=3000):
    """Calculate the returns of a random trading strategy.

    Random probabilities are turned into per-row positions with
    ``long_short_postion`` and multiplied with ``returns``; rows
    ``start:end`` of the product are then aggregated in two ways.

    Args:
        returns: DataFrame of per-period returns, one column per stock.
        k: Passed to ``long_short_postion``; the aggregates are divided by
            ``2 * k`` (presumably k long and k short positions per row --
            confirm against ``utils.long_short_postion``).
        start: First row of the evaluated slice.
        end: Row after the last one of the evaluated slice.

    Returns:
        ``[no_rebalance, rebalance]``: the sum of the per-column compounded
        returns divided by ``2 * k``, and the compounded value of the
        per-row sums divided by ``2 * k``.
    """
    probabilities = generate_random_strategy(returns)
    # DataFrame.apply returns a new frame. Keep its result, otherwise the
    # raw probabilities would be used as position weights.
    positions = probabilities.apply(
        lambda row: long_short_postion(row, k),
        axis=1, result_type='broadcast')
    random_returns = returns.mul(positions)[start:end]
    no_rebalance = (random_returns + 1).product().sum() / (2 * k)
    rebalance = (1 + random_returns.sum(axis=1) / (2 * k)).product()
    return [no_rebalance, rebalance]
34 | 
35 | 
def random_trading(returns, k=10, start=750, end=3000, times=1):
    """Run the random strategy ``times`` times and collect the results.

    Returns:
        A list with one ``calculate_random_returns`` result per run.
    """
    outcomes = []
    for _ in range(times):
        outcomes.append(calculate_random_returns(returns, k, start, end))
    return outcomes
40 | 
41 | 
def main():
    """Simulate 1000 random strategies and save their returns as CSV.

    Prices are read from ``--indir``, converted with ``calculate_returns``
    and fed to ``random_trading``. The elapsed wall-clock time in seconds
    is printed at the end.

    Returns:
        0 once the results file has been written.
    """
    started = time.time()

    parser = argparse.ArgumentParser(
        description="Parse arguments for models.")
    parser.add_argument(
        "--indir", help="Dataset directory.",
        default="../data/dowjones/all_stocks_2006-01-01_to_2018-01-01.csv")
    parser.add_argument('--outdir', help='Model directory.',
                        default="../data/dowjones_calculated/rebalance.csv")
    options = parser.parse_args()

    prices = pd.read_csv(options.indir, index_col='Date',
                         parse_dates=['Date'])
    returns = calculate_returns(prices)

    times = 1000
    results = random_trading(returns, times=times)
    # One row per simulation, one column per aggregation scheme.
    table = pd.DataFrame(data=results)
    table.to_csv(
        f"../data/dowjones_calculated/random_trading_{times}times.csv",
        sep=',', index=False, header=["No Rebalance", "Rebalance"])
    print("Done.")

    finished = time.time()
    print(finished - started)
    return 0


if __name__ == "__main__":
    main()
70 | 


--------------------------------------------------------------------------------
/src/train.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """Train a LSTM model."""
  4 | 
  5 | import argparse
  6 | import os
  7 | import pickle
  8 | 
  9 | import numpy as np
 10 | from sklearn.preprocessing import StandardScaler
 11 | 
 12 | from keras.callbacks import EarlyStopping, ModelCheckpoint
 13 | from keras.layers import LSTM, Dense, Reshape
 14 | from keras.models import Sequential
 15 | from keras.utils import to_categorical
 16 | 
# Select the GPU through the CUDA environment variables.
# NOTE(review): these are assigned after the keras imports above; confirm
# the backend has not initialised its devices by that point.
# Ask CUDA to enumerate devices in PCI bus order.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# The GPU id to use, usually either "0" or "1";
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 20 | 
 21 | 
 22 | def get_one_hot(targets, nb_classes):
 23 |     """Convert class array to one hot vector."""
 24 |     res = np.eye(nb_classes)[np.array(targets).reshape(-1)]
 25 |     return res.reshape(list(targets.shape) + [nb_classes])
 26 | 
 27 | 
 28 | def train_one_feature(dataset, model_name, timestep=240, feature=31,
 29 |                       dropout_level=0.1):
 30 |     """Train an LSTM model with 1 feature."""
 31 |     for i in range(len(dataset[0])):
 32 |         model_period = f"{model_name}_1feature_period{i}.h5"
 33 |         x_train = dataset[0][i][0].values
 34 |         scaler = StandardScaler().fit(x_train)
 35 |         x_train = scaler.transform(x_train)
 36 |         y_train = to_categorical(dataset[0][i][1].values, 2)
 37 | 
 38 |         print(f"Period {i}")
 39 |         print(f"x train shape: {x_train.shape}")
 40 |         print(f"y train shape: {y_train.shape}")
 41 |         x_series = [x_train[i:i + timestep, j]
 42 |                     for i in range(x_train.shape[0] - timestep)
 43 |                     for j in range(feature)]
 44 |         y_series = [y_train[i + timestep, j]
 45 |                     for i in range(y_train.shape[0] - timestep)
 46 |                     for j in range(feature)]
 47 |         x_final = np.array(x_series)
 48 |         y_final = np.array(y_series)
 49 |         x_final = np.reshape(x_final, (x_final.shape[0], x_final.shape[1], 1))
 50 |         print(f"x_final shape: {x_final.shape}")
 51 |         print(f"y_final shape: {y_final.shape}")
 52 | 
 53 |         # expected input data shape: (batch_size, timesteps, data_dim)
 54 |         regressor = Sequential()
 55 |         regressor.add(LSTM(units=25, input_shape=(timestep, 1),
 56 |                            dropout=dropout_level,
 57 |                            recurrent_dropout=dropout_level))
 58 |         regressor.add(Dense(2, activation='softmax'))
 59 |         regressor.compile(loss='binary_crossentropy',
 60 |                           optimizer='rmsprop',
 61 |                           metrics=['accuracy'])
 62 |         regressor.summary()
 63 | 
 64 |         regressor.fit(x_final, y_final, batch_size=1000, epochs=1000,
 65 |                       validation_split=0.2,
 66 |                       callbacks=[EarlyStopping(monitor='val_loss',
 67 |                                                mode='min', patience=10),
 68 |                                  ModelCheckpoint(filepath=model_period,
 69 |                                                  monitor='val_acc',
 70 |                                                  save_best_only=True)])
 71 | 
 72 | 
 73 | def train(dataset, model_name, timestep=240, feature=31, dropout_level=0.1):
 74 |     """Train an LSTM model."""
 75 |     for i in range(len(dataset[0])):
 76 |         model_period = f"{model_name}_period{i}.h5"
 77 |         x_train = dataset[0][i][0].values
 78 |         scaler = StandardScaler().fit(x_train)
 79 |         x_train = scaler.transform(x_train)
 80 |         y_train = to_categorical(dataset[0][i][1].values, 2)
 81 | 
 82 |         print(f"Period {i}")
 83 |         print(f"x train shape: {x_train.shape}")
 84 |         print(f"y train shape: {y_train.shape}")
 85 | 
 86 |         x_series = [x_train[i:i + timestep, :]
 87 |                     for i in range(x_train.shape[0] - timestep)]
 88 |         y_series = [y_train[i + timestep]
 89 |                     for i in range(y_train.shape[0] - timestep)]
 90 |         x_final = np.array(x_series)
 91 |         y_final = np.array(y_series)
 92 |         print(f"x_final shape: {x_final.shape}")
 93 |         print(f"y_final shape: {y_final.shape}")
 94 | 
 95 |         # expected input data shape: (batch_size, timesteps, data_dim)
 96 |         regressor = Sequential()
 97 |         regressor.add(LSTM(units=25, input_shape=(timestep, feature),
 98 |                            dropout=dropout_level,
 99 |                            recurrent_dropout=dropout_level))
100 |         regressor.add(Dense(feature * 2, activation='relu'))
101 |         regressor.add(Reshape((feature, 2)))
102 |         regressor.add(Dense(2, activation='softmax'))
103 |         regressor.compile(loss='binary_crossentropy',
104 |                           optimizer='rmsprop',
105 |                           metrics=['accuracy'])
106 |         regressor.summary()
107 | 
108 |         regressor.fit(x_final, y_final, batch_size=1000, epochs=1000,
109 |                       validation_split=0.2,
110 |                       callbacks=[EarlyStopping(monitor='val_loss',
111 |                                                mode='min', patience=10),
112 |                                  ModelCheckpoint(filepath=model_period,
113 |                                                  monitor='val_acc',
114 |                                                  save_best_only=True)])
115 | 
116 | 
def main():
    """Parse the command line, load the pickled dataset and train the models.

    Trains the multi-input model first and the single-input model second,
    both with the same dataset and output prefix.

    Returns:
        0 after both training runs have finished.
    """
    index = "dowjones"
    # index = "frankfurt"
    # For the absolute-return variant use the dataset
    # f"../data/{index}_calculated/absolute_periods750_250_240.txt" and the
    # output prefix f'../model/LSTM/{index}2_absolute' instead.
    default_dataset = f"../data/{index}_calculated/periods750_250_240.txt"
    default_outdir = f'../model/LSTM/{index}_drop0.1_'

    parser = argparse.ArgumentParser(
        description="Parse arguments for models.")
    parser.add_argument("--dataset", help="Dataset directory.",
                        default=default_dataset)
    # The value is used as a filename prefix for the saved models.
    parser.add_argument('--outdir', help='Model directory.',
                        default=default_outdir)
    options = parser.parse_args()

    # NOTE(review): unpickling can execute arbitrary code; only point
    # --dataset at files from a trusted source.
    with open(options.dataset, "rb") as handle:
        dataset = pickle.load(handle)

    train(dataset, options.outdir)
    train_one_feature(dataset, options.outdir)
    print("Done.")
    return 0
141 | 
142 | 
# Run the entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()
145 | 


--------------------------------------------------------------------------------
/src/train_one_ticker.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
"""Train an LSTM model."""
 4 | 
 5 | import argparse
 6 | import os
 7 | import pickle
 8 | 
 9 | import numpy as np
10 | from sklearn.preprocessing import StandardScaler
11 | 
12 | from keras.callbacks import EarlyStopping, ModelCheckpoint
13 | from keras.layers import LSTM, Dense
14 | from keras.models import Sequential
15 | from keras.utils import to_categorical
16 | 
# Ask CUDA to number the devices by PCI bus id.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# The GPU id to use, usually either "0" or "1".
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
20 | 
21 | 
def get_one_hot(targets, nb_classes):
    """Convert an array of class indices to one-hot vectors.

    Args:
        targets: Integer class indices of any shape. Array-likes such as
            (nested) lists are accepted as well as NumPy arrays.
        nb_classes: Number of classes, i.e. the length of every one-hot
            vector; each index has to be smaller than this value.

    Returns:
        A float array of shape ``targets.shape + (nb_classes,)`` in which
        the last axis holds the one-hot encoding of the matching index.
    """
    # Convert once so that plain sequences, which have no ``.shape``
    # attribute, are handled exactly like arrays.
    indices = np.asarray(targets)
    # Row ``c`` of the identity matrix is the one-hot vector of class ``c``.
    one_hot = np.eye(nb_classes)[indices.reshape(-1)]
    return one_hot.reshape(indices.shape + (nb_classes,))
26 | 
27 | 
def train_one_ticker(dataset, model_name, timestep=240, feature=31,
                     dropout_level=0.1):
    """Train a separate single-input LSTM model per period and ticker.

    For every period, each of the first ``feature`` columns (tickers) gets
    its own model, trained only on sliding windows of that column.

    Args:
        dataset: Indexable container; ``dataset[0][p][0]`` and
            ``dataset[0][p][1]`` must expose ``.values`` with the inputs and
            the class labels of period ``p`` (presumably pandas DataFrames;
            confirm against the data preparation step).
        model_name: Path prefix of the checkpoints; the model of period
            ``p`` and ticker ``t`` is written to
            ``{model_name}_period{p}_ticker{t}.h5``.
        timestep: Number of consecutive rows in one input window.
        feature: Number of ticker columns to train a model for.
        dropout_level: Rate passed as both ``dropout`` and
            ``recurrent_dropout`` of the LSTM layer.
    """
    for i in range(len(dataset[0])):
        # Scaling and label encoding depend only on the period, so they are
        # done once per period rather than once per ticker.
        x_train = dataset[0][i][0].values
        x_train = StandardScaler().fit(x_train).transform(x_train)
        y_train = to_categorical(dataset[0][i][1].values, 2)

        for j in range(feature):
            model_period = f"{model_name}_period{i}_ticker{j}.h5"

            print(f"Period {i}")
            print(f"x train shape: {x_train.shape}")
            print(f"y train shape: {y_train.shape}")

            # ``start`` is the window offset; it is named differently from
            # the period index ``i`` so the two cannot be confused.
            x_series = [x_train[start:start + timestep, j]
                        for start in range(x_train.shape[0] - timestep)]
            # The label of a window is the class of the row right after it.
            y_series = [y_train[start + timestep, j]
                        for start in range(y_train.shape[0] - timestep)]
            x_final = np.array(x_series)
            y_final = np.array(y_series)
            # Append a trailing axis of size one: (samples, timesteps, 1).
            x_final = np.reshape(
                x_final, (x_final.shape[0], x_final.shape[1], 1))
            print(f"x_final shape: {x_final.shape}")
            print(f"y_final shape: {y_final.shape}")

            # expected input data shape: (batch_size, timesteps, data_dim)
            regressor = Sequential()
            regressor.add(LSTM(units=25, input_shape=(timestep, 1),
                               recurrent_dropout=dropout_level,
                               dropout=dropout_level))
            regressor.add(Dense(2, activation='softmax'))
            regressor.compile(loss='binary_crossentropy',
                              optimizer='rmsprop',
                              metrics=['accuracy'])
            regressor.summary()

            # NOTE(review): newer Keras releases log the checkpoint metric as
            # 'val_accuracy'; confirm 'val_acc' matches the installed version.
            regressor.fit(x_final, y_final, batch_size=1000, epochs=1000,
                          validation_split=0.2,
                          callbacks=[EarlyStopping(monitor='val_loss',
                                                   mode='min', patience=10),
                                     ModelCheckpoint(filepath=model_period,
                                                     monitor='val_acc',
                                                     save_best_only=True)])
72 | 
73 | 
def main():
    """Parse the command line, load the pickled dataset and train per ticker.

    Returns:
        0 after training has finished.
    """
    index = "dowjones"
    # index = "frankfurt"
    default_dataset = (f"../data/{index}_calculated/"
                       f"absolute_periods750_250_240.txt")
    default_outdir = f'../model/LSTM/{index}_absolute'

    parser = argparse.ArgumentParser(
        description="Parse arguments for models.")
    parser.add_argument("--dataset", help="Dataset directory.",
                        default=default_dataset)
    # The value is used as a filename prefix for the saved models.
    parser.add_argument('--outdir', help='Model directory.',
                        default=default_outdir)
    options = parser.parse_args()

    # NOTE(review): unpickling can execute arbitrary code; only point
    # --dataset at files from a trusted source.
    with open(options.dataset, "rb") as handle:
        dataset = pickle.load(handle)

    train_one_ticker(dataset, options.outdir)
    print("Done.")
    return 0
93 | 
94 | 
# Run the entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()
97 | 


--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
"""Functions that are used in more than one place."""
 4 | 
 5 | import numpy as np
 6 | 
 7 | from keras.preprocessing.sequence import TimeseriesGenerator
 8 | 
 9 | 
def generate_random_strategy(returns):
    """Generate a random probability that each stock will beat the market.

    Args:
        returns: DataFrame whose shape and labels the result copies; its
            values are not used and the frame itself is left untouched.

    Returns:
        A DataFrame with the index and columns of ``returns`` in which every
        cell is an independent draw from ``np.random.rand()``.
    """
    def _random_row(row):
        # One draw per cell; only the length of the row matters.
        return [np.random.rand() for _ in row]

    return returns.apply(_random_row, axis=1, result_type='broadcast')
17 | 
18 | 
def long_short_postion(probabilities, k):
    """
    Turn probabilities into positions of a simple long-short strategy.

    The ``k`` entries with the lowest probability of beating the market
    become short (-1), the ``k`` entries with the highest probability become
    long (1) and all others stay neutral (0).

    The input is overwritten in place and the same object is returned.

    NOTE(review): for NumPy input ``np.argpartition`` needs its ``kth``
    argument to be a valid index, so ``k == 0`` and ``k == len`` appear to
    raise ``ValueError``; confirm callers always pass ``0 < k < len``.
    """
    total = len(probabilities)
    # Both selections must be made before the values are overwritten below.
    lowest_k = np.argpartition(probabilities, k)[:k]
    all_but_highest_k = np.argpartition(
        probabilities, total - k)[:total - k]
    # Start with everything long, then demote: the later write wins for the
    # entries that are in both selections.
    probabilities[:] = 1
    probabilities[all_but_highest_k] = 0
    probabilities[lowest_k] = -1
    return probabilities
34 | 
35 | 
def calculate_class(returns):
    """Label every return by comparing it with the median of its row.

    Args:
        returns: DataFrame of returns with one row per date and one column
            per stock (assumed layout; the code only relies on the median
            being taken across columns).

    Returns:
        A DataFrame with the columns of ``returns`` holding 1 where the
        return is greater than or equal to the median of its row, else 0.
    """
    row_median = returns.median(axis=1)

    def _at_least_median(column):
        # Ties with the median count as class 1.
        return np.where(column >= row_median, 1, 0)

    return returns.apply(_at_least_median, axis=0)
42 | 
43 | 
def calculate_absolute_class(returns):
    """Label every return by its sign: up (1) or down (0).

    Args:
        returns: DataFrame of returns.

    Returns:
        A DataFrame with the columns of ``returns`` holding 1 where the
        return is greater than or equal to zero, else 0.
    """
    def _non_negative(column):
        # A return of exactly zero counts as class 1.
        return np.where(column >= 0, 1, 0)

    return returns.apply(_non_negative, axis=0)
49 | 
50 | 
def calculate_returns(stocks):
    """Calculate the simple returns of all series without normalization.

    Args:
        stocks: Price table with one row per time step. An earlier version
            built it by pivoting the 'Close' column by 'Name'; callers now
            presumably pass the table in that wide layout already.

    Returns:
        ``(price - previous price) / previous price`` for every cell, with
        all rows that contain a missing value removed. The first row is
        always dropped because it has no predecessor.
    """
    previous = stocks.shift(1)
    return stocks.sub(previous).div(previous).dropna()
59 | 
60 | 
def calculate_log_returns(stocks):
    """Calculate the log returns of all series without normalization.

    Args:
        stocks: Price table with one row per time step. An earlier version
            built it by pivoting the 'Close' column by 'Name'; callers now
            presumably pass the table in that wide layout already.

    Returns:
        ``log(price) - log(previous price)`` for every cell, with all rows
        that contain a missing value removed. The first row is always
        dropped because it has no predecessor.
    """
    current_log = np.log(stocks)
    previous_log = np.log(stocks.shift(1))
    return current_log.sub(previous_log).dropna()
69 | 
70 | 
def normalize_data(df):
    """Standardize every row of a dataframe.

    Each row has its own mean subtracted and is then divided by its own
    standard deviation as computed by ``DataFrame.std``.

    Args:
        df: DataFrame to standardize; it is not modified.

    Returns:
        The standardized values as a NumPy array, not as a DataFrame.
    """
    centered = df.sub(df.mean(axis=1), axis=0)
    scaled = centered.div(df.std(axis=1), axis=0)
    return scaled.values
79 | 
80 | 
def generate_time_series_sample(data, target, timestep):
    """Generate samples of a time series with a certain length.

    Args:
        data: Array of consecutive observations; ``data.shape[0]`` is the
            number of time steps.
        target: Targets aligned with ``data``, passed unchanged to the
            generator.
        timestep: Number of consecutive rows in every sample.

    Returns:
        A tuple ``(samples, targets)`` holding the first batch of the
        generator.
    """
    # The batch size equals the number of windows, so the first batch is
    # meant to contain every sample.
    generator = TimeseriesGenerator(data, target,
                                    length=timestep, sampling_rate=1,
                                    batch_size=(data.shape[0] - timestep))
    # Index the generator only once: every lookup assembles the batch anew.
    first_batch = generator[0]
    return first_batch[0], first_batch[1]
87 | 


--------------------------------------------------------------------------------