├── kernel ├── __init__.py ├── data_import.py └── model.py ├── data └── rebar_mins.csv ├── LICENSE ├── README.md ├── .gitignore └── demo.ipynb /kernel/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_import import load_min_data, prepare_data 2 | from .model import NetModel 3 | -------------------------------------------------------------------------------- /data/rebar_mins.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shannonycj/convolutional-autoencoder-trading/HEAD/data/rebar_mins.csv -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Yang Chenjie 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Applying Convolutional Auto-Encoder in Trading 2 | 3 | In this project, I try to build a model to extract useful patterns in financial timeseries for predicting the directions of future price movements. Traditional multi-variate timeseries models (even some [modern approaches like LSTM](https://www.researchgate.net/publication/327967988_Predicting_Stock_Prices_Using_LSTM)) tend to look at and extract information from each input feature independently, which ignores potential correlations between inputs. For example, looking at historical volume and adjusted close prices jointly could provide new information. As such, people have been exploring using [CNN to learn spatial patterns](https://arxiv.org/pdf/1703.04691.pdf). 4 | 5 | It is well-known that the information/noise ratio is low in general for financial time-series. Here we try a novel approach, called a Convolutional Auto-Encoder (CAE), which proved [successful in computer vision](https://xifengguo.github.io/papers/ICONIP17-DCEC.pdf). 6 | 7 | This repo contains a set of data points from the commodity-trading market. It consists of 3 years of 5-min open, high, low, close, volume and open interest data. We use two years of data as training-validation-test sets to build our model, and the last year of data to backtest our strategy. 8 | 9 | Our CAE and other utils are contained in the 'kernel' folder, and there is a demo.ipynb for demonstrating the experiment results. 
10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .ipynb_checkpoints 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 
-------------------------------------------------------------------------------- /kernel/data_import.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon May 13 10:16:39 2019 5 | 6 | @author: chenjieyang 7 | """ 8 | import pandas as pd 9 | import numpy as np 10 | import datetime 11 | from sklearn.model_selection import train_test_split 12 | from sklearn.preprocessing import MinMaxScaler 13 | 14 | 15 | def load_min_data(train_end='2018-05-13 00:00'): 16 | df = pd.read_csv('data/rebar_mins.csv', header=2) 17 | 18 | def time_parser(t): return datetime.datetime.strptime(t, "%Y-%m-%d %H:%M") 19 | time = list(map(time_parser, df.time)) 20 | df['time'] = time 21 | df.set_index('time', inplace=True) 22 | train_end = datetime.datetime.strptime(train_end, "%Y-%m-%d %H:%M") 23 | df_train = df.loc[df.index < train_end] 24 | df_test = df.loc[df.index >= train_end] 25 | return df_train, df_test 26 | 27 | 28 | def get_idx(i, n, step): 29 | x_start = i * step 30 | x_end = x_start + n 31 | y_start = x_end 32 | y_end = y_start + step 33 | return x_start, x_end, y_start, y_end 34 | 35 | 36 | def prepare_data(df, n, step, test_size=0.3): 37 | delta = df.drop('volume', axis=1).pct_change() 38 | log_volume_delta = np.log(df.volume) - np.log(df.volume.shift(1)) 39 | delta['volume'] = log_volume_delta 40 | delta = delta.dropna(how='all') 41 | df = df.iloc[1:, :] 42 | nrows = delta.shape[0] 43 | i = 0 44 | X = [] 45 | y = [] 46 | while True: 47 | x_start, x_end, y_start, y_end = get_idx(i, n, step) 48 | if y_end > nrows - 1: 49 | break 50 | x = delta.iloc[x_start:x_end, :].values 51 | x = MinMaxScaler().fit_transform(x) * 255 52 | X.append(x.astype('int')) 53 | y.append((df.iloc[y_end, :].close - df.iloc[y_start, :].close) / df.iloc[y_start, :].close) 54 | i += 1 55 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size) 56 | X_train, X_test = 
np.expand_dims(X_train, -1), np.expand_dims(X_test, -1) 57 | y_train, y_test = np.array(y_train) >= 0, np.array(y_test) >= 0 58 | return X_train, X_test, y_train * 1.0, y_test * 1.0 59 | -------------------------------------------------------------------------------- /kernel/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from tensorflow.keras.layers import Input, Dense, Convolution2D, MaxPooling2D, UpSampling2D, Flatten, Reshape, Dropout, Conv2D 4 | from tensorflow.keras.models import Model 5 | from tensorflow.keras import backend as K 6 | from sklearn.metrics import classification_report, mean_squared_error 7 | from sklearn.model_selection import RandomizedSearchCV 8 | from scipy.stats import randint as sp_randint 9 | 10 | 11 | class NetModel: 12 | def __init__(self, X_train, X_test, y_train, y_test): 13 | self.X_train, self.X_test = X_train, X_test 14 | self.y_train, self.y_test = y_train, y_test 15 | self.dims = X_train.shape 16 | 17 | def build_net(self, conv_window=(6, 3), pooling_window=(10, 1), n_filters=(64, 32, 16)): 18 | 19 | input_img = Input(shape=self.dims[1:]) # adapt this if using `channels_first` image data format 20 | print("shape of input", K.int_shape(input_img)) 21 | conv_1 = Conv2D(n_filters[0], conv_window, activation='relu', padding='same')(input_img) 22 | print("shape after first conv", K.int_shape(conv_1)) 23 | pool_1 = MaxPooling2D(pooling_window, padding='same')(conv_1) 24 | print("shape after first pooling", K.int_shape(pool_1)) 25 | conv_2 = Conv2D(n_filters[1], conv_window, activation='relu', padding='same')(pool_1) 26 | print("shape after second conv", K.int_shape(conv_2)) 27 | 28 | pool_2 = MaxPooling2D(pooling_window, padding='same')(conv_2) 29 | print("shape after second pooling", K.int_shape(pool_2)) 30 | 31 | conv_3 = Conv2D(n_filters[2], conv_window, activation='relu', padding='same')(pool_2) 32 | print("shape after third conv", 
K.int_shape(conv_3)) 33 | 34 | encoded = MaxPooling2D(pooling_window, padding='same')(conv_3) 35 | print("shape of encoded", K.int_shape(encoded)) 36 | 37 | up_3 = UpSampling2D(pooling_window)(encoded) 38 | print("shape after upsample third pooling", K.int_shape(up_3)) 39 | 40 | conv_neg_3 = Conv2D(n_filters[2], conv_window, activation='relu', padding='same')(up_3) 41 | print("shape after decode third conv", K.int_shape(conv_neg_3)) 42 | 43 | up_2 = UpSampling2D(pooling_window)(conv_neg_3) 44 | print("shape after upsample second pooling", K.int_shape(up_2)) 45 | 46 | conv_neg_2 = Conv2D(n_filters[1], conv_window, activation='relu', padding='same')(up_2) 47 | print("shape after decode second conv", K.int_shape(conv_neg_2)) 48 | up_1 = UpSampling2D(pooling_window)(conv_neg_2) 49 | print("shape after upsample first pooling", K.int_shape(up_1)) 50 | conv_neg_3 = Conv2D(n_filters[0], conv_window, activation='relu', padding='same')(up_1) 51 | print("shape after decode first conv", K.int_shape(conv_neg_3)) 52 | decoded = Conv2D(1, conv_window, activation='linear', padding='same')(conv_neg_3) 53 | print("shape after decode to input", K.int_shape(decoded)) 54 | 55 | self.autoencoder = Model(input_img, decoded) 56 | self.autoencoder.compile(optimizer='adam', loss='mean_squared_error') 57 | self.encoder_model = Model(self.autoencoder.input, self.autoencoder.layers[6].output) 58 | 59 | def train_encoder(self, n_epochs=100, batch_size=64): 60 | self.autoencoder.fit(self.X_train, self.X_train, epochs=n_epochs, 61 | batch_size=batch_size, shuffle=True) 62 | 63 | def get_encoded_series(self): 64 | self.reconstructed_train = self.autoencoder.predict(self.X_train) 65 | self.reconstructed_test = self.autoencoder.predict(self.X_test) 66 | self.lf_train = self.flatten_arr(self.encoder_model.predict(self.X_train)) 67 | self.lf_test = self.flatten_arr(self.encoder_model.predict(self.X_test)) 68 | self.train_features = self.merge_features(self.reconstructed_train, self.X_train, 
self.lf_train) 69 | self.test_features = self.merge_features(self.reconstructed_test, self.X_test, self.lf_test) 70 | 71 | @staticmethod 72 | def merge_features(X_, X, lf): 73 | recon_loss = [mean_squared_error(X_[i][:, :, 0], X[i][:, :, 0]) for i in range(len(X))] 74 | keys = [f'feature_{i}' for i in range(lf.shape[1])] 75 | vals = lf.T 76 | df = pd.DataFrame(dict(list(zip(keys, vals)))) 77 | df['recon_loss'] = recon_loss 78 | return df 79 | 80 | @staticmethod 81 | def flatten_arr(arr): 82 | flat = [] 83 | for a in arr: 84 | flat.append(a.reshape(-1,)) 85 | return np.array(flat) 86 | 87 | def train_classifier(self, model='xgb', n_search=10): 88 | if model == 'rf': 89 | from sklearn.ensemble import RandomForestClassifier 90 | param_grid = {"max_depth": [10, 20, 40, None], 91 | "max_features": sp_randint(1, 20), 92 | "min_samples_split": sp_randint(5, 50), 93 | "min_samples_leaf": sp_randint(5, 50), 94 | "bootstrap": [True, False], 95 | "criterion": ["gini", "entropy"]} 96 | clf = RandomForestClassifier(verbose=0, n_estimators=100) 97 | elif model == 'xgb': 98 | import xgboost as xgb 99 | param_grid = {'silent': [True], 100 | 'max_depth': [5, 10, 20], 101 | 'learning_rate': [0.001, 0.01], 102 | 'subsample': [0.2, 0.3, 0.5, 0.6, 0.9, 1.0], 103 | 'colsample_bytree': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], 104 | 'colsample_bylevel': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], 105 | 'min_child_weight': [0.5, 1.0, 3.0, 5.0, 7.0, 10.0], 106 | 'gamma': [0, 0.25, 0.5, 1.0], 107 | 'reg_lambda': [0.1, 1.0, 50.0, 100.0, 200.0], 108 | 'n_estimators': [100], 109 | 'max_features': [3, 10, None]} 110 | clf = xgb.XGBClassifier() 111 | 112 | clf_grid = RandomizedSearchCV(clf, param_distributions=param_grid, 113 | n_iter=n_seach, cv=3, iid=False) 114 | clf_grid.fit(self.train_features.values, self.y_train) 115 | self.train_acc = clf_grid.score(self.train_features.values, self.y_train) 116 | print(f'training acc: {self.train_acc}') 117 | 118 | y_pred = 
clf_grid.predict(self.test_features.values) 119 | print(classification_report(self.y_test, y_pred)) 120 | 121 | self.clf = clf_grid.best_estimator_ 122 | -------------------------------------------------------------------------------- /demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "from sklearn.preprocessing import MinMaxScaler\n", 12 | "from kernel import load_min_data, prepare_data, NetModel" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "text/html": [ 23 | "
\n", 24 | "\n", 37 | "\n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | "
highopenlowclosevolumeoi
time
2016-05-13 14:00:006456326326451480523570070
2016-05-13 14:05:006476446396402027243586682
2016-05-13 14:10:00643640636641941643578016
2016-05-13 14:15:006556406346513334523612632
2016-05-13 14:20:006576526396413008403585022
\n", 106 | "
" 107 | ], 108 | "text/plain": [ 109 | " high open low close volume oi\n", 110 | "time \n", 111 | "2016-05-13 14:00:00 645 632 632 645 148052 3570070\n", 112 | "2016-05-13 14:05:00 647 644 639 640 202724 3586682\n", 113 | "2016-05-13 14:10:00 643 640 636 641 94164 3578016\n", 114 | "2016-05-13 14:15:00 655 640 634 651 333452 3612632\n", 115 | "2016-05-13 14:20:00 657 652 639 641 300840 3585022" 116 | ] 117 | }, 118 | "execution_count": 2, 119 | "metadata": {}, 120 | "output_type": "execute_result" 121 | } 122 | ], 123 | "source": [ 124 | "train_end = '2018-05-13 00:00' # we use 2016 - 2018 for training\n", 125 | "df_train, df_test = load_min_data()\n", 126 | "df_train.head()" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "## Prepare Data for Model\n", 134 | "> - transform data into images consisting the past 1000 points
\n", 135 | "> - aim to predict the return after 3 periods = 15 mins
\n", 136 | "> - next split training data into training set and validation set (for model selection)
" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 3, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "name": "stdout", 146 | "output_type": "stream", 147 | "text": [ 148 | "training set size: 8682, array shape: (8682, 1000, 6, 1)\n", 149 | "validation set size: 3721, and array shape: (3721, 1000, 6, 1)\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "time_window = 1000\n", 155 | "forecast_period = 3\n", 156 | "X_train, X_valid, y_train, y_valid = prepare_data(df_train, time_window, forecast_period)\n", 157 | "print(f'training set size: {X_train.shape[0]}, array shape: {X_train.shape}')\n", 158 | "print(f'validation set size: {X_valid.shape[0]}, and array shape: {X_valid.shape}')" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "## Build Convolution AutoEncoder Network\n", 166 | "> The reommanded number of filters is (64, 32, 16).
\n", 167 | "> If you don't have a GPU-accelerated machine, use small number of filters such as (16, 8, 8). Or try google colab." 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 4, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "shape of input (None, 1000, 6, 1)\n", 180 | "shape after first conv (None, 1000, 6, 32)\n", 181 | "shape after first pooling (None, 200, 6, 32)\n", 182 | "shape after second conv (None, 200, 6, 16)\n", 183 | "shape after second pooling (None, 40, 6, 16)\n", 184 | "shape after third conv (None, 40, 6, 8)\n", 185 | "shape of encoded (None, 8, 6, 8)\n", 186 | "shape after upsample third pooling (None, 40, 6, 8)\n", 187 | "shape after decode third conv (None, 40, 6, 8)\n", 188 | "shape after upsample second pooling (None, 200, 6, 8)\n", 189 | "shape after decode second conv (None, 200, 6, 16)\n", 190 | "shape after upsample first pooling (None, 1000, 6, 16)\n", 191 | "shape after decode first conv (None, 1000, 6, 32)\n", 192 | "shape after decode to input (None, 1000, 6, 1)\n" 193 | ] 194 | } 195 | ], 196 | "source": [ 197 | "net = NetModel(X_train, X_valid, y_train, y_valid)\n", 198 | "conv_window=(3, 3)\n", 199 | "pooling_window=(5, 1)\n", 200 | "n_filters=(32, 16, 8)\n", 201 | "net.build_net(conv_window, pooling_window, n_filters)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "## Train AutoEncoder and Examine the Reconstruction" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 5, 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "name": "stdout", 218 | "output_type": "stream", 219 | "text": [ 220 | "Epoch 1/3\n", 221 | "8682/8682 [==============================] - 174s 20ms/step - loss: 1472.3104\n", 222 | "Epoch 2/3\n", 223 | "8682/8682 [==============================] - 206s 24ms/step - loss: 377.1670\n", 224 | "Epoch 3/3\n", 225 | "8682/8682 
[==============================] - 197s 23ms/step - loss: 355.8918\n" 226 | ] 227 | } 228 | ], 229 | "source": [ 230 | "epoches = 3\n", 231 | "batch_size = 64\n", 232 | "net.train_encoder(epoches, batch_size)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 6, 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "data": { 242 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAC7CAYAAABFJnSnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAACu5JREFUeJzt3W1sXnUZx/Hfb33Y2jKKyMbGOkXCQ5gikDSLsoTINLoBgUTfAIoxYvpGkmEwCC994VvlDYlZgEgCspAACSE8JjoRQaDjSWYBF4JsDB1zMMY2t7a7fNF2uTfu0tPS/3161e8nadZ7OzvXdbqrv/13ds65HRECAOSxoO4GAADTQ3ADQDIENwAkQ3ADQDIENwAkQ3ADQDIENwAkQ3ADQDIENwAkQ3ADQDLtRXba1ROdi08useuj4sTRovufcMqij4vX2Lu1yB/DJxz6QnfxGh37XLzGof17NHxof/lCx2nv7Y7Opb1Fa5zZtbvo/ie8sWdZ8RoL391fvIYknfHV8t+jC1R+3N7ZPqLde0YrFSqSGJ2LT9Y53/tZiV0fNbz+w6L7n/Djs54tXuPRL59UvIYkvXnL6uI1Tttc/h9xf3vi1uI1mulc2quzf3Nd0RoPXXBH0f1PuOTest+fknTGLc8XryFJmx75c/EaC11+cXXx+n9V3pZTJQCQDMENAMkQ3ACQDMENAMkQ3ACQDMENAMlUCm7b62y/YXub7ZtLNwUAmNyUwW27TdJtktZLWiXpaturSjcGAGiuyop7taRtEfFWRByWtEnSlWXbAgBMpkpwr5C0veH1jvGfAwDUoEpwN7t3Pj6xkT1ge9D24MjB1jyjACjtmLnee6DudgBJ1YJ7h6SVDa/7JO08fqOI2BgR/RHR397VM1v9AbU6Zq57yz+kC6iiSnC/IOks21+y3SnpKkkPlW0LADCZKR95FREjtq+X9LikNkl3RsTW4p0BAJqq9KzCiHhE0iOFewEAVMCdkwCQDMENAMkQ3ACQDMENAMkQ3ACQDMENAMkQ3ACQTJH3nO/4z0EtvfvVErs+6s1zzyu6/wlDpy0vXqPt7NOK15Ck71/0bPEaTz3x9eI1FJ94VA7wf4UVNwAkQ3ADQDIENwAkQ3ADQDIENwAkQ3ADQDIENwAkQ3ADQDIENwAkM2Vw277T9i7br7WiIQDAp6uy4v6dpHWF+wAAVDRlcEfEU5L2tKAXAEAFnOMGgGRmLbhtD9getD14OP47W7sFatU41yN7D9TdDiBpFoM7IjZGRH9E9Hd60WztFqhV41y393bX3Q4giVMlAJBOlcsB75X0rKRzbO+wfV35tgAAk5nyHXAi4upWNAIAqIZTJQCQDMENAMkQ3ACQDMENAMkQ3ACQDMENAMkQ3ACQzJTXcc/E4VO6tPOa80vs+qgji0aK7n/Ca3uWF6+x97unFq8hSfrgYPESH55RZKSOMfqMi9cA5jJW3ACQDMENAMkQ3ACQDMENAMkQ3ACQDMENAMkQ3ACQDMENAMlUeQeclbb/aHvI9lbbG1rRGACguSq3uY1IujEiXrS9WNIW209GxN8L9wYAaGLKFXdEvBcRL45/vk/SkKQVpRsDADQ3rXPctk+Xd
KGk50o0AwCYWuXgtn2CpPsl3RARHzX59QHbg7YHRw/un80egdo0zvXI3gN1twNIqhjctjs0Ftr3RMQDzbaJiI0R0R8R/W1dPbPZI1Cbxrlu7+2uux1AUrWrSizpDklDEfHr8i0BAD5NlRX3GknXSlpr++Xxj0sL9wUAmMSUlwNGxNOSeHI9AMwR3DkJAMkQ3ACQDMENAMkQ3ACQDMENAMkQ3ACQDMENAMlUeazrtHXsP6JTny/7vJJrfvJM0f1P+MXn/1G8xmU/+k7xGpK0f21X8Rp9j+0uXmP73pHiNYC5jBU3ACRDcANAMgQ3ACRDcANAMgQ3ACRDcANAMgQ3ACRDcANAMlXeumyR7edtv2J7q+1ftqIxAEBzVe6cPCRpbUR8PP6mwU/bfjQi/lq4NwBAE1XeuiwkfTz+smP8I0o2BQCYXKVz3LbbbL8saZekJyPiubJtAQAmUym4I2I0Ii6Q1Cdpte2vHL+N7QHbg7YHh4fLPmAKaJXGuR7Ze6DudgBJ07yqJCI+lLRZ0romv7YxIvojor+jo2eW2gPq1TjX7b3ddbcDSKp2VckS2yeNf94l6VuSXi/dGACguSpXlSyXdJftNo0F/X0R8XDZtgAAk6lyVcmrki5sQS8AgAq4cxIAkiG4ASAZghsAkiG4ASAZghsAkiG4ASAZghsAkiG4ASCZKndOTlsssEa7iuz6qN/+5ZKi+5/wynl9xWscPnN58RqS9M7OtuI1Tr6os3iN4X+Xna3JRFijR8qudQ616IHJPtKaOq2w0OXn4VCMFK9xZBpPy2bFDQDJENwAkAzBDQDJENwAkAzBDQDJENwAkAzBDQDJENwAkEzl4LbdZvsl27xtGQDUaDor7g2Shko1AgCoplJw2+6TdJmk28u2AwCYStUV962SbpI0j55wAAA5TRncti+XtCsitkyx3YDtQduDw8P7Z61BoE6Ncz360YG62wEkVVtxr5F0he23JW2StNb23cdvFBEbI6I/Ivo7OnpmuU2gHo1z3XZid93tAJIqBHdE3BIRfRFxuqSrJP0hIn5QvDMAQFNcxw0AyUzrCeQRsVnS5iKdAAAqYcUNAMkQ3ACQDMENAMkQ3ACQDMENAMkQ3ACQDMENAMlM6zruqoYXWzsvXlhi10ctWfl+0f1PGFj2p+I1fqUfFq8hScuXfVC8xoHOZcVryOVL1GVhi46t7VALCh0ZLV9DUveCzvJFWvB4vQXTGGxW3ACQDMENAMkQ3ACQDMENAMkQ3ACQDMENAMkQ3ACQDMENAMlUugFn/P0m90kalTQSEf0lmwIATG46d05eEhG7i3UCAKiEUyUAkEzV4A5JT9jeYnugZEMAgE9X9VTJmojYaXuppCdtvx4RTzVuMB7oA5LU3vu5WW4TqEfjXHcs6a25G2BMpRV3ROwc/3GXpAclrW6yzcaI6I+I/raentntEqjJMXN9Ynfd7QCSKgS37R7biyc+l/RtSa+VbgwA0FyVUyWnSnrQ9sT2v4+Ix4p2BQCY1JTBHRFvSTq/Bb0AACrgckAASIbgBoBkCG4ASIbgBoBkCG4ASIbgBoBkCG4ASIbgBoBkHBGzv1P7fUn/nMZvOUXSfHnWN8dS3hcjYkmri85grqW5+zWciflyLHP1OCrPdZHgni7bg/PlXXU4FjSaT1/D+XIs8+E4OFUCAMkQ3ACQzFwJ7o11NzCLOBY0mk9fw/lyLOmPY06c4wYAVDdXVtwAgIpqD27b62y/YXub7Zvr7membK+0/UfbQ7a32t5Qd0+fhe022y/ZfrjuXjJirueu+TDbtQa37TZJt0laL2mVpKttr6qzp89gRNKNEXGupK9J+mniY5GkDZKG6m4iI+Z6zks/23WvuFdL2hYRb0XEYUmbJF1Zc08zEhHvRcSL45/v09hgrKi3q5mx3SfpMkm3191LUsz1HDVfZrvu4F4haXvD6x1KPBQTbJ8u6UJJz9XbyYzdKukmSUfqbiQp5nrumhezXXdwu8nPpb7Mx
fYJku6XdENEfFR3P9Nl+3JJuyJiS929JMZcz0HzabbrDu4dklY2vO6TtLOmXj4z2x0aG+57IuKBuvuZoTWSrrD9tsb+ib/W9t31tpQOcz03zZvZrvU6btvtkt6U9E1J70p6QdI1EbG1tqZmyLYl3SVpT0TcUHc/s8H2NyT9PCIur7uXTJjruS/7bNe64o6IEUnXS3pcY//pcV/G4R63RtK1Gvtb/OXxj0vrbgqtx1yjNO6cBIBk6j7HDQCYJoIbAJIhuAEgGYIbAJIhuAEgGYIbAJIhuAEgGYIbAJL5H26s+wya/iE2AAAAAElFTkSuQmCC\n", 243 | "text/plain": [ 244 | "
" 245 | ] 246 | }, 247 | "metadata": { 248 | "needs_background": "light" 249 | }, 250 | "output_type": "display_data" 251 | } 252 | ], 253 | "source": [ 254 | "net.get_encoded_series()\n", 255 | "img = X_train[0][:, :, 0].copy()\n", 256 | "img_c = net.reconstructed_train[0][:, :, 0].copy()\n", 257 | "img_c = (MinMaxScaler().fit_transform(img_c) * 255).astype('int')\n", 258 | "\n", 259 | "b=20\n", 260 | "f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)\n", 261 | "ax1.imshow(img[6*(b-1):6*b, :])\n", 262 | "ax2.imshow(img_c[6*(b-1):6*b, :])\n", 263 | "plt.show()" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "## Train Asset Return Predictor\n", 271 | "- 'rf' means random forest classifier\n", 272 | "- 'xgb' is XGBoost classifier\n", 273 | "- 'n_search' is the number of random grid-search to perform" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 10, 279 | "metadata": {}, 280 | "outputs": [ 281 | { 282 | "name": "stdout", 283 | "output_type": "stream", 284 | "text": [ 285 | "training acc: 0.6446671273900023\n", 286 | " precision recall f1-score support\n", 287 | "\n", 288 | " 0.0 0.49 0.15 0.23 1697\n", 289 | " 1.0 0.55 0.87 0.67 2024\n", 290 | "\n", 291 | " micro avg 0.54 0.54 0.54 3721\n", 292 | " macro avg 0.52 0.51 0.45 3721\n", 293 | "weighted avg 0.52 0.54 0.47 3721\n", 294 | "\n", 295 | "145.05216813087463\n" 296 | ] 297 | } 298 | ], 299 | "source": [ 300 | "import time\n", 301 | "start_time = time.time()\n", 302 | "net.train_classifier('rf', n_search=10)\n", 303 | "print(time.time() - start_time)" 304 | ] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "metadata": {}, 309 | "source": [ 310 | "## Applying Strategy" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [] 319 | } 320 | ], 321 | "metadata": { 322 | "kernelspec": { 323 | "display_name": "Python 3", 324 | "language": 
"python", 325 | "name": "python3" 326 | }, 327 | "language_info": { 328 | "codemirror_mode": { 329 | "name": "ipython", 330 | "version": 3 331 | }, 332 | "file_extension": ".py", 333 | "mimetype": "text/x-python", 334 | "name": "python", 335 | "nbconvert_exporter": "python", 336 | "pygments_lexer": "ipython3", 337 | "version": "3.6.7" 338 | } 339 | }, 340 | "nbformat": 4, 341 | "nbformat_minor": 2 342 | } 343 | --------------------------------------------------------------------------------