├── AutoBet pipeline.pdf ├── output └── evaluation.png ├── README.md └── src ├── process_data.py ├── evaluation.py ├── betting_algorithm.py ├── rating_scraper.ipynb ├── xgb_gridsearch.ipynb └── machine_learning.ipynb /AutoBet pipeline.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbrojas/autobet/HEAD/AutoBet pipeline.pdf -------------------------------------------------------------------------------- /output/evaluation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbrojas/autobet/HEAD/output/evaluation.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # auto_betting 2 | 3 | Project for soccer outcome predictive modelling and algorithmic betting. 4 | 5 | 6 | -------------------------------------------------------------------------------- /src/process_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import sqlite3 4 | import math 5 | 6 | # set database path 7 | database_path = "../../data/database.sqlite" 8 | 9 | # helper functions 10 | def get_rating(player_id): 11 | if math.isnan(player_id): 12 | return np.nan 13 | return player_lvl['overall_rating'][player_id] 14 | 15 | def get_outcome(home, away): 16 | if home > away: 17 | return 0 18 | elif home == away: 19 | return 1 20 | else: 21 | return 2 22 | 23 | # open connection with database and get relevant data 24 | with sqlite3.connect(database_path) as con: 25 | sql = ''' 26 | SELECT * 27 | FROM match 28 | ''' 29 | df_match = pd.read_sql_query(sql, con) 30 | 31 | with sqlite3.connect(database_path) as con: 32 | sql = ''' 33 | SELECT player_api_id, overall_rating 34 | FROM player_attributes 35 | ''' 36 | player_lvl = pd.read_sql_query(sql, con) 37 | 38 
| # select relevant variables 39 | df = df_match[['stage', 'home_team_goal', 'away_team_goal','B365H','B365D','B365A','BWH', 40 | 'BWD','BWA','IWH','IWD','IWA','LBH','LBD','LBA','WHH','WHD','WHA','SJH','SJD','SJA', 41 | 'VCH','VCD','VCA','GBH','GBD','GBA']] 42 | df_lvls = df_match[['home_player_1','home_player_2','home_player_3','home_player_4','home_player_5', 43 | 'home_player_6','home_player_7','home_player_8','home_player_9','home_player_10', 44 | 'home_player_11','away_player_1','away_player_2','away_player_3','away_player_4', 45 | 'away_player_5','away_player_6','away_player_7','away_player_8','away_player_9', 46 | 'away_player_10','away_player_11']] 47 | 48 | # get player skill-levels 49 | player_lvl = player_lvl.groupby('player_api_id').mean().to_dict() 50 | df = pd.concat([df, df_lvls.applymap(lambda x: get_rating(x))], axis=1) 51 | 52 | # get target variable 53 | df['target'] = df.apply(lambda x: get_outcome(x['home_team_goal'], x['away_team_goal']), axis=1) 54 | df = df.drop(['home_team_goal', 'away_team_goal'], axis=1) 55 | 56 | # drop nans 57 | df.dropna(inplace=True) 58 | 59 | # save processed data 60 | df.to_csv('../../data/processed.csv', index=False) 61 | -------------------------------------------------------------------------------- /src/evaluation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | from matplotlib import gridspec 5 | 6 | ''' Evaluation plot 7 | 8 | This plot evaluates the strategy on the backtest data. Three plots are made: (1) a cumulative performance 9 | plot, (2) a net returns plot and (3) a transactions plot. Plot 1 shows the cumulative profit made on the 10 | backtest matches. Plot 2 shows the profit per match. Plot 3 shows the bets placed on the matches. 
11 | 12 | ''' 13 | 14 | def evaluate(x): 15 | 16 | # define plotting data 17 | flow = np.cumsum(x.winnings - x.stake) 18 | profit = x.winnings-x.stake 19 | size = range(len(flow)) 20 | zero = np.zeros(len(flow)) 21 | 22 | # define figures 23 | fig = plt.figure(figsize=(24, 12)) 24 | gs = gridspec.GridSpec(3, 1, height_ratios=[2, 1, 1]) 25 | 26 | # plot cumulative profit 27 | ax0 = plt.subplot(gs[0]) 28 | ax0.grid(color='grey', linestyle='-', linewidth=1, alpha=0.5) 29 | ax0.plot(size, flow, '#4572a7', linewidth = 2) 30 | ax0.plot(size, zero, '#000000', linewidth = 1) 31 | ax0.fill_between(size, flow, zero, where=zero >= flow, facecolor='#dbe3ee', interpolate=True) 32 | ax0.fill_between(size, flow, zero, where=zero <= flow, facecolor='#dbe3ee', interpolate=True) 33 | ax0.set_title('Cumulative Performance: {}% of €{} invested'.format( 34 | format(((sum(x.winnings) / sum(x.stake))-1)*100, '.2f'), 35 | format(sum(x.stake), '.2f')), loc='left', fontsize=15) 36 | 37 | # plot returns 38 | ax1 = plt.subplot(gs[1]) 39 | ax1.grid(color='grey', linestyle='-', linewidth=1, alpha=0.5) 40 | ax1.set_ylim((min(profit), max(profit))) 41 | ax1.bar(size, np.clip(profit, 0, max(profit)), 2, color='#00998f') 42 | ax1.bar(size, np.clip(profit, min(profit), 0), 2, color='#f66a83') 43 | ax1.plot(size, zero, color='#000000', linewidth=1) 44 | ax1.set_title('Returns: {} wins / {} losses'.format( 45 | format(np.sum(profit > 0)), 46 | format(np.sum(profit < 0))), loc='left', fontsize=15) 47 | 48 | # plot transactions 49 | ax2 = plt.subplot(gs[2]) 50 | ax2.grid(color='grey', linestyle='-', linewidth=1, alpha=0.5) 51 | ax2.bar(size, x.stake, 2, color='#4572a7') 52 | ax2.set_title('Transactions', loc='left', fontsize=15) 53 | 54 | # save figure 55 | plt.savefig('../output/evaluation.png', bbox_inches='tight') 56 | 57 | # load backtest results 58 | results = pd.read_csv('../../data/backtest_results.csv') 59 | 60 | # evaluate results 61 | evaluate(results) 62 | 
-------------------------------------------------------------------------------- /src/betting_algorithm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | ''' Betting strategy #1: 5 | 6 | ## STRATEGY ## 7 | 8 | This betting strategy compares the confidence of soccer match outcomes from 8 different betting houses 9 | with a machine learning model. We select the betting house that underestimates the winning team's performance 10 | the most. A bet is placed whose size is proportional to the magnitude of the underestimation. 11 | 12 | ## EXAMPLE ## 13 | 14 | Real Madrid vs. Levante 15 | B365 odds: 1.45 / 3.40 / 6.15 16 | BWIN odds: 1.35 / 4.10 / 5.90 17 | ML odds: 1.10 / 3.90 / 7.15 18 | 19 | All predictors think Real Madrid is going to win. However, the ML model is more certain than the houses. In 20 | this case, B365 underestimates Real Madrid the most. The algorithm selects this betting house and bets 21 | (1.45/(1.1+risk_adj))^2 = $1.32 on Real Madrid (1.45), where risk_adj is a tunable parameter to control the 22 | amount of risk. 
23 | ''' 24 | 25 | def strategy_1(x, true, preds, risk_adj): 26 | 27 | # function to determine magnitude of underestimation 28 | def _magnitude(odds, preds, risk_adj): 29 | x = np.array([]).reshape(len(odds), 0) 30 | for house in set([h[:-1] for h in odds.columns]): 31 | filter_col = [col for col in odds.columns if col.startswith(house)] 32 | x = np.concatenate([x, (np.array(odds[filter_col]) / (np.array(preds) + risk_adj))**2], axis=1) 33 | return pd.DataFrame(x, columns=odds.columns) 34 | 35 | # function to select optimal betting house 36 | def _selection(x, preds, true): 37 | idxs = [[np.argmin(p) + (3 * i) for i in range(8)] for p in preds.values] 38 | action = [] 39 | for i in range(len(x)): 40 | best = np.max(x.iloc[i, idxs[i]]) 41 | if best > 1: 42 | action.append((np.argmax(x.iloc[i, idxs[i]]), best, idxs[i][0])) 43 | else: 44 | action.append((np.nan, np.nan, np.nan)) 45 | res = pd.concat([pd.DataFrame(action), pd.Series(true)], axis=1) 46 | res.columns = ['house', 'stake', 'pred', 'true'] 47 | return res 48 | 49 | # apply strategy 1 50 | results = _selection(_magnitude(x, preds, risk_adj), preds, true) 51 | 52 | # get the odds for the winning bets 53 | win_odds = [] 54 | for res in results[results.pred == results.true].reset_index().values: 55 | win_odds.append(x.loc[res[0], res[1]]) 56 | win_odds = pd.DataFrame({'index': results[results.pred == results.true].reset_index()['index'], 57 | 'odd': pd.Series(win_odds)}) 58 | 59 | # merge winning odds and payoff with results 60 | results = pd.merge(results.reset_index(), win_odds, how='outer', on='index') 61 | results = results.fillna(0) 62 | results = pd.concat([results, results.stake * results.odd], axis=1) 63 | results.columns = ['match', 'house', 'stake', 'pred', 'true', 'odd', 'winnings'] 64 | 65 | return results 66 | 67 | # load backtest data 68 | x_bt = pd.read_csv('../../data/stack_backtest.csv') 69 | y_bt = x_bt.target 70 | x_bt = x_bt.drop(['target'], axis=1) 71 | 72 | # run backtest 73 | odds = 
x_bt.iloc[:, :24] 74 | preds = x_bt.iloc[:,-3:] 75 | results = strategy_1(odds, y_bt, preds, risk_adj=0.9) 76 | results.to_csv('../../data/backtest_results.csv', index=False) 77 | -------------------------------------------------------------------------------- /src/rating_scraper.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# AutoBet Rating Scraper\n", 8 | "\n", 9 | "This notebook will scrape the FIFA index (https://www.fifaindex.com/) for real-time player and club performance data. The main purpose for this index is to adjust the player/club abilities in the game FIFA '17 to match the performance of their real-life counterpart. Although it's unclear how FIFA determines these values, they might be useful features for the AutoBet classifier." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from lxml import html\n", 19 | "from tqdm import *\n", 20 | "import pandas as pd\n", 21 | "import requests" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## Player information\n", 29 | "#### *Hyperlink extraction*\n", 30 | "\n", 31 | "First, we need to scrape the website for the hyperlinks referencing to all players in their database. These links are saved in the list *links* and stored to csv for backup." 
32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 86, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# Set base webpage\n", 41 | "base = 'https://www.fifaindex.com/players/'\n", 42 | "\n", 43 | "# Create html tree\n", 44 | "page = requests.get(base)\n", 45 | "tree = html.fromstring(page.content)\n", 46 | "\n", 47 | "# Get player hyperlinks\n", 48 | "links = list(set([link for link in tree.xpath('//*[@id=\"no-more-tables\"]/table/tbody/tr/td/a[@title]/@href') if link.startswith('/player/')]))\n", 49 | "\n", 50 | "# Repeat for all pages (587 pages in total)\n", 51 | "for i in range(2, 587):\n", 52 | " base = base[:34] + str(i)+'/'\n", 53 | " page = requests.get(base)\n", 54 | " tree = html.fromstring(page.content)\n", 55 | " \n", 56 | " player_links = list(set([link for link in tree.xpath('//*[@id=\"no-more-tables\"]/table/tbody/tr/td/a[@title]/@href') if link.startswith('/player/')]))\n", 57 | " \n", 58 | " for link in player_links:\n", 59 | " links.append(link)\n", 60 | " \n", 61 | "pd.Series(links).to_csv('../data/hyperlinks.csv', index=False)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": { 68 | "collapsed": true 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "links = list(pd.read_csv('../data/hyperlinks.csv'))" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "#### *Feature scraping*\n", 80 | "\n", 81 | "Now that we have all the hyperlinks, we can extract the features from them. These are stored in a pandas DataFrame, where the players are indexed in the rows and features stored over the columns. With my network speed, the downloading will take about 1.5 hours." 
82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": { 88 | "collapsed": true 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "page = requests.get('https://www.fifaindex.com' + links[0])\n", 93 | "tree = html.fromstring(page.content)\n", 94 | "\n", 95 | "# Get feature names\n", 96 | "features = []\n", 97 | "for element in tree.find_class('pull-right'):\n", 98 | " try:\n", 99 | " features.append(element.getparent().text_content()[:-3])\n", 100 | " except:\n", 101 | " continue \n", 102 | "feature = features[16:]\n", 103 | "feature.insert(0, 'Overall_2')\n", 104 | "feature.insert(0, 'Overall_1')\n", 105 | "feature.append('Name')\n", 106 | "\n", 107 | "# Scrape player features and create DataFrame\n", 108 | "data = []\n", 109 | "for hyperlink in tqdm(links): \n", 110 | " page = requests.get('https://www.fifaindex.com' + hyperlink)\n", 111 | " tree = html.fromstring(page.content)\n", 112 | " features = [int(element.text_content()) for element in tree.find_class('label rating')]\n", 113 | " features.append(tree.find_class('panel-title')[0].text_content()[:-6])\n", 114 | " data.append(features)\n", 115 | "\n", 116 | "df = pd.DataFrame(data, columns=feature)\n", 117 | "df.to_csv('../data/player_features.csv', index=False, encoding='utf8')" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## Club information\n", 125 | "\n", 126 | "We will do the same thing for clubs" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "#### *Hyperlink extraction*" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 25, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "# Set base webpage\n", 143 | "base = 'https://www.fifaindex.com/teams/'\n", 144 | "\n", 145 | "# Create html tree\n", 146 | "page = requests.get(base)\n", 147 | "tree = html.fromstring(page.content)\n", 148 | "\n", 149 | "# Get player 
hyperlinks\n", 150 | "links = list(set([link for link in tree.xpath('//*[@id=\"no-more-tables\"]/table/tbody/tr/td/a[@title]/@href') if link.startswith('/team/')]))\n", 151 | "\n", 152 | "# Repeat for all pages (587 pages in total)\n", 153 | "for i in range(2, 23):\n", 154 | " base = base[:32] + str(i)+'/'\n", 155 | " page = requests.get(base)\n", 156 | " tree = html.fromstring(page.content)\n", 157 | " \n", 158 | " team_links = list(set([link for link in tree.xpath('//*[@id=\"no-more-tables\"]/table/tbody/tr/td/a[@title]/@href') if link.startswith('/team/')]))\n", 159 | " \n", 160 | " for link in team_links:\n", 161 | " links.append(link)\n", 162 | " \n", 163 | "pd.Series(links).to_csv('./team_hyperlinks.csv', index=False)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": { 170 | "collapsed": true 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "links = list(pd.read_csv('./team_hyperlinks.csv'))" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "#### *Feature scraping*" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 119, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "page = requests.get('https://www.fifaindex.com' + links[0])\n", 191 | "tree = html.fromstring(page.content)\n", 192 | "\n", 193 | "# Get feature names\n", 194 | "features = []\n", 195 | "for element in tree.find_class('pull-right'):\n", 196 | " try:\n", 197 | " features.append(element.getparent().text_content()[:-3])\n", 198 | " except:\n", 199 | " continue \n", 200 | "feature = features[2:14]\n", 201 | "feature.insert(len(feature), 'Club')\n", 202 | "\n", 203 | "# Scrape club features and create DataFrame\n", 204 | "data = []\n", 205 | "for hyperlink in links:\n", 206 | " page = requests.get('https://www.fifaindex.com' + hyperlink)\n", 207 | " tree = html.fromstring(page.content)\n", 208 | " features = [e.text_content() for e in 
tree.find_class('pull-right')][2:14] \n", 209 | " features.append(tree.find_class('team normal')[0].items()[1][1])\n", 210 | " data.append(features)\n", 211 | " \n", 212 | "df = pd.DataFrame(data, columns=feature)\n", 213 | "df.to_csv('./team_features.csv', index=False, encoding='utf8')" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "## Save to SQL\n", 221 | "\n", 222 | "All dataframes are stored in .sqlite for later usage" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 9, 228 | "metadata": {}, 229 | "outputs": [ 230 | { 231 | "name": "stderr", 232 | "output_type": "stream", 233 | "text": [ 234 | "/home/daniel/miniconda2/lib/python2.7/site-packages/ipykernel/__main__.py:11: UserWarning: The spaces in these column names will not be changed. In pandas versions < 0.14, spaces were converted to underscores.\n", 235 | "/home/daniel/miniconda2/lib/python2.7/site-packages/ipykernel/__main__.py:13: UserWarning: The spaces in these column names will not be changed. 
In pandas versions < 0.14, spaces were converted to underscores.\n" 236 | ] 237 | } 238 | ], 239 | "source": [ 240 | "import sqlite3\n", 241 | "from pandas.io import sql\n", 242 | "\n", 243 | "tf = pd.read_csv('../data/team_features.csv', encoding='latin-1')\n", 244 | "tl = pd.read_csv('../data/team_hyperlinks.csv', encoding='latin-1')\n", 245 | "pf = pd.read_csv('../data/player_features.csv', encoding='latin-1')\n", 246 | "pl = pd.read_csv('../data/player_hyperlinks.csv', encoding='latin-1')\n", 247 | "\n", 248 | "db = sqlite3.connect('../data/features.sqlite')\n", 249 | "\n", 250 | "sql.to_sql(tf, name='team_features', con=db, index=False)\n", 251 | "sql.to_sql(tl, name='team_links', con=db, index=False)\n", 252 | "sql.to_sql(pf, name='player_features', con=db, index=False)\n", 253 | "sql.to_sql(pl, name='player_links', con=db, index=False)\n", 254 | "\n", 255 | "db.commit()" 256 | ] 257 | } 258 | ], 259 | "metadata": { 260 | "anaconda-cloud": {}, 261 | "kernelspec": { 262 | "display_name": "Python 3", 263 | "language": "python", 264 | "name": "python3" 265 | }, 266 | "language_info": { 267 | "codemirror_mode": { 268 | "name": "ipython", 269 | "version": 3 270 | }, 271 | "file_extension": ".py", 272 | "mimetype": "text/x-python", 273 | "name": "python", 274 | "nbconvert_exporter": "python", 275 | "pygments_lexer": "ipython3", 276 | "version": "3.5.3" 277 | } 278 | }, 279 | "nbformat": 4, 280 | "nbformat_minor": 2 281 | } 282 | -------------------------------------------------------------------------------- /src/xgb_gridsearch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import xgboost as xgb\n", 12 | "from sklearn.model_selection import GridSearchCV\n", 13 | "\n", 14 | "''' Function to perform gridsearch parameter tuning on xgboost algorithm.\n", 15 | "\n", 
16 | "# Arguments:\n", 17 | " x_train: ndarray, the trainset features\n", 18 | " y_train: array, the trainset labels\n", 19 | " params: dict, initial xgboost parameters\n", 20 | " tune_params: dict, parameters to tune with value grid\n", 21 | " \n", 22 | "# Returns:\n", 23 | " gsearch: dict, optimal values for tune_params parameters\n", 24 | "'''\n", 25 | "\n", 26 | "def gridsearch(X, y, params, tune_params):\n", 27 | "\n", 28 | " model = xgb.XGBClassifier(learning_rate = params['learning_rate'], n_estimators = params['n_estimator']\n", 29 | " , max_depth = params['max_depth'], min_child_weight = params['min_child_weight']\n", 30 | " , gamma = params['gamma'], subsample = params['subsample']\n", 31 | " , colsample_bytree = params['colsample_bytree'], objective = params['objective']\n", 32 | " , scale_pos_weight = params['scale_pos_weight'], seed = params['seed'])\n", 33 | "\n", 34 | " gsearch = GridSearchCV(estimator=model, param_grid=tune_params, scoring=params['scoring']\n", 35 | " , n_jobs=1, iid=False, verbose=1)\n", 36 | "\n", 37 | " gsearch.fit(X, y)\n", 38 | " print(gsearch.best_params_)\n", 39 | "\n", 40 | " return gsearch.best_params_\n", 41 | "\n", 42 | "def update(base, new):\n", 43 | " for par in new.keys():\n", 44 | " base[par] = new[par]\n", 45 | " return base" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 2, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "import pandas as pd\n", 55 | "from sklearn.model_selection import train_test_split\n", 56 | "\n", 57 | "df = pd.read_csv('../../data/processed_stack.csv')\n", 58 | "X = df.drop('target', axis=1).values\n", 59 | "y = df['target'].values\n", 60 | "\n", 61 | "# keep 15% test data\n", 62 | "x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=33)\n", 63 | "\n", 64 | "dtrain = xgb.DMatrix(x_train, label=y_train)\n", 65 | "dtest = xgb.DMatrix(x_test)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 3, 71 | 
"metadata": {}, 72 | "outputs": [ 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "[0]\ttrain-merror:0.425408+0.00680085\ttest-merror:0.483267+0.00892489\n", 78 | "[20]\ttrain-merror:0.352189+0.00430245\ttest-merror:0.469655+0.00364296\n" 79 | ] 80 | } 81 | ], 82 | "source": [ 83 | "# Set initial parameters and find optimal number of boosting rounds\n", 84 | "\n", 85 | "xgb_params = {\n", 86 | " 'objective': 'multi:softmax',\n", 87 | " 'eval_metric': 'merror',\n", 88 | " 'learning_rate': 0.1,\n", 89 | " 'max_depth': 6,\n", 90 | " 'min_child_weight': 1,\n", 91 | " 'subsample': 0.7,\n", 92 | " 'colsample_bytree': 0.7,\n", 93 | " 'seed': 2017,\n", 94 | " 'silent': 1,\n", 95 | " 'num_parallel_tree': 1,\n", 96 | " 'num_class': 5\n", 97 | "}\n", 98 | "\n", 99 | "res = xgb.cv(xgb_params,\n", 100 | " dtrain,\n", 101 | " num_boost_round=750,\n", 102 | " nfold=4,\n", 103 | " seed=2017,\n", 104 | " stratified=False,\n", 105 | " early_stopping_rounds=15,\n", 106 | " verbose_eval=20,\n", 107 | " show_stdv=True,\n", 108 | " maximize=False)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 4, 114 | "metadata": { 115 | "collapsed": true 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "# formulate initial parameters\n", 120 | "params = {\n", 121 | " 'objective': 'multi:softmax',\n", 122 | " 'num_class': 3,\n", 123 | " 'scoring': 'accuracy',\n", 124 | " 'learning_rate': 0.1,\n", 125 | " 'max_depth': 6,\n", 126 | " 'min_child_weight': 1,\n", 127 | " 'scale_pos_weight': 1,\n", 128 | " 'subsample': 0.7,\n", 129 | " 'colsample_bytree': 0.7,\n", 130 | " 'seed': 1337,\n", 131 | " 'silent': 1,\n", 132 | " 'num_parallel_tree': 1,\n", 133 | " 'gamma': 0,\n", 134 | " 'n_estimator': res.shape[0]\n", 135 | "}" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 5, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "Fitting 3 folds 
for each of 12 candidates, totalling 36 fits\n" 148 | ] 149 | }, 150 | { 151 | "name": "stderr", 152 | "output_type": "stream", 153 | "text": [ 154 | "[Parallel(n_jobs=1)]: Done 36 out of 36 | elapsed: 16.5s finished\n" 155 | ] 156 | }, 157 | { 158 | "name": "stdout", 159 | "output_type": "stream", 160 | "text": [ 161 | "{'max_depth': 3, 'min_child_weight': 5}\n", 162 | "Fitting 3 folds for each of 9 candidates, totalling 27 fits\n" 163 | ] 164 | }, 165 | { 166 | "name": "stderr", 167 | "output_type": "stream", 168 | "text": [ 169 | "[Parallel(n_jobs=1)]: Done 27 out of 27 | elapsed: 6.7s finished\n" 170 | ] 171 | }, 172 | { 173 | "name": "stdout", 174 | "output_type": "stream", 175 | "text": [ 176 | "{'max_depth': 2, 'min_child_weight': 4}\n" 177 | ] 178 | } 179 | ], 180 | "source": [ 181 | "# specify the max_depth and min_child weight grid\n", 182 | "tune_params = {\n", 183 | " 'max_depth': list(range(3,10,2)),\n", 184 | " 'min_child_weight': list(range(1,6,2))}\n", 185 | "tmp = gridsearch(x_train, y_train, params, tune_params)\n", 186 | "\n", 187 | "# narrow down grid\n", 188 | "tune_params = {\n", 189 | " 'max_depth': [tmp['max_depth'] + i for i in range(-1, 2)],\n", 190 | " 'min_child_weight': [tmp['min_child_weight'] + i for i in range(-1, 2)]}\n", 191 | "tmp = gridsearch(x_train, y_train, params, tune_params)\n", 192 | "\n", 193 | "# update base parameters with optimal values\n", 194 | "params = update(params, tmp)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 6, 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "name": "stdout", 204 | "output_type": "stream", 205 | "text": [ 206 | "Fitting 3 folds for each of 5 candidates, totalling 15 fits\n" 207 | ] 208 | }, 209 | { 210 | "name": "stderr", 211 | "output_type": "stream", 212 | "text": [ 213 | "[Parallel(n_jobs=1)]: Done 15 out of 15 | elapsed: 3.1s finished\n" 214 | ] 215 | }, 216 | { 217 | "name": "stdout", 218 | "output_type": "stream", 219 | "text": [ 220 | 
"{'gamma': 0.0}\n" 221 | ] 222 | } 223 | ], 224 | "source": [ 225 | "# specify gamma grid\n", 226 | "tune_params = {\n", 227 | " 'gamma':[i/10.0 for i in list(range(0,5))]\n", 228 | "}\n", 229 | "tmp = gridsearch(x_train, y_train, params, tune_params)\n", 230 | "params = update(params, tmp)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 7, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "name": "stdout", 240 | "output_type": "stream", 241 | "text": [ 242 | "[0]\ttrain-merror:0.463994+0.00285842\ttest-merror:0.47003+0.00779443\n", 243 | "[20]\ttrain-merror:0.457667+0.00218463\ttest-merror:0.463411+0.00755358\n" 244 | ] 245 | } 246 | ], 247 | "source": [ 248 | "# re-estimate optimal number of boosting rounds\n", 249 | "pars = ['scale_pos_weight','gamma','colsample_bytree','max_depth'\n", 250 | " ,'subsample','num_parallel_tree','min_child_weight']\n", 251 | "for par in pars:\n", 252 | " xgb_params[par] = params[par]\n", 253 | "\n", 254 | "res = xgb.cv(xgb_params,\n", 255 | " dtrain,\n", 256 | " num_boost_round=750,\n", 257 | " nfold=4,\n", 258 | " seed=2017,\n", 259 | " stratified=False,\n", 260 | " early_stopping_rounds=15,\n", 261 | " verbose_eval=20,\n", 262 | " show_stdv=True,\n", 263 | " maximize=False)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 8, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "name": "stdout", 273 | "output_type": "stream", 274 | "text": [ 275 | "Fitting 3 folds for each of 25 candidates, totalling 75 fits\n" 276 | ] 277 | }, 278 | { 279 | "name": "stderr", 280 | "output_type": "stream", 281 | "text": [ 282 | "[Parallel(n_jobs=1)]: Done 75 out of 75 | elapsed: 14.4s finished\n" 283 | ] 284 | }, 285 | { 286 | "name": "stdout", 287 | "output_type": "stream", 288 | "text": [ 289 | "{'subsample': 0.5, 'colsample_bytree': 0.6}\n", 290 | "Fitting 3 folds for each of 36 candidates, totalling 108 fits\n", 291 | "{'subsample': 0.4, 'colsample_bytree': 0.55}\n" 292 | ] 293 
| }, 294 | { 295 | "name": "stderr", 296 | "output_type": "stream", 297 | "text": [ 298 | "[Parallel(n_jobs=1)]: Done 108 out of 108 | elapsed: 14.7s finished\n" 299 | ] 300 | } 301 | ], 302 | "source": [ 303 | "# specify subsample and colsample_bytree grid\n", 304 | "tune_params = {\n", 305 | " 'subsample': [i/10.0 for i in list(range(5,10))],\n", 306 | " 'colsample_bytree': [i/10.0 for i in list(range(5,10))]\n", 307 | "}\n", 308 | "tmp = gridsearch(x_train, y_train, params, tune_params)\n", 309 | "params = update(params, tmp)\n", 310 | "\n", 311 | "# narrow down grid\n", 312 | "tune_params = {\n", 313 | " 'subsample': [i/100.0 for i in list(range(int(tmp['subsample'] * 100) - 15\n", 314 | " , int(tmp['subsample'] * 100) + 15, 5))],\n", 315 | " 'colsample_bytree': [i/100.0 for i in list(range(int(tmp['subsample'] * 100) - 15\n", 316 | " , int(tmp['subsample'] * 100) + 15, 5))]\n", 317 | "}\n", 318 | "tmp = gridsearch(x_train, y_train, params, tune_params)\n", 319 | "params = update(params, tmp)" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 9, 325 | "metadata": {}, 326 | "outputs": [ 327 | { 328 | "name": "stdout", 329 | "output_type": "stream", 330 | "text": [ 331 | "Fitting 3 folds for each of 5 candidates, totalling 15 fits\n", 332 | "{'reg_alpha': 0.05}\n" 333 | ] 334 | }, 335 | { 336 | "name": "stderr", 337 | "output_type": "stream", 338 | "text": [ 339 | "[Parallel(n_jobs=1)]: Done 15 out of 15 | elapsed: 2.2s finished\n" 340 | ] 341 | } 342 | ], 343 | "source": [ 344 | "# specify alpha grid\n", 345 | "tune_params = {\n", 346 | " 'reg_alpha': [0, 0.001, 0.005, 0.01, 0.05]\n", 347 | "}\n", 348 | "tmp = gridsearch(x_train, y_train, params, tune_params)\n", 349 | "params = update(params, tmp)" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 10, 355 | "metadata": {}, 356 | "outputs": [ 357 | { 358 | "data": { 359 | "text/plain": [ 360 | "{'colsample_bytree': 0.55,\n", 361 | " 'gamma': 0.0,\n", 362 | " 
'learning_rate': 0.1,\n", 363 | " 'max_depth': 2,\n", 364 | " 'min_child_weight': 4,\n", 365 | " 'n_estimator': 16,\n", 366 | " 'num_class': 3,\n", 367 | " 'num_parallel_tree': 1,\n", 368 | " 'objective': 'multi:softmax',\n", 369 | " 'reg_alpha': 0.05,\n", 370 | " 'scale_pos_weight': 1,\n", 371 | " 'scoring': 'accuracy',\n", 372 | " 'seed': 1337,\n", 373 | " 'silent': 1,\n", 374 | " 'subsample': 0.4}" 375 | ] 376 | }, 377 | "execution_count": 10, 378 | "metadata": {}, 379 | "output_type": "execute_result" 380 | } 381 | ], 382 | "source": [ 383 | "# print optimal hyperparameters\n", 384 | "params" 385 | ] 386 | } 387 | ], 388 | "metadata": { 389 | "kernelspec": { 390 | "display_name": "Python 3", 391 | "language": "python", 392 | "name": "python3" 393 | }, 394 | "language_info": { 395 | "codemirror_mode": { 396 | "name": "ipython", 397 | "version": 3 398 | }, 399 | "file_extension": ".py", 400 | "mimetype": "text/x-python", 401 | "name": "python", 402 | "nbconvert_exporter": "python", 403 | "pygments_lexer": "ipython3", 404 | "version": "3.5.3" 405 | } 406 | }, 407 | "nbformat": 4, 408 | "nbformat_minor": 2 409 | } 410 | -------------------------------------------------------------------------------- /src/machine_learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "Using TensorFlow backend.\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "# imports\n", 18 | "import numpy as np\n", 19 | "import pandas as pd\n", 20 | "import xgboost as xgb\n", 21 | "from collections import OrderedDict\n", 22 | "from sklearn.model_selection import KFold, train_test_split\n", 23 | "from sklearn.linear_model import LogisticRegression\n", 24 | "from sklearn.neighbors import KNeighborsClassifier\n", 25 | "from sklearn.ensemble import 
RandomForestClassifier\n", 26 | "from sklearn.metrics import classification_report, accuracy_score\n", 27 | "from keras.models import Sequential\n", 28 | "from keras.layers import Dense, Dropout, Activation\n", 29 | "from keras.layers.advanced_activations import PReLU\n", 30 | "from keras.callbacks import EarlyStopping\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "%matplotlib inline" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": { 39 | "collapsed": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "# helper functions\n", 44 | "def StackEnsemble(models, x_train, y_train, x_test, n_folds=5, prob=True, orig_data=True, verbose=True):\n", 45 | " \n", 46 | " def _join(x, preds): \n", 47 | " x = x if orig_data else np.array([]).reshape(len(x), 0)\n", 48 | " for pred in preds.values():\n", 49 | " x = np.concatenate([x, np.array(pred).reshape(-1, len(set(y_train)) if prob else 1)], axis=1)\n", 50 | " return x\n", 51 | " \n", 52 | " def _predict(x):\n", 53 | " return model.predict_proba(x) if prob else model.predict(x)\n", 54 | "\n", 55 | " kf = KFold(n_splits = n_folds)\n", 56 | " preds_train = OrderedDict()\n", 57 | " preds_test = OrderedDict()\n", 58 | "\n", 59 | " for name, model in [(str(type(m)).split('.')[-1][:-2], m) for m in models]:\n", 60 | " if verbose: print('Getting predictions from {}..'.format(name))\n", 61 | "\n", 62 | " preds_train[name] = []\n", 63 | " for train, test in kf.split(x_train):\n", 64 | " model.fit(x_train[train], y_train[train])\n", 65 | " preds_train[name].extend(_predict(x_train[test]))\n", 66 | " preds_test[name] = _predict(x_test) \n", 67 | "\n", 68 | " return _join(x_train, preds_train), _join(x_test, preds_test)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 3, 74 | "metadata": { 75 | "collapsed": true 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "# load processed dataset\n", 80 | "df = pd.read_csv('../../data/processed.csv')\n", 81 | "\n", 82 | "# 
split target from data and create train/test split\n", 83 | "x = df.drop('target', axis=1)\n", 84 | "y = df['target']\n", 85 | "x_train, x_test, y_train, y_test = train_test_split(x.values, y.values, test_size=0.2, random_state=1337)\n", 86 | "\n", 87 | "# split odds for backtest\n", 88 | "odds = x_test[:, 1:25]" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 4, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "Getting predictions from LogisticRegression..\n", 101 | "Getting predictions from KNeighborsClassifier..\n", 102 | "Getting predictions from RandomForestClassifier..\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "# get predictions from 1st level models\n", 108 | "models = [LogisticRegression(class_weight='balanced'), \n", 109 | " KNeighborsClassifier(), \n", 110 | " RandomForestClassifier(class_weight='balanced')]\n", 111 | "\n", 112 | "x_train_stack, x_test_stack = StackEnsemble(models, x_train, y_train, x_test, prob=True, orig_data=True)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 5, 118 | "metadata": { 119 | "collapsed": true 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "# save stacked data for XGB hyperparameter tuning\n", 124 | "tune = pd.concat([pd.DataFrame(x_train_stack), pd.Series(y_train)], axis=1)\n", 125 | "cols = list(df.columns)[:-1]\n", 126 | "cols.extend(['lr_h','lr_d','lr_a','kn_h','kn_d','kn_a','rf_h','rf_d','rf_a','target'])\n", 127 | "tune.columns = cols\n", 128 | "tune.to_csv('../../data/processed_stack.csv')" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 6, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "name": "stderr", 138 | "output_type": "stream", 139 | "text": [ 140 | "/home/daniel/miniconda3/envs/three/lib/python3.5/site-packages/sklearn/metrics/classification.py:1113: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 
in labels with no predicted samples.\n", 141 | " 'precision', 'predicted', average, warn_for)\n" 142 | ] 143 | }, 144 | { 145 | "name": "stdout", 146 | "output_type": "stream", 147 | "text": [ 148 | " precision recall f1-score support\n", 149 | "\n", 150 | " 0 0.56 0.86 0.68 1104\n", 151 | " 1 0.00 0.00 0.00 604\n", 152 | " 2 0.50 0.51 0.50 649\n", 153 | "\n", 154 | "avg / total 0.40 0.54 0.46 2357\n", 155 | "\n", 156 | "0.542638947815\n" 157 | ] 158 | }, 159 | { 160 | "data": { 161 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAA6wAAAEOCAYAAACNcJqIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XncPed8+P/X2ycbIgkSWyKJENIgsYRGrPGtEko0tuCr\npTT019Ao30q1qpaili4IkRK0SlqNJUjsao/sspAQESSlgiBFre/fH9fcPudzPmedOXNmzn2/no/H\nPO77nDPvua4zc11zzTVn5prITCRJkiRJ6ptrdZ0BSZIkSZJGscMqSZIkSeolO6ySJEmSpF6ywypJ\nkiRJ6iU7rJIkSZKkXrLDKkmSJEnqJTuskiRJkqRessMqSZIkSeolO6ySJEmSpF7apusMjLLrrrvm\n3nvv3XU2JEmSJEkLdvbZZ38nM3ebZd5edlj33ntvzjrrrK6zIUmSJElasIj42qzzekmwJEmSJKmX\n7LBKkiRJknrJDqskSZIkqZfssEqSJEmSeskOqyRJkiSpl+ywSpIkSZJ6qZePtem7vY9931zzX/6S\nB7WUE0mSJElav/yFVZIkSZLUS3ZYJUmSJEm9ZIdVkiRJktRLdlglSZIkSb1kh1WSJEmS1Et2WCVJ\nkiRJvTRThzUiHhARl0TEpRFx7IjPHxsR50fEBRHxmYg4cNZYSZIkSZJGmdphjYhNwHHAYcD+wKMj\nYv+h2b4K3Dszbw+8ADhhjlhJkiRJkrYyyy+sdwUuzczLMvNnwEnA4YMzZOZnMvPq6uXpwB6zxkqS\nJEmSNMosHdbdgW8MvL6iem+cJwKn1YyVJEmSJAmAbRa5sIg4lNJhvUeN2KOAowD23HPPRWZLkiRJ\nkrSCZvmF9Urg5gOv96je20JEHAC8Hjg8M787TyxAZp6QmQdl5kG77bbbLHmXJEmSJK1js3RYzwT2\njYhbRMR2wJHAKYMzRMSewDuAx2Xml+aJlSRJkiRplKmXBGfmLyLiaOADwCbgxMy8KCKeUn1+PPBX\nwA2B10QEwC+qX0tHxrb0XSRJkiRJ68hM97Bm5qnAqUPvHT/w/5OAJ80aK0mSJEnSNLNcEixJkiRJ\n0tLZYZUkSZIk9ZIdVkmSJElSL9lhlSRJkiT1kh1WSZIkSVIv2WGVJEmSJPWSHVZJkiRJUi/ZYZUk\nSZIk9ZIdVkmSJElSL9lhlSRJkiT1kh1WSZIkSVIv2WGVJEmSJPWSHVZJkiRJUi/ZYZUkSZIk9ZId\nVkmSJElSL9lhlSRJkiT1kh1WSZIkSVIv2WGVJEmSJPWSHVZJkiRJUi/ZYZUkSZIk9ZIdVkmSJElS\nL9lhlSRJkiT1kh1WSZIkSVIv2WGVJEmSJPWSHVZJkiRJUi/ZYZUkSZIk9ZIdVkmSJElSL9lhlSRJ\nkiT10kwd1oh4QERcEhGXRsSxIz7fLyI+GxE/jYhnDn12eURcEBHnRcRZi8q4JEmSJGl922baDBGx\nCTgOuB9wBXBmRJ
ySmV8YmO17wNOAh45ZzKGZ+Z2mmZUkSZIkbRyz/MJ6V+DSzLwsM38GnAQcPjhD\nZn47M88Eft5CHiVJkiRJG9AsHdbdgW8MvL6iem9WCXw4Is6OiKPGzRQRR0XEWRFx1lVXXTXH4iVJ\nkiRJ69EyBl26R2beATgM+OOIuNeomTLzhMw8KDMP2m233ZaQLUmSJElSn83SYb0SuPnA6z2q92aS\nmVdWf78NvJNyibEkSZIkSRPN0mE9E9g3Im4REdsBRwKnzLLwiLhuRFxv7X/gt4EL62ZWkiRJkrRx\nTB0lODN/ERFHAx8ANgEnZuZFEfGU6vPjI+ImwFnATsCvIuIYYH9gV+CdEbGW1lsz8/3tfBVJkiRJ\n0noytcMKkJmnAqcOvXf8wP/folwqPOyHwIFNMihJkiRJ2piWMeiSJEmSJElzs8MqSZIkSeolO6yS\nJEmSpF6ywypJkiRJ6iU7rJIkSZKkXrLDKkmSJEnqJTuskiRJkqRessMqSZIkSeolO6ySJEmSpF6y\nwypJkiRJ6iU7rJIkSZKkXrLDKkmSJEnqJTuskiRJkqRessMqSZIkSeolO6ySJEmSpF6ywypJkiRJ\n6iU7rJIkSZKkXrLDKkmSJEnqJTuskiRJkqRessMqSZIkSeolO6ySJEmSpF6ywypJkiRJ6iU7rJIk\nSZKkXrLDKkmSJEnqJTuskiRJkqRessMqSZIkSeolO6ySJEmSpF6aqcMaEQ+IiEsi4tKIOHbE5/tF\nxGcj4qcR8cx5YiVJkiRJGmVqhzUiNgHHAYcB+wOPjoj9h2b7HvA04OU1YiVJkiRJ2sosv7DeFbg0\nMy/LzJ8BJwGHD86Qmd/OzDOBn88bK0mSJEnSKLN0WHcHvjHw+orqvVnMHBsRR0XEWRFx1lVXXTXj\n4iVJkiRJ61VvBl3KzBMy86DMPGi33XbrOjuSJEmSpI7N0mG9Erj5wOs9qvdm0SRWkiRJkrSBzdJh\nPRPYNyJuERHbAUcCp8y4/CaxkiRJkqQNbJtpM2TmLyLiaOADwCbgxMy8KCKeUn1+fETcBDgL2An4\nVUQcA+yfmT8cFdvWl5EkSZIkrR9TO6wAmXkqcOrQe8cP/P8tyuW+M8VKkiRJkjRNbwZdkiRJkiRp\n0Ey/sGpx9j72fTPPe/lLHtRiTiRJkiSp3/yFVZIkSZLUS3ZYJUmSJEm9ZIdVkiRJktRLdlglSZIk\nSb1kh1WSJEmS1Et2WCVJkiRJvWSHVZIkSZLUS3ZYJUmSJEm9ZIdVkiRJktRLdlglSZIkSb1kh1WS\nJEmS1Et2WCVJkiRJvWSHVZIkSZLUS3ZYJUmSJEm9ZIdVkiRJktRLdlglSZIkSb1kh1WSJEmS1Et2\nWCVJkiRJvWSHVZIkSZLUS9t0nQHNZu9j3zfX/Je/5EEt5USSJEmSlsNfWCVJkiRJvWSHVZIkSZLU\nS3ZYJUmSJEm9ZIdVkiRJktRLdlglSZIkSb00U4c1Ih4QEZdExKURceyIzyMiXll9fn5E3Gngs8sj\n4oKIOC8izlpk5iVJkiRJ69fUx9pExCbgOOB+wBXAmRFxSmZ+YWC2w4B9q+k3gddWf9ccmpnfWViu\nJUmSJEnr3iy/sN4VuDQzL8vMnwEnAYcPzXM48M9ZnA7sEhE3XXBeJUmSJEkbyCwd1t2Bbwy8vqJ6\nb9Z5EvhwRJwdEUeNSyQijoqIsyLirKuuumqGbEmSJEmS1rNlDLp0j8y8A+Wy4T+OiHuNmikzT8jM\ngzLzoN12220J2ZIkSZIk9dksHdYrgZsPvN6jem+meTJz7e+3gXdSLjGWJEmSJGmiWTqsZwL7RsQt\nImI74EjglKF5TgF+rxot+GDgB5n5zYi4bkRcDyAirgv8NnDhAvMvSZIkSVqnpo4SnJm/iIijgQ8A\nm4ATM/OiiHhK9fnxwKnAA4FLgR8DT6jCbwy8MyLW0nprZr5/4d9CkiRJkrTuTO2w
AmTmqZRO6eB7\nxw/8n8Afj4i7DDiwYR4lSZIkSRvQTB1Wrba9j33fzPNe/pIHtZgTSZIkSZrdMkYJliRJkiRpbnZY\nJUmSJEm9ZIdVkiRJktRLdlglSZIkSb3koEsaa57BmmDLAZuaxEqSJEkS+AurJEmSJKmn7LBKkiRJ\nknrJDqskSZIkqZfssEqSJEmSeskOqyRJkiSplxwlWL3S1cjE88R2naYkSZK0UfgLqyRJkiSpl+yw\nSpIkSZJ6yQ6rJEmSJKmX7LBKkiRJknrJQZekFbNRBpfqagAuSZIk9Ye/sEqSJEmSeskOqyRJkiSp\nl+ywSpIkSZJ6yQ6rJEmSJKmXHHRJkipdDfTU94GpTHO+WEmStDj+wipJkiRJ6iU7rJIkSZKkXrLD\nKkmSJEnqJTuskiRJkqRectAlSZIWqO6ATQ76ZZrLTnMw1vI3PU5SN2b6hTUiHhARl0TEpRFx7IjP\nIyJeWX1+fkTcadZYSZIkSZJGmdphjYhNwHHAYcD+wKMjYv+h2Q4D9q2mo4DXzhErSZIkSdJWZvmF\n9a7ApZl5WWb+DDgJOHxonsOBf87idGCXiLjpjLGSJEmSJG1llg7r7sA3Bl5fUb03yzyzxEqSJEmS\ntJXIzMkzRDwceEBmPql6/TjgNzPz6IF53gu8JDM/Vb3+CPAsYO9psQPLOIpyOTHAbYBLmn21TuwK\nfGfJsaZpml3GmqZpdhlrmqbZZaxpmmaXsaZpmouI7dJembnbTHNm5sQJuBvwgYHXfw78+dA8rwMe\nPfD6EuCms8Supwk4a9mxpmmaGy2/prm+0ly1/Jrm+kpz1fJrmusrzVXLr2murzRXaZrlkuAzgX0j\n4hYRsR1wJHDK0DynAL9XjRZ8MPCDzPzmjLGSJEmSJG1l6nNYM/MXEXE08AFgE3BiZl4UEU+pPj8e\nOBV4IHAp8GPgCZNiW/kmkiRJkqR1ZWqHFSAzT6V0SgffO37g/wT+eNbYdeyEDmJN0zS7jDVN0+wy\n1jRNs8tY0zTNLmNN0zQXEbsSpg66JEmSJElSF2a5h1WSJEmSpKWzwypJkiRJ6iU7rJpbROwQEY/o\nOh/ziIi7dJ0Hrb6I2LbrPEiS2rGKxzerpk476nbRTIMuabyI2B54GLA3A+szM58/Y/z1gX2BHQZi\nPzFh/j+dtLzM/LsZ0tytmveqWfJYxWwC7g88Gvht4JPA22eNr5axE+W7XpaZV0+ZdxvgMGC/6q0v\nAu/PzF/Mkd7+VX4fDXwfOGjMfEdMWk5mvmPWNEcs+36Z+aExn70HGHsTeWY+pG66U/J0G+Aotly3\n/5SZl8wYvwtlOwJ8KTN/MGPc3Nu0jW0TEfegPDd65EBxI+YP4L7AY4DfAW48b5ozpLHQ7xkRu2bm\nzA8Rr/YJewC/pNTP/5knvWoZN8jM780b10QXaTYREQ/JzJkf7RYR1wb2nLVuVjHbrNWpiNiRUt8u\nm7aeImI/4HBg9+qtK4FTMvOLs6Y9YplPyMw3zjDfbsAfsnU7+gczxN4O2J8t29B/rpPfeUXEnTLz\nnBnnPRR4KnCb6q0vAq/OzP+cMf76wC8z84czzv8qJrcvTxsTd6dJy531+1bLui5wBHBkZj5ohvl3\nBh7AlmXwA5n5/VnTrGMRxzcDy9px0v5zEes3InYH9mLLujL2uLGKmbv81S1DY5Y1dzs673apW8ci\nYpdFlbGIuBVwIPDFzPzCIpapwg5rc+8GfgCcDfx0nsCIeBLwJ5QDxfOAg4HPUir1ONer/t4GuAub\nn2v7YOCMCWkF8FzgaMov6xERvwBeNalzHRH3puxgHlgt/+7ALTLzxzN8v7cAx2TmdyLi/sA/AV+i\nPJv3mZk5csdT7Yw/CnwTOBcIyg7uFRFxaGb+14Q092ZzJ/XnlJ36QZl5+YSsPnjo//cMvE6gdocV\neAOw55jPXl79Dcq6edIsC4yIaxjdiARl0O6d
JsTejfJ9XkcZVS6AOwIfi4gjMvP0CbHbV3EPBb5a\nxe4VEe8EnpKZP5sQW3ebLmTbRMQdKeX4EVXep8ZVz5R+DOX73oAyEvozp8R8r1r224CP5uyj2tX+\nnhFxGPAaysHdU4G3ADtU2+v3M/MjE2L3B15J6SjsSdk2N4qIjwN/Mu5kRETcHXg98CvgD4AXAvtU\nz9t+ZGZ+dkKaf5CZJ1b/7wG8Gbgz8AXg8Zn5pTFxf5mZLxzI97uAbat926My83MT0rw58DLKgfBp\nwMsy8+fVZ+/KzIeOidsP+Pvqez4NeA6lPHyJsm7HduZGnIQI4LjqxM3UkxAR8WDKPmI74BYRcQfg\n+ZNOZEXE4yl16ruUtuU4Snm/dUT8WWa+bUzcsyj7zJPY3I7sAbwtIk7KzJdMyusEzwOmdlgp7egn\ngQ9TTprMJCKeC9yH0mE9lXJC7FPA2A5rRNyesr9dKwvPWjuBGhFnZOZdx8QNdzQCeHe1nWJSRyMi\nHgS8Gng+ZZ0EcCfgxIg4unqawqi4mwEvoZxI2BG4shR3TgT+Zq0Mj3HWwP/Po7T/s3jFwP93rpYT\n1etk8vEJ1T7gQZR95/2Bk4HjJ8VUcb9X5fGDlH0ZwKHAiyLieeNOQjRsD2sf30zwBca3+bB5/e5A\nOYn++SqvB1DW9d0mLTwi/hZ4VJXOWl1JYNIPHbXKH5vL0N0pdezfqtePqNKfqmY7Ovd2afAdAb4T\nEf9JabdPnqfzGhEfAx5RHec+jtJGfAL464g4ITNfNeuyhpZ7QWbefso8BwOvAn6D0k5sAn40qcyv\ntMx0ajABFzaIvYCy0zqver0f8I4ZYz8BXG/g9fWAT0yY/0+BD1Eq/dp7+1Cekfv0MTFXAJ8BHreW\nFvDVeb7fwP+fAfau/t8V+PyEuDdROrrD7z8NePOEuM8CF1F2GPvOm99q/nNrbMdTxkzvoew8Wkm3\nThzlAO0+I96/N3DalNjnA/86otz9M/CCKbG1tmnD73prygHQxZSD2KcCX5sh7kXAl4GPUE4i3HDW\ncgRcQjkp9GnKQdc/Age3vE3PozRYdwO+u5Ze9d45U2JPB25T/X/XtW1B+bXrPybEnQHcvkrzO8A9\nqvfvBHx6SprnDPz/75Rf+68F/C7wkRnj3gccNpDvz0xJ80PAU4A7UBr4zwA3nLa+KfvZB1M6c18D\njqQcCD14Ul6r2J8D76V0Lt5YTddUf0+cYbueDew8mD8G9qljYi6g7F9vAfwQuGX1/o2B8yfEfQnY\ndsT72wFfnpLm+WOmC4CfzlqG5ynzQ9/3WlTtSfU9PzQl5lOUX/F2oRw4XzSwniaVhV9V5eZjA9NP\nqr8fnZLmfwIHjnj/AODjE+I+SrW/pvxS+ffAdSkniE6YYz213r5QfgF7I2W/95aqjlw+R/wlwC4j\n3r8+5UqeRee39vEN5Xhq1PQM4HszLuMdwO0HXt+OCfvcofW0/ZzbsVb5G5jvdGCbgdfbAqdPianV\njtbdLk2+Y7Uf+R3K8c13KSfQjgSuPUO6Fw78fyab25XrMGGfW81zxJjpYcBVM6R9FnAryonmTcAT\ngBfPUzZWaeo8A6s+UX6lun3N2DOrv+et7YCAi2aM3WKnBWwPXDJh/nOBXUe8v9u4nTzwD8DllIOu\nx1Aaysvm+H4XATtV/38KuNbgZxPiLp70vSd89i7g65SzbIdU782c32r+iQf4Y2KuppxRvvfQdB/g\nv9tKt04cExr+Seu2+vxC4Doj3t+RKSdu6m7Tht/1V8DHgVsNvDe1PADfrsrrwwfq5UzliC07VXsC\nfwacA1wGvKil7zmY5jeGPpvYEWDoxNHQsr44Ie7ccfNNy/9QGsPpT+owDMadN2vcmPn/b7V/uuWk\n/A59z0vn/J53oRys/dHAe1+dY7uePiIP0w6Azhv4/7+GPpvUYb0Y2GvE+3vNsF/4b8qJgL2Gpr2H\n8zBhGS8E
HjjruhmIO6P6ezawE+Vkwth9zZgydyjlwPrgKWXhYdX+5LB5t+ekPE35bDivZ88SN2I5\nrbcvbN7fDp4Un+d44UvAziPe35kpJ01q5rf28Q3wv8ALKCdEh6fvz7iMrY6BRr03Yp7TgB3n3I61\nyt/APJcANxh4ff0Z9gu12tG626XJd2TLtuXawCMpJxS+C7x1Suy5wO7V/x8Ddqj+3zRte1JOar6J\nzSc0B6drZvjOZ1V/zx/MzzxlY5UmLwmuKSIuoFyGsQ3whIi4jHJJ8NplKAfMsJgrqvsB3wV8KCKu\nppzFn8U/A2dUl2NCudziTRPm3zZH3NOWmVfFmBvgM/OYiHg6peP1aOClwM4R8Ujg1Jx+n9vzKJea\nHkf5xentEXEK5QDh/RPifjLhs7GXhGTmQ6t7YI6gXI6xL7BLRNw1M8deLr0ApwM/zsyPD38QEWPv\nPYuIGwy83FTdo7R26RXZzr1510z47EdTYn+VIy7Jycz/iYicEltrmzZ0BOUs6cci4v2Uyx1jcggA\nNwXuRynz/1Bd8nPtwXsDJxjcfl+n1JmXVpeWPqrGd5jF9yPiyZQD9qurOvvvwG8B0+roVyLiOZRf\nco6gnDxbGxRj0qB8g5/9+dBn201Jc4+IeCVlXe0aEdvm5ksbJw3GsU+1/4hqGdcZKI/TBvHYNiJ2\nyMz/BcjMt0TEtyhXmFx3Qtymgf+HxweY+D0z88yIuB/w1KoMPYsJ94ONcFFEPIayb9iXcjXCZ6bE\nfD0iXky58uHiiHgF5cDrtyiX449zDPCRiPgy8I3qvT0pZ++PnpLmeykH0OcNf1BdZjfWwOWcATw7\nIn5KOYibejln5ayqDf0nSqf1fyhX2kwUETtndbl7Zn4sIh5GuXT1BuNiMvPkiPgA8IKI+APKr2mz\nbs9J+9ZJn10VEf+XciB8BOVAfu0Wn74Nmnknyv72w9Xx0ElsWX+m+RvgnIj4IFuWwftROocL1fD4\n5hzgXZl59vAHUW71msX5EfF6yq/RAI+lXJkwUmy+n/THwHkR8REGbkPLyfeT1i1/a14CnFvtxwK4\nF/DXU2JqtaMNtkuT7zjYbv+E0n7+e3U8OfJ2kQFPBz4YESdTToJ+tNpP3IPpt0OcD7w8My/cKkMR\nvzUlFuDH1SX450XESyn7+L7tFxYmqh655hQRe036PDO/Vs13/ZwywFA1370pZxLfn9W9gNNiq3tq\n7lm9/ERmnjvw2RaxEXFOZo682X/SZ0PzbcvmG+Dvn5m7zhBzK8rlhbemdO6voOzoPzAh5jJG3+MQ\nwEsz85bT0q2WcyNKJ+FIysAlNx8z3+DgR/di6F6QbG/wo6+y+WBtWGbmPmPiBu+NezlD6yon3BsX\nEd+mHEhs9RHl3sOxAyFExOcpjcio/H4sMw+cEFtrmy5i20QZ+ONwSrm9L+Vkzzsz84MzxG5PuVTo\n0ZS69pHMfMyE+f8uMycOjDYmrvb3jHJ/5l9SfuF4XpXXJ1JOfj0zJ99nuQvwbMr9SZ8HXpKZ11QN\n9W/kmHuaI+IhwIeHT2BExC2Bh2XmSyek+ftDb52SmVdHxE2Ap2Xms8fE3XvorbOrkyU3Bh6emcdN\nSPPplLPoHx96/46U8ne/MXFPBv51+CCp2q8dnZnHjEtzaP6bUX45OGhcvR4Rcx3gLyiXWgalc/2C\ntU73mJidKPeIJeVKk/tTLhP7GvDCzBzbaY2Ia1Eurx4c8ObMzJz5ntK2RMRtM/OiKfPsTbmi5/yB\n97aKq04CXDZctiNiT+A5mfmHM+TnjpQTGLfNzBvNMP/3GX2PYVAup7/+mLg9Kfv4/Sknk/5fZn4z\nIm5IuVT45AlpDt7beR02nxiceDIgthxo50iG2ospHaO1ZRxC2Q89jLJfeWdmnjBD3PUpZXZ40KVJ\nx0G128Oh5cx8fBNl4MLvjvoRICJunJn/PUN6OwB/RNnfQykfrx1Xv0fsN7
eQmW+ekFat8je0jJsA\nv1m9/Fxmfmvgs4n1c952dCh2pu3S5DtGGVPl5eM+nyGPO1N+DR48zn13Zl48Je6elNuUvj7is4My\n86wRYYPz7EW5wmU7Ssd5Z+A1mXlprS/Sc3ZYWzZrZ7Dt2Ij4JaPPMgXlEoatfqGIMnLjbjk00llE\n3JZyucJWlaxmXv88M1888HriWanMfEKNNPZaO4kw4rPhA+Hh9Lb65bRLU9ZP5oSRNRs2epdTOkVz\ndbCr2FrbdNHbpjogegRlkJ7/M2fs9YDfzQWMQDqizLdeBofTnDP2VZn51GXFVbG18ttRmktft03M\nk2ZMGfW0rdih5dRqCxu2oRPXUfUr5/VyaNTeUWWh7frdpPyNWFbtNmLEsq5F+XX/yEntUhN128OI\neCjl3vdvj/js2tUvbU3z1mRfdHJmPmwRcUsofzPXs2ntaN3tsqQ2tMn27KJdqlWG+soOa8si4tzM\nvOMqxQ4s4yTK2ZpPDL1/T8p9WTOdIZshndoHFUPL2ZXy68LVlIFOXkY5m/cV4BnjzjpN6JjvT7nx\nfebH/4xY9tiR3mLy8PY/Bb6emZMu4V33JjVeNZc39TEosYBHR82Qj+ETSgv9nrOkuYxY06wfG6Mf\ne/UDykAbr5v0S2vdNMfM+/XMnDTqaSuxQ8up1Z41bEMXVhbart+z5nWW/V+DPLTWnk1qR+uKiP+g\nDBr3Y8ql9p+mbKOtLs9skEaT/cLCyvwSyt/IvFadyKsz8/wol/Tei3I89prMHPlUjbrbpc3juIFl\n9eIHqDniGvcD+sR7WNvX5IxAq7EzNF63Gu6sAmTmJyPitQ3ytlVWhvJVt8PwVsrB3L6UUUzfSBml\n9Z6UR3DcZ0zcqyiPBRl2Q8qllhM75jH+GZoB3GRC6CtGvLe23bYF9oyI43Lo8somHaq6nfoqttEB\nSZ3GizI4znERMfdBRdR/DMr1Bv5/MuVRPmsWdYZv+Ffq2t+zQZp910V+66bZRl4vowyKt/YomkdR\n7kG/NeWezcctKqEJ+5SgDKrWSuwc6ta7Ls7IjyoLbdfvrdKcsv87Msdf7l+3jXgFW97iMrjux7Zn\nA+nWbUdrtS2Z+fAqdm/gkGp6cpTLsM/MzAdOSnMJFlnm2y5/W6UZZeySA4DtI+JLlH3B+ymPqDmR\ncs/u1guqv10aHcf1XN32ZV39ImmHdYOo2Xhdb8R7a6YNcjKP4Uo1qcMwyY0z89nVd/paZr6sev/i\niPjjCXFNO+b/RhkOfdTOYYcR7615FmVU12/Cry/FehhlYI2/plzCfS5l0IFBddcP1O/UQ80ONmzR\neO0QZSCqthsvKIOUvLD6/2WU54qeFhF3pdxPeMiYNJ83kO+HDr5eoC3KypIOnlat8eoiv33qGB2S\nmXcZeP2eiDgzM+8SERPv56zhRZQ6MmowlGmDeDSJXY+2KgtLqN+jyl+t/R/124i67dmaWu1o3bZl\nTWZeHuV+0mtX09r/60ZHnfNDM3P/at1eCdwoM38ZEa9jwuBSA3med7ss6weWLqxa290KO6zta3Lm\nfZGxdRqvSyPigTn0wOWIOIxy9n9Rtshrgw7DL6v4jIjhwRB+NSGuace87khvx1Pu7yEi7gW8mPKs\n0DtQLvl7RJQHUW+hYYeqbqcemh2QLLvxGrZ7Zp5WLeuMiJg1tq2GYmTdbvngqYt9kWnWj90xIvbM\naqyA6uBy7RfLnzVIb1SaTUY9XcSIqdPU/b5N1tPCy0KL9XtaXufZ/9VtI2q1ZwPqtqO12paIeDbl\n0tPdKI9mLusZAAAgAElEQVRtOZ0yUNlRubiBxnq1L2qx/I2qZ2sjsv9vRHxtbZ1Wx2c/HzE/0Gi7\nLOMHlr4czy8jzd7ZiGc/FyYiNkXExFHAgJEDu3QVW9mi8WL8DusYynDkb4qIp1bTmylnW/9kSvqD\neX36lNnePuGzeToM+0TEKVHu/Vr7f+
31LSbEXRoRW51hnKNjfgzwwzGf/e6EuE0Dl2Q/ivIg+JMz\n8zmUs9uMOggcMm+H6teNBjBPpx7KAclPYYsDkjdT7qt7XXXp1bgDkl83XpSDoMF8TGy8IuI9EXE6\n5fEp21EarwMy89Ap+R0sD3tEGXV1zSKvENhCnTLf8Hs2qmdV7LQREv9xUXEDsXPnt8M0l7puK88A\nPhURH4vyeJhPAs+MMur1pMHR6qS5NpLwKAdNWVaT2F+LiN0j4pCIuNfatPZZZh68yLiGZb5OGapd\nvxuUv7r7v7ptRNP2rG47WqttAX4PuBnl19h/pTxv89xZO6sNytBHqr9/OyX2WYuIq2IatS/VMuat\nZzeKiD+NiGcM/L/2ercJSdXdLo2O4xbQtiytXWpSFlaZgy41FBHvBp6aNUbMXWZsbB7yOyhnr/bM\n6pEUEXFhZt5uRMxxlPunbg3ctnr7IsoOZOYBPyLijMy866zzD8XOMzjIqFHi1gp45JhR4qI84/B9\nlHs71hrUgyjr6Xcy80vz5Xps/oZHhr0QuENm/qI6AXHU2iUt47bJiGXOdTP+UDm4J5uHgZ9l6PfP\nZ/XomqpsXJWZf129Pi8z7zAh9grKYyCCMvz62n22ARyT4x85dDHll9v3ULbP57J6fuIM37XWY1Bi\n8zOWA7glsHbP1szPWJ63zDf5nnXTHIo9fVKnYNFxVWyt/HaU5tLXbRW7PbBf9fKSWfe7TdKcstxW\nRsmsDrweBXyBqsNEqWsTH1tVN66KrVvm5y4LTet3zTTr7v9qtRGLaM9mMaIdrdW2VLE3YPMlsgdT\nrmD4POX+zmnP0KxVhiLiC8CTgDdQ7qscvsLsnEXGVbFNy9/c9SwinjtpmZOuDKuzXRZxHNdwX720\ndqlJWVhldlgbiohPAHek3Ovx68fGzNhgLi22TuMVEX9CeQ7bTSkPUn5bDjzrdVYR8feUM7r/NpTX\ncTvmWh2GiDgc2GPtu0TEGZQzeQk8KzPH/QqykI75NMOdy4j4C+CBlLPYewJ3qi6XuRXw5sy8+5jl\n1O5Q1e3UV7G1D0iW3Xg1UZ3t/DTwPUacoc8xj0caWsZcZb6KaXrwNHeaA7GvpTz78O1DsROfYVg3\nrkl+O0pz6eu2ir0d5Rmcv76HL2d4rFKTNKcst5WRLqPce3hAjh98bdwya8VVsXXLfN0yVLt+Nyl/\n82pw4rdWe1Yjf8PtaO22ZWAZ2wB3pgzW9GTgFpm5aYa4uctQRDyc8mz63wI+zpadjczM+y4ybiC+\nSfmrXc/GLO+YzPyHGeabebss4jiu4b56ae1S07KwquywNjRm5z7TM5+6ip1XlIcTH1lN16bsFN42\nyxmrKv5jI96etGOu1WGIiE9TBpD6RvX6PMql0dcF3phjnru5qI75NDF6uPmDq3Q/mJk/qt67NbDj\nhJ1V7Q5V3U59NW8rByRtNF7V/EcDJ2Xmd6o8nkgZnOMS4Ik5ZoTEKJf2HEL5ZesCqhEVKQ37TI+E\nmLfMD8XWPXhqkuaoA5bMKc9OrBtXxdbKb0dpdrFun0sZ4GZ/4FTgMOBTWQ2g0kaaMyy3rQ7racAj\ncs5nttaNq2LrbpfaZaGKn7t+10mzwf6vSRsxd3s2r1Ht6IR5x7YtEfEQyn7+7pQOzkWUff1nKfv6\nqY9BaVCGAvhlZs51W17duKFl1Cl/tevZmOWNfdxV3e2yiOO4hm3LUtulRZSFVWOHdYOo23iNWM4d\n12JnOYiumddaHYaoRtAceP3qzDy6+n/q5RpNO+bTNDnYG1pO7Q5V3U79QPzCD0jaaLyq2Isy87bV\n/+8DXp+Z74yI+wB/M62DHRHbUS4pOoRyWdHdgO9n5v5zfsWpFnHwpPUlypUUBwLnZuaBUa6GeUtm\n3q/DPLXVYT2Z8l0/QnWfPEBmPm3KMmvFLVsX9bvu/q9pG9G2ecrglLblHWxuO8/OzKYDmc0lyngg\nr8
7MM9uOa1r+Fl3PIuIbOf42oEbbpe3juD6pW4ZWVmY61Zgoz8P74YjpGuCHfYsFLhr4/33A71b/\n3wf49JQ0twEeTLkB/lvAScDhc6yrG1OutT+ter0/pZM8LW47yk72mcDJwH8BX5gw/6UTPvvKnNv3\njpQRb3+5wDJz7oLL4Fzrp4o5c+j1qwf+P32R+Zvje3xjwmfvoAw+czdguzmXe8mE733+DPE7Aw8A\nXgB8mPKohzfOmPZcZb7J96yb5lDsrSkHIxdWrw8A/rKtuCb57SjNLtbtGdXfs4GdKJd9XdxmmjMs\nt/Y+bFJstQ/7/aHp6BmWWSuu4XaZuyw0rd8106y1/xsxb+dtRN0yyIS2Zcz8u1L9kNNmGarmvZjy\nKKivUEYzvmDSdmkSt4DyV7uejVne15dUVuY6jmu4Pbtol2qVoVWdOs+A05I2dI3GC7gf5dfUbwGn\nUG7uvm6NtE8DHgl8vnq9DXDBDHFzdRgoHeo/HPH+kyln2KalV7tjDmwCnj5lnmcveJvO3aFigZ36\nBX6PuRqvWQ8qgL8B3gTsAzybMgrlXpRRTd87Ie4Eyhne9wPPo1yKef0581irzDdcj7XTpNwHc1cG\nDgbXGt424prkt6M0u1i3rwF2AZ4CfJly4PXGttKs9mEvnzLP4xcdW312DnC7gdePpgwMM+171opr\nuF1Wom5Tf//XWRvBgttRJrQtlPs4/5PSmbsjcCGl3f828IA2y1A1316jprbiRixn5s55nXrG5B9X\nflEzbuIPOlV8k+O4Jtuzi3ZpIWVhVabOM+C0pA1do/ECPkoZiWyug/URyzmz+jtYkc+bMH+tDgNw\nI8plJB8DXlFN/0m57OXGE+IW1TE/Y0nbsnaHioad+gZ5rtt4NTqoAB4PfI5y3+01lFEOXwTsPCHm\n/ZTO/5uAo4DbM8dZ95plvlEjXSfNRcSaZrvbZWD+vSm3YbRS/gbmqf0LWsPYfSgHxftRBhP55KQ6\n2jRu2WWhaf1ukNfHM//+r5M2YiCdudrRKet2UttyFvDbwCOAq4GDq/f3Y8ZfcRdRt5cx0bwdrV3P\nlvgdGx/HNdmeXcVupGkbtCFk5l9ExOMp1/PfEtiecjD+LuCxY2IWNdLYjyLihlQjDVb3QU4aUn3P\nKn9fpjwI/Arg+9MSycxvA4dExH3ZPErc+zLzo1NC/xx4K/CMzLx6WjoTfDoiXk37IznWWj+VpwPv\niojHUBogKAMwbA88dMH5/LXMnPRQ70leTTnBsjPlBMphmXl6ROxHKcvvn5Lumygdz5ll5gOqAQ1u\nS7nk+hnA7SLie8BnM/O5MyxmrjLfYP3UTnPIdyLilgOxDwe+2WJck/x2kebS121EfCSr+wUz8/Lh\n99pIEzg3Ik6h3ujCtWMz87KIOJLSHn0d+O3M/ElbcZW662jusrCA+l2r/NXZ/9FRGzFgrna0wbrd\nJjM/CBARz8/M06vlXVx2/zNpsi9apqbtaJN6tiyLOI5rsj27aJc2FAddUusi4s7AK4HbUc7s7UZ5\nlM75E2IGOwyHVLHzdBiWrs5Ibw3SarR+hjr1F83Qqe9EDDzfNSK+mJm/MfDZ1NEiI+ImAJn5rYjY\njfJcwUsy86IZ09+DMlDFIcDvADfMzF1miJu7zDfVJM2I2Ifyy/0hlF8bvgo8Nqc8wqduXJP8dpTm\n0tZtROwAXIdypch94NePLNgJeH9m7jcqrkmaA3FvHPF2ZkujZMbmR3StuRHlQO2nVfC4R5jVihta\nRt11tDJ1u8n+r6s2Ylnt6ODgTcMDOc06sFOTfdEy1W1HF1HPVknDtmXp7dJGY4d1A2l68N4w7W2A\n21AOvi7JzK0exzImrlaHYaNY7+unyUFFRDwZOJZS5v6WcnnchcA9gJdm5hvGxD2NzScCfk41AnM1\nXZCZv5ox77XKfBMN6tmmzPxlRFwXuFZmXtNmXJP8dpFmw7i58hvl
8QzHADejXEERlIPGa4ATcsQz\ns5um2ZUoI3qONaFTXytuaBm119Eq1O26+7+NIiJ+SfkFNyijyf547SNgh8zcdoZlrEo9q9WOLqKe\nrZKG+4RO2qUNJXtwXbJT+xPl/pOvApcDf0S5r+UNbH6sTZtpnw38f8x+n+XTKDfKf50y+tm/VHk+\nkLIj6Hx9jsl37ZFE50xnJddPze/6SwbuR2LL+5N+PiX2AsovVTcE/ge4SfX+9Zl8P+nfAQ8Dbtog\n33OV+QWtq9ppApcBLwN+YxlxTfLbUZpdrNu/Anaq/n8O8E7Ks4/bTLOTUTK7mBqso5Wo23X3f11P\ny2pHuyxDHeSzdju6kaaGbcvS26WNNm2YB86KoymX99yZUqkOz8wnUm7Gf2rLaT8K2B04MyJOioj7\nx+SbRPam3Af1m5l5y8x8XGa+NjM/nzP+utWRNwEfoPwyAvAlyi8li7Y3q7l+5paZmzJzp8y8XmZu\nU/2/9nraGfCfZ+aPM/O7lNEtv1Ut82q2vMxpOM0/zcyTM7PJvUjzlvlFaJLmgZTy+oaIOD0ijoqI\nnVqMa5LfLtLsYt0+PDN/GBH3AO4LvB54bctp/hPlXrCfA2S5JO3IGdNsEtuFuutoVep2rf1fD7yJ\n5bSji9BkX7Q0DdvRjaTJ9uyiXdpYuu4xOy1nAs4Z+P/zQ58t9PmgE/JwLeAhlMvcvk4Z4fYGXa+b\nBX4/R3rr0UQ5a7lt9f8eA+/vMFwHWszD0st80zSBe1exPwLeDNyq5bja+V12mstct2v7EeDFwGMG\n35uzPMyT5oYc6bJOOep73e7D/q/md1zJclR3X+TUz6nJ9uyiLdwIk7+wbhwZEWtn0h609maUAT5a\nLwcRcQDlMTMvA06mDCX/Q8qIdeuFI731y++u/ZOZVwy8f0PKyL+t6qLM100zIjZFxEMi4p3AP1TL\n2Ad4D3DqouOa5LeLNBvG1c3vlRHxOsrZ91MjYntm3Fc3SLOrUTKXrkk5WpG63en+r4GVaUeb7ovU\nLw33CZ20SxtK1z1mp+VMlEehbDvi/d2B32o57bMp9zY9Bth+6LN3dL1uFvg970x5PuoPqr9fYo5n\nJzotZRvN/LD0huksvcw3SZNy/80bgENGfPbKRcc1yW9HaXaxbq8DHAHsW72+KeVxErOUhbpp7gN8\nmDIAzZXAp5jxQfRNYruYGqyjlarbQ/MuZf/X8LuuTDvaZF/k1L+pYduy9HZpo02OEryBRcSuwHez\n5UIQEftk5mVtptEXjvTWH9WZ+ZdQHvfzAsrgVLtSfqX6vcyc+Oy5hmkvvcw3STMidszM/1lWXBVb\nK78dpbn0ddtEg+3Z2SiZy9ZgHa1E3e5y/9fUqrSjXdRttadh27L0dmmjscO6QXTdeEXEgyiDPu2w\n9l5mPr/NNJctIs6mnGF7W9Z/cLUWJCLOYvPD0k9g6GHpOeUZrgtIf+llvm6a1a0BTxwRO/H5m3Xj\nmuS3izQbxjXKbx0NtudllMvRTszML86ZZu3YLjTZLqtQt7ve/9W1Su1oF3Vb7Wm4T+ikXdpIvId1\n43g18CLgbZRr4p+UmTcB7kUZ1KM1EXE85T6sp1LOmD4C2KvNNDviSG/9sk1mfjAz3w58KzNPB8jM\ni9tOuIsy3zDNfwFuAtwf+DiwB+WRB23FNcnv0tPsaN02UTfNrkbJ7EKtdbRCdbuz/V9Dq9SOdlG3\n1Z4m27OLtnBj6fqaZKflTAyMsgd8ceizVkcJBs4f+rsj8Mmu10mL39eR3nowseXI2OeM+6yltJde\n5pukyeYRadditwVObyuuSX47SnPp67ZhWWicJh2MkrnMqUGZX4m63eX+b0HfufftaBd126mf27OL\ndmmjTf7CunEMPp/zJ0OftX1d+Fp6P46Im1Ge03fTltPshCO99cqBEfHDiLgGOKD6f+317VtOu4sy\n3yTNtXvEvh8Rt6NcRnijFuOa
5LeLNLtYt03USrPLUTI7UHe7rErd7nL/18gKtaNd1G21p8n27KJd\n2lC26ToDWpoDI+KHlMsNrl39T/V6h/FhC/HeiNiF0vicQ+kgv77lNJeuuvfm+5T7b47NzJ9WH30u\nIu7eXc42pszc1GHyXZT5JmmeEBHXB54DnEI5w/tXLcY1yW8XaXaxbpuom+aXgY8BL8vMzwy8/x8R\nca8WY7tQdx2tRN3ueP9X24q1o13UbbWnyfbsol3aUBx0SUsV5VmCO2RmL5+r1oQjvWmULsr8qtWz\nVVpHq7Zu59HVKJmrapXK7aqwHdVGtt7rdxN2WNWaiDhi0ueZ+Y5l5WVZHOltY+uizDdJMyL+dErs\n3y0yroqtld+O0lz6um2iaZpdjpK5LA3K/ErV7VXW93a0i7qt9jRsW5beLm1UXhKsNj14wmcJrKvK\nWI30dh3gUMrlHA8Hzug0U1q2Lsp8kzSvVzPNunFQP79dpNnFum2iaZr/AlxMGeny+cBjgVkfUdMk\ndpnqrqNVq9sraUXa0S7qttrTZHt20S5tSP7CKi1IRJyfmQcM/N0ROC0z79l13iRpmog4NzPvOLAP\n25YyWuXBbcZKa2xHJY3iKMFqXUTcMCJeGRHnRMTZEfGPEXHDrvPVAkd6E9BNmW+SZkTsExHviYir\nIuLbEfHuiNinrbgm+e0ozaWv2yYapNnVKJlL16DMr1TdXkEr0452UbfVnoZty9LbpY3GDquW4STg\nKuBhlMt7rgL+rdMctWN4pLfLgbd1miN1pYsy3yTNtwL/TjkwvBnwdmYru3XjmuS3izS7WLdN1E1z\neKTLLwAvnTHNJrFdqLuOVq1ur5pVake7qNtqT5Pt2UW7tKF4SbBaFxEXZubtht67IDN7/Sy4JsKR\n3ja0Lsp8kzTXLr8beu/zmXlgG3FN8ttRmktft010keaqaVDmV6pur7K+t6PWs/WlYduy9HZpo3HQ\nJS3DByPiSMrZJyhnkD7QYX4WKiaM9BYRjvS2MXVR5pukeVpEHEs505vAo4BTI+IGAJn5vQXHNclv\nF2l2sW6bmCvN6GiUzI7V3S6rVrdXwoq2o13UbbWnyfbsol3aUPyFVa2LiGuA6wK/rN7aBPyo+j8z\nc6dOMrYgEfHGCR9n3x7roPZ1UeabpBkRX52w6MzMkffi1I1rkt+O0lz6um1i3jQj4rmTlpeZz5uQ\nVu3YLjUo8ytVt1fFKrajXdRttadh27L0dmmjscOqzkXEbTPzoq7zIS1LF2W+SZoRcb/M/NCy4qrY\nWvntKM2lr9smukhz1TQo8ytVt9Ue69n60rBtWXq7tN446JL64F+6zsAiONKb5tBFmW+S5t8uOQ7q\n57eLNLtYt02MTDM6GiWzp+pul1Wr272yztrRLuq22tNke3bRLq0rdljVB9F1BhbEkd40qy7KfJM0\n68aaZruxi06zq1Ey+6iLslDXemlDYX21o+tpu6i7/bzlCDus6of1cl36TTPzBZn51Wp6IXDjrjOl\nXuqizDdJs26sabYbu+g0r5OZ/5KZv6imtwA7zLjMJrF91EVZqGu9tKGwvtrR9bRd1N1+3nKEHVZp\nkT4YEUdGxLWq6ZE40puk1XFaRBwbEXtHxF4R8WdUI12ujXbZUqy0xnZU0lZ8rI364GddZ2BB/hA4\nhs33G2wCfhQRT8aR3rSlLsp8kzQvX3Ic1M9vF2l2sW6bGJfmI6u/Tx56/0jKWf5J96Q2ie2jy2vG\nrVrd7pv11I5e3nUGtFCXdxS7nup3bY4SrNZFRACPBfbJzOdHxJ7ATTLzjI6ztlSO9LZxdFHmm6QZ\nEWcDJwJvzcyr50izVlyT/HaU5tLXbRNtpdnVKJltaFDmV6purzd9ake7qNtqT8O2Zent0kbjJcFa\nhtcAdwMeXb2+Bjiuu+x0xpHeNo4uynyTNB9FGSjnzIg4KSLuXzWibcU1yW8XaXaxbptoK82uRs
ls\nQ911tGp1e73pUzvaRd1We5pszy7apY0lM52cWp2Ac6q/5w689/mu89XBeji36zw4LW1bL73MLyJN\nyknMhwBXAl8HngfcoI24pvldZppdrtuGZWKhaTbZh/V1/zfvOlrVur1epj6Woy7qtlM/t2cXbeFG\nmfyFVcvw84jYRDXSWUTsBvyq2yx1wuvvN44uynyjNCPiAOAVwMuAk4FHAD8EPtpGXJP8dpBmJ+u2\niZbSXFcjXdZcRytXt9eZXpWjLuq22tNke3bRFm4kDrqkZXgl8E7gRhHxN5Rnq/1lt1mSWtVFma+d\nZnX/zfeBNwDHZuZPq48+FxF3X3Rck/x2kWaDuKb5raWLNFdNg3W0UnVb7bGerS9NtmdH7dKG4qBL\nWoqI2A/4P5QHIH8kM7/YcZaWLiJOz8yDu86HlqOLMl83zYjYJzMvq5FerbiB+Lnz20WaDeMa5beO\nttKMiHdk5hHLjm1Dk3W0SnV7velTO9pF3VZ7Gu4TOmmXNhI7rFqKiLg+cHMGftXPzHO6y9HiOdKb\nBnVR5pukGREPAm4L7DAQ+/y24prkt6M0l75um6iTZnQ0SmZXGpT5larbq2TV2tEu6rba07BtWXq7\ntJF4SbBaFxEvAB4PfIXN958kcN+u8tSS11DuO7gv8HzKSG8nA3fpMlNavi7KfJM0I+J44DrAocDr\nKZckzfLIllpxTfLbUZpLX7dNNEjzUcATKCNdngW8EfhgznZmu0ns0jUo8ytVt1fQyrSjXdRttadh\n27L0dmnD6XrUJ6f1PwGXANt1nY8lfE9HenNa2+5LL/NN0gTOH/q7I/DJtuKa5LejNJe+bhuWhUZp\nsuRRMruYGpT5larbqzatUjvaRd126uf27KJd2miTowRrGS4Eduk6E0vgSG9a00WZb5LmT6q/P46I\nmwE/B27aYhzUz28XaXaxbpuonWZHo2R2oe46WrW6vWpWqR3tom6rPU22Zxft0obiJcFahhcD50bE\nhcDayGlk5kO6y1IrHOlNa7oo803SfG9E7ELpaJxDOVj8pxbjmuS3izS7WLdN1Eqzw1Eyu1B3u6xa\n3V41q9SOdlG31Z4m27OLdmlDcdAltS4iLgJeB1zAwJnSzPx4Z5lqiSO9Cbop84tKMyK2B3bIzB+0\nGbeI/C4rza7XbRPzpNnlKJldmnMdrWzdXhWr2I52UbfVnibbs4u2cCOww6rWRcSZmdm7ARPa4Ehv\ngm7KfJM0I+JTwMeBTwKfzsxr2oxrkt+O0lz6um2i4TrqZJTMZWtQ5leqbq+iVWlHu6jbak/D/ebS\n26WNxg6rWhcRf0e5zOEUtrzcoXcNUBPjRnrLTEd622C6KPNN0oyIWwD3rKaDq/hPZubT24hrkt+O\n0lz6um2iwfYcOdJlZj5xhjRrx3ahwTpaqbq9alapHe2ibqs9DduWpbdLG433sGoZ7lj9HXzY93oc\nsvuRwC0z82ddZ0Sd66LM104zM78aEf8L/KyaDgV+o624JvntIs0GcU3zW0uDNA/JzAMi4vzMfF5E\nvAI4bcZkm8QuXYN1tFJ1ewWtTDvaRd1We5psz47apQ3FX1ilBYmIk4E/ysxvd50XaR4R8RXgO8Bb\nKZc0nZeZU0fmrBvXRV67skrrKCI+l5m/GRGnA0cA3wUuysxbtRnbhVUrRxvFKrWjlqH1pcn2tCy0\nzw6rWhcROwPPBe5VvfVx4PnrbXCCiDgIeDdliHJHetvAuijzTdKMiD8B7kG5b+ziKvYTmfmVNuKa\n5LejNJe+bptosD2fA7yKMuDNcVQjXWbmX82QZu3YLjRYRytVt1fNKrWjXdRttadh27L0dmmjscOq\n1lVnTC8E3ly99TjgwMw8ortcLZ4jvWlNF2V+EWlGxI7AE4BnAntk5qa24prmd5lpdrlum2iS5jJH\nyezSvOtoVev2qljFdrSLuq32NNxvLr0t3CjssKp1EXFeZt
5h2nurzpHetKaLMt8kzep+w3sC1wU+\nA3yKMmDExMeU1I1rkt+O0lz6um2iwfbsZJTMLjRYRytVt1fNKrWjXdRttadh27L0dmmjcdAlLcNP\nIuIemfkpgCgPkf9Jx3lqwycj4sU40pu6KfNN0vws8HJgT2D76r09gGmNbd24JvntIs0u1m0TddN8\nHOWg62HAyyJinlFPm8R2oe46WrW6vWpWqR3tom6rPU22Zxft0oZih1XL8EfAm6vr9AGuBn6/w/y0\nxZHetKaLMt8kzesDH6Q0sOdRyvBnmV5268Y1yW8XaXaxbpuolWaHo2R2oe52WbW6vWpWqR3tom6r\nPU22Zxft0obiJcFqXXU/08OBWwK7AD+gPFetlw+Ul5rqosw3STMiLgDuApyemXeIiP2AF81wb2et\nuCb57SjNpa/bJhpszw0zSmaDdbRSdVvt6aJuqz0N25alt0sbjb+wahneDXwfOAe4suO8tMaR3jSg\nizLfJM3/zcz/jQgiYvvMvDgibtNiXJP8dpFmF+u2ibppvpIy0uWjKb90fTwiZh31tElsF+quo1Wr\n2ytlxdrRLuq22tNke3bRLm0odli1DHtk5gO6zsQSnEgZ6e2R1evHAW+kPJNQG0sXZb5JmldExC7A\nu4APRcTVwNdajIP6+e0izS7WbRO10szMfwT+MTaPdPnXlEvcpo502SS2I3W3y6rV7VWzSu1oF3Vb\n7WmyPbtolzYULwlW6yLiBOBVmXlB13lpkyO9aU0XZX5RaUbEvYGdgfdn5s/ailtEfpeVZtfrdllp\nRkejZHZtznW0snV7FaxqO9pF3VZ7mmzPLtrCjcAOq1pTXdOflF/y96WMlvZTICjX5x/QYfYWLiI+\nC/y/oZHeXp6Zd+s2Z1qWLsr8qtWzVVpHq7Zum4qIhwOfZsuRLsnMT7QZuwpWqdyuMttRbRQbsX43\n4SXBatPvdJ2BJXOkN3VR5letnq3SOlq1ddtUV6NkroJVKrerzHZUG8VGrN+1+QurtCCO9CZplXU1\nSq/ehekAAAChSURBVKa0xnZU0ij+wiotjiO9SVplXY2SKa2xHZW0FTus0uI40pukVdbVKJnSGttR\nSVvxkmBpQRzpTdJ6scxRMqU1tqOSRrHDKjXkSG+SJNVnOyppEjusUkMRsdekzzPTy+IkSRrDdlTS\nJHZYJUmSJEm9dK2uMyBJkiRJ0ih2WCVJkiRJvWSHVZIkSZLUS3ZYJUmSJEm9ZIdVkiRJktRL/z8A\nb+ayE7nyeQAAAABJRU5ErkJggg==\n", 162 | "text/plain": [ 163 | "" 164 | ] 165 | }, 166 | "metadata": {}, 167 | "output_type": "display_data" 168 | } 169 | ], 170 | "source": [ 171 | "## XGBoost\n", 172 | "\n", 173 | "# train xgboost classifier and get predictions\n", 174 | "model = xgb.XGBClassifier(colsample_bytree=0.55,gamma=0,learning_rate=0.1,max_depth=2,n_estimators=16,\n", 175 | " min_child_weight=4,objective='multi:softmax',reg_alpha=0.05,scale_pos_weight=1,\n", 176 | " subsample=0.4).fit(x_train_stack, y_train)\n", 177 | "preds_xgb = model.predict_proba(x_test_stack)\n", 178 | "\n", 179 | "# evaluate model performance\n", 180 | "print(classification_report(y_test, [np.argmax(p) for p in preds_xgb]))\n", 
181 | "print(accuracy_score(y_test, [np.argmax(p) for p in preds_xgb]))\n", 182 | "\n", 183 | "# get feature importances\n", 184 | "tmp = df.drop(['target'], axis=1)\n", 185 | "cols = list(x.columns)\n", 186 | "cols.extend(['lr_h','lr_d','lr_a','kn_h','kn_d','kn_a','rf_h','rf_d','rf_a'])\n", 187 | "feat_importance = pd.DataFrame(pd.concat([pd.Series(model.feature_importances_),\n", 188 | " pd.Series(cols)], axis=1)).sort_values(0, ascending=False)\n", 189 | "\n", 190 | "fig, ax = plt.subplots(1,1, figsize=(16, 3))\n", 191 | "ax.bar(range(len(feat_importance)), feat_importance.iloc[:,0])\n", 192 | "ax.set_xticks(range(x_train_stack.shape[1]))\n", 193 | "ax.set_xticklabels(feat_importance.iloc[:,1])\n", 194 | "for tick in ax.get_xticklabels():\n", 195 | " tick.set_rotation(90)\n", 196 | "plt.show()" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 7, 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "name": "stdout", 206 | "output_type": "stream", 207 | "text": [ 208 | " precision recall f1-score support\n", 209 | "\n", 210 | " 0 0.54 0.88 0.67 1104\n", 211 | " 1 0.00 0.00 0.00 604\n", 212 | " 2 0.51 0.44 0.47 649\n", 213 | "\n", 214 | "avg / total 0.39 0.53 0.44 2357\n", 215 | "\n", 216 | "0.533305048791\n" 217 | ] 218 | }, 219 | { 220 | "name": "stderr", 221 | "output_type": "stream", 222 | "text": [ 223 | "/home/daniel/miniconda3/envs/three/lib/python3.5/site-packages/sklearn/metrics/classification.py:1113: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n", 224 | " 'precision', 'predicted', average, warn_for)\n" 225 | ] 226 | } 227 | ], 228 | "source": [ 229 | "## Neural Network\n", 230 | "\n", 231 | "# define network architecture\n", 232 | "model = Sequential()\n", 233 | "model.add(Dense(64, input_dim=x_train_stack.shape[1]))\n", 234 | "model.add(PReLU())\n", 235 | "model.add(Dropout(0.15))\n", 236 | "model.add(Dense(64))\n", 237 | "model.add(PReLU())\n", 238 
| "model.add(Dropout(0.15))\n", 239 | "model.add(Dense(3, activation='softmax'))\n", 240 | "\n", 241 | "# compile model\n", 242 | "model.compile(loss='sparse_categorical_crossentropy',\n", 243 | " optimizer='adam',\n", 244 | " metrics=['accuracy'])\n", 245 | "cb = [EarlyStopping(monitor='val_loss', patience=3)]\n", 246 | "\n", 247 | "# train the network\n", 248 | "model.fit(x_train_stack, y_train.reshape((-1, 1)), validation_split=0.2, \n", 249 | " epochs=100, batch_size=128, callbacks=cb, verbose=0)\n", 250 | "\n", 251 | "# get the predictions\n", 252 | "preds = model.predict_proba(x_test_stack, verbose=0)\n", 253 | "\n", 254 | "# evaluate model performance\n", 255 | "print(classification_report(y_test, [np.argmax(p) for p in preds]))\n", 256 | "print(accuracy_score(y_test, [np.argmax(p) for p in preds]))" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 8, 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "data": { 266 | "text/html": [ 267 | "
\n", 268 | "\n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | "
B365HB365DB365ABWHBWDBWAIWHIWDIWALBH...VCHVCDVCAGBHGBDGBAlr_hlr_dlr_atarget
01.364.758.501.364.609.01.374.67.51.36...1.365.29.51.364.69.001.0073873.2546114.9980851
11.604.205.001.623.605.51.653.44.41.62...1.623.85.41.623.65.001.3429042.4331482.9039811
21.176.5019.001.167.5012.01.176.511.51.17...1.178.021.01.157.015.000.8678085.2221236.4029090
32.103.253.752.003.253.82.203.23.02.10...2.103.33.82.003.23.751.6002822.2017562.3757062
41.833.604.331.753.754.11.953.43.51.80...1.833.84.51.853.34.331.3844902.3699092.8109230
\n", 418 | "

5 rows × 28 columns

\n", 419 | "
" 420 | ], 421 | "text/plain": [ 422 | " B365H B365D B365A BWH BWD BWA IWH IWD IWA LBH ... VCH \\\n", 423 | "0 1.36 4.75 8.50 1.36 4.60 9.0 1.37 4.6 7.5 1.36 ... 1.36 \n", 424 | "1 1.60 4.20 5.00 1.62 3.60 5.5 1.65 3.4 4.4 1.62 ... 1.62 \n", 425 | "2 1.17 6.50 19.00 1.16 7.50 12.0 1.17 6.5 11.5 1.17 ... 1.17 \n", 426 | "3 2.10 3.25 3.75 2.00 3.25 3.8 2.20 3.2 3.0 2.10 ... 2.10 \n", 427 | "4 1.83 3.60 4.33 1.75 3.75 4.1 1.95 3.4 3.5 1.80 ... 1.83 \n", 428 | "\n", 429 | " VCD VCA GBH GBD GBA lr_h lr_d lr_a target \n", 430 | "0 5.2 9.5 1.36 4.6 9.00 1.007387 3.254611 4.998085 1 \n", 431 | "1 3.8 5.4 1.62 3.6 5.00 1.342904 2.433148 2.903981 1 \n", 432 | "2 8.0 21.0 1.15 7.0 15.00 0.867808 5.222123 6.402909 0 \n", 433 | "3 3.3 3.8 2.00 3.2 3.75 1.600282 2.201756 2.375706 2 \n", 434 | "4 3.8 4.5 1.85 3.3 4.33 1.384490 2.369909 2.810923 0 \n", 435 | "\n", 436 | "[5 rows x 28 columns]" 437 | ] 438 | }, 439 | "execution_count": 8, 440 | "metadata": {}, 441 | "output_type": "execute_result" 442 | } 443 | ], 444 | "source": [ 445 | "# average the level 2 class probabilities (XGBoost + neural net), then invert them into implied decimal odds\n", 446 | "predictions = 1 / ((preds_xgb + preds) / 2)\n", 447 | "\n", 448 | "# concat bookmaker odds, model-implied odds and the true outcome into the backtest frame\n", 449 | "x_bt = pd.concat([pd.DataFrame(odds), pd.DataFrame(predictions), pd.Series(y_test)], axis=1)\n", 450 | "col = cols[1:25] + cols[-9:-6] + ['target']\n", 451 | "x_bt.columns = col\n", 452 | "\n", 453 | "x_bt.head()" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 9, 459 | "metadata": { 460 | "collapsed": true 461 | }, 462 | "outputs": [], 463 | "source": [ 464 | "x_bt.to_csv('../../data/stack_backtest.csv', index=False)" 465 | ] 466 | } 467 | ], 468 | "metadata": { 469 | "kernelspec": { 470 | "display_name": "Python 3", 471 | "language": "python", 472 | "name": "python3" 473 | }, 474 | "language_info": { 475 | "codemirror_mode": { 476 | "name": "ipython", 477 | "version": 3 478 | }, 479 | "file_extension": ".py", 480 | "mimetype": "text/x-python", 481 | "name": 
"python", 482 | "nbconvert_exporter": "python", 483 | "pygments_lexer": "ipython3", 484 | "version": "3.5.3" 485 | } 486 | }, 487 | "nbformat": 4, 488 | "nbformat_minor": 2 489 | } 490 | --------------------------------------------------------------------------------