├── CNN_LSTM.ipynb ├── README.md ├── XGB+CNN.ipynb ├── requirements.txt └── xgb.ipynb /CNN_LSTM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "Using TensorFlow backend.\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "from __future__ import print_function\n", 18 | "import numpy as np\n", 19 | "from keras.preprocessing import sequence\n", 20 | "from keras.models import Sequential\n", 21 | "from keras.layers import Dense, Dropout, Activation\n", 22 | "from keras.layers import Embedding\n", 23 | "from keras.layers import LSTM\n", 24 | "from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D, GlobalAveragePooling1D\n", 25 | "from keras.datasets import imdb\n", 26 | "from keras.models import load_model\n", 27 | "\n", 28 | "import pandas as pd\n", 29 | "from sklearn.preprocessing import minmax_scale\n", 30 | "from sklearn.model_selection import train_test_split" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "# Parameters\n", 40 | "max_features = 1024\n", 41 | "batch_size = 64\n", 42 | "epochs = 20" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# load training data\n", 52 | "train = pd.read_csv('data/train.csv', header=None, sep=',', usecols = list(range(0,max_features)))\n", 53 | "label = pd.read_csv(\"data/train_label.csv\", sep=',')" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 4, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "X = train.fillna(0)\n", 63 | "\n", 64 | "X = X.iloc[:, :]\n", 65 | "y = label.iloc[:, -1]\n", 66 | "\n", 67 | "x_train, x_test, y_train, y_test = train_test_split(X, y, 
test_size=0.05, random_state=42)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 5, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "x_train shape: (107954, 1024)\n", 80 | "x_test shape: (5682, 1024)\n", 81 | "y_train shape: (107954,)\n", 82 | "y_test shape: (5682,)\n" 83 | ] 84 | } 85 | ], 86 | "source": [ 87 | "print('x_train shape:', x_train.shape)\n", 88 | "print('x_test shape:', x_test.shape)\n", 89 | "print('y_train shape:', y_train.shape)\n", 90 | "print('y_test shape:', y_test.shape)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 6, 96 | "metadata": { 97 | "scrolled": false 98 | }, 99 | "outputs": [ 100 | { 101 | "name": "stdout", 102 | "output_type": "stream", 103 | "text": [ 104 | "_________________________________________________________________\n", 105 | "Layer (type) Output Shape Param # \n", 106 | "=================================================================\n", 107 | "embedding_1 (Embedding) (None, None, 64) 65536 \n", 108 | "_________________________________________________________________\n", 109 | "conv1d_1 (Conv1D) (None, None, 64) 12352 \n", 110 | "_________________________________________________________________\n", 111 | "conv1d_2 (Conv1D) (None, None, 64) 12352 \n", 112 | "_________________________________________________________________\n", 113 | "average_pooling1d_1 (Average (None, None, 64) 0 \n", 114 | "_________________________________________________________________\n", 115 | "conv1d_3 (Conv1D) (None, None, 128) 24704 \n", 116 | "_________________________________________________________________\n", 117 | "conv1d_4 (Conv1D) (None, None, 128) 49280 \n", 118 | "_________________________________________________________________\n", 119 | "average_pooling1d_2 (Average (None, None, 128) 0 \n", 120 | "_________________________________________________________________\n", 121 | "conv1d_5 (Conv1D) (None, None, 256) 98560 
\n", 122 | "_________________________________________________________________\n", 123 | "conv1d_6 (Conv1D) (None, None, 256) 196864 \n", 124 | "_________________________________________________________________\n", 125 | "average_pooling1d_3 (Average (None, None, 256) 0 \n", 126 | "_________________________________________________________________\n", 127 | "conv1d_7 (Conv1D) (None, None, 512) 393728 \n", 128 | "_________________________________________________________________\n", 129 | "conv1d_8 (Conv1D) (None, None, 512) 786944 \n", 130 | "_________________________________________________________________\n", 131 | "global_average_pooling1d_1 ( (None, 512) 0 \n", 132 | "_________________________________________________________________\n", 133 | "dropout_1 (Dropout) (None, 512) 0 \n", 134 | "_________________________________________________________________\n", 135 | "dense_1 (Dense) (None, 1) 513 \n", 136 | "=================================================================\n", 137 | "Total params: 1,640,833\n", 138 | "Trainable params: 1,640,833\n", 139 | "Non-trainable params: 0\n", 140 | "_________________________________________________________________\n", 141 | "Train on 107954 samples, validate on 5682 samples\n", 142 | "Epoch 1/40\n", 143 | "107954/107954 [==============================] - 112s 1ms/step - loss: 0.2387 - acc: 0.8947 - val_loss: 0.1407 - val_acc: 0.9483\n", 144 | "Epoch 2/40\n", 145 | "107954/107954 [==============================] - 111s 1ms/step - loss: 0.1324 - acc: 0.9499 - val_loss: 0.1272 - val_acc: 0.9535\n", 146 | "Epoch 3/40\n", 147 | "107954/107954 [==============================] - 113s 1ms/step - loss: 0.1106 - acc: 0.9599 - val_loss: 0.0979 - val_acc: 0.9652\n", 148 | "Epoch 4/40\n", 149 | "107954/107954 [==============================] - 114s 1ms/step - loss: 0.0991 - acc: 0.9642 - val_loss: 0.0949 - val_acc: 0.9660\n", 150 | "Epoch 5/40\n", 151 | "107954/107954 [==============================] - 115s 1ms/step - loss: 0.0901 - acc: 
0.9679 - val_loss: 0.0875 - val_acc: 0.9669\n", 152 | "Epoch 6/40\n", 153 | "107954/107954 [==============================] - 115s 1ms/step - loss: 0.0814 - acc: 0.9712 - val_loss: 0.0900 - val_acc: 0.9722\n", 154 | "Epoch 7/40\n", 155 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0751 - acc: 0.9737 - val_loss: 0.0813 - val_acc: 0.9725\n", 156 | "Epoch 8/40\n", 157 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0684 - acc: 0.9759 - val_loss: 0.0843 - val_acc: 0.9732\n", 158 | "Epoch 9/40\n", 159 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0626 - acc: 0.9784 - val_loss: 0.0789 - val_acc: 0.9715\n", 160 | "Epoch 10/40\n", 161 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0558 - acc: 0.9809 - val_loss: 0.0778 - val_acc: 0.9748\n", 162 | "Epoch 11/40\n", 163 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0527 - acc: 0.9819 - val_loss: 0.0795 - val_acc: 0.9747\n", 164 | "Epoch 12/40\n", 165 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0490 - acc: 0.9833 - val_loss: 0.0727 - val_acc: 0.9752\n", 166 | "Epoch 13/40\n", 167 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0429 - acc: 0.9857 - val_loss: 0.0832 - val_acc: 0.9741\n", 168 | "Epoch 14/40\n", 169 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0398 - acc: 0.9865 - val_loss: 0.0871 - val_acc: 0.9743\n", 170 | "Epoch 15/40\n", 171 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0364 - acc: 0.9878 - val_loss: 0.0914 - val_acc: 0.9743\n", 172 | "Epoch 16/40\n", 173 | "107954/107954 [==============================] - 115s 1ms/step - loss: 0.0352 - acc: 0.9880 - val_loss: 0.0958 - val_acc: 0.9720\n", 174 | "Epoch 17/40\n", 175 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0314 - acc: 0.9896 - val_loss: 0.0871 - val_acc: 0.9738\n", 
176 | "Epoch 18/40\n", 177 | "107954/107954 [==============================] - 115s 1ms/step - loss: 0.0291 - acc: 0.9907 - val_loss: 0.0932 - val_acc: 0.9748\n", 178 | "Epoch 19/40\n", 179 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0280 - acc: 0.9907 - val_loss: 0.0887 - val_acc: 0.9787\n", 180 | "Epoch 20/40\n", 181 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0251 - acc: 0.9915 - val_loss: 0.1049 - val_acc: 0.9732\n", 182 | "Epoch 21/40\n", 183 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0237 - acc: 0.9923 - val_loss: 0.1048 - val_acc: 0.9757\n", 184 | "Epoch 22/40\n", 185 | "107954/107954 [==============================] - 117s 1ms/step - loss: 0.0232 - acc: 0.9921 - val_loss: 0.0936 - val_acc: 0.9724\n", 186 | "Epoch 23/40\n", 187 | "107954/107954 [==============================] - 117s 1ms/step - loss: 0.0216 - acc: 0.9930 - val_loss: 0.1081 - val_acc: 0.9752\n", 188 | "Epoch 24/40\n", 189 | "107954/107954 [==============================] - 117s 1ms/step - loss: 0.0210 - acc: 0.9931 - val_loss: 0.0965 - val_acc: 0.9764\n", 190 | "Epoch 25/40\n", 191 | "107954/107954 [==============================] - 117s 1ms/step - loss: 0.0203 - acc: 0.9933 - val_loss: 0.1139 - val_acc: 0.9755\n", 192 | "Epoch 26/40\n", 193 | "107954/107954 [==============================] - 117s 1ms/step - loss: 0.0196 - acc: 0.9938 - val_loss: 0.1076 - val_acc: 0.9743\n", 194 | "Epoch 27/40\n", 195 | "107954/107954 [==============================] - 117s 1ms/step - loss: 0.0189 - acc: 0.9936 - val_loss: 0.1128 - val_acc: 0.9761\n", 196 | "Epoch 28/40\n", 197 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0189 - acc: 0.9937 - val_loss: 0.1015 - val_acc: 0.9759\n", 198 | "Epoch 29/40\n", 199 | "107954/107954 [==============================] - 117s 1ms/step - loss: 0.0171 - acc: 0.9945 - val_loss: 0.1111 - val_acc: 0.9734\n", 200 | "Epoch 30/40\n", 201 | "107954/107954 
[==============================] - 116s 1ms/step - loss: 0.0181 - acc: 0.9941 - val_loss: 0.1020 - val_acc: 0.9766\n", 202 | "Epoch 31/40\n", 203 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0149 - acc: 0.9951 - val_loss: 0.1151 - val_acc: 0.9773\n", 204 | "Epoch 32/40\n", 205 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0169 - acc: 0.9946 - val_loss: 0.1029 - val_acc: 0.9775\n", 206 | "Epoch 33/40\n", 207 | "107954/107954 [==============================] - 116s 1ms/step - loss: 0.0150 - acc: 0.9952 - val_loss: 0.1188 - val_acc: 0.9738\n", 208 | "Epoch 34/40\n", 209 | "107954/107954 [==============================] - 117s 1ms/step - loss: 0.0159 - acc: 0.9946 - val_loss: 0.1243 - val_acc: 0.9724\n", 210 | "Epoch 35/40\n", 211 | "107954/107954 [==============================] - 117s 1ms/step - loss: 0.0137 - acc: 0.9956 - val_loss: 0.1080 - val_acc: 0.9769\n", 212 | "Epoch 36/40\n", 213 | "107954/107954 [==============================] - 117s 1ms/step - loss: 0.0149 - acc: 0.9953 - val_loss: 0.1153 - val_acc: 0.9766\n", 214 | "Epoch 37/40\n", 215 | "107954/107954 [==============================] - 118s 1ms/step - loss: 0.0142 - acc: 0.9956 - val_loss: 0.1215 - val_acc: 0.9748\n", 216 | "Epoch 38/40\n", 217 | "107954/107954 [==============================] - 118s 1ms/step - loss: 0.0141 - acc: 0.9957 - val_loss: 0.1204 - val_acc: 0.9731\n", 218 | "Epoch 39/40\n", 219 | "107954/107954 [==============================] - 118s 1ms/step - loss: 0.0132 - acc: 0.9960 - val_loss: 0.1337 - val_acc: 0.9741\n", 220 | "Epoch 40/40\n", 221 | "107954/107954 [==============================] - 118s 1ms/step - loss: 0.0131 - acc: 0.9958 - val_loss: 0.1344 - val_acc: 0.9754\n" 222 | ] 223 | }, 224 | { 225 | "data": { 226 | "text/plain": [ 227 | "" 228 | ] 229 | }, 230 | "execution_count": 6, 231 | "metadata": {}, 232 | "output_type": "execute_result" 233 | } 234 | ], 235 | "source": [ 236 | "# VGG current best\n", 237 | 
"#del model\n", 238 | "model = Sequential()\n", 239 | "model.add(Embedding(max_features, 64)) #128:no\n", 240 | "model.add(Conv1D(64, 3, activation='relu', strides=1, padding='valid'))\n", 241 | "model.add(Conv1D(64, 3, activation='relu'))\n", 242 | "model.add(AveragePooling1D(3))\n", 243 | "model.add(Conv1D(128, 3, activation='relu')) \n", 244 | "model.add(Conv1D(128, 3, activation='relu'))\n", 245 | "model.add(AveragePooling1D(3))\n", 246 | "model.add(Conv1D(256, 3, activation='relu'))\n", 247 | "model.add(Conv1D(256, 3, activation='relu'))\n", 248 | "model.add(AveragePooling1D(3))\n", 249 | "model.add(Conv1D(512, 3, activation='relu'))\n", 250 | "model.add(Conv1D(512, 3, activation='relu'))\n", 251 | "model.add(GlobalAveragePooling1D())\n", 252 | "model.add(Dropout(0.2))\n", 253 | "model.add(Dense(1, activation='sigmoid')) # tanh:no \n", 254 | "\n", 255 | "model.compile(loss='binary_crossentropy',\n", 256 | " optimizer='adam',\n", 257 | " metrics=['accuracy'])\n", 258 | "model.summary()\n", 259 | "\n", 260 | "model.fit(x_train, y_train, batch_size=batch_size, epochs=40, verbose=1, validation_data=(x_test, y_test))" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 10, 266 | "metadata": { 267 | "collapsed": true 268 | }, 269 | "outputs": [ 270 | { 271 | "name": "stdout", 272 | "output_type": "stream", 273 | "text": [ 274 | "_________________________________________________________________\n", 275 | "Layer (type) Output Shape Param # \n", 276 | "=================================================================\n", 277 | "embedding_2 (Embedding) (None, None, 64) 65536 \n", 278 | "_________________________________________________________________\n", 279 | "conv1d_9 (Conv1D) (None, None, 64) 12352 \n", 280 | "_________________________________________________________________\n", 281 | "conv1d_10 (Conv1D) (None, None, 64) 12352 \n", 282 | "_________________________________________________________________\n", 283 | "average_pooling1d_1 
(Average (None, None, 64) 0 \n", 284 | "_________________________________________________________________\n", 285 | "conv1d_11 (Conv1D) (None, None, 128) 24704 \n", 286 | "_________________________________________________________________\n", 287 | "conv1d_12 (Conv1D) (None, None, 128) 49280 \n", 288 | "_________________________________________________________________\n", 289 | "average_pooling1d_2 (Average (None, None, 128) 0 \n", 290 | "_________________________________________________________________\n", 291 | "conv1d_13 (Conv1D) (None, None, 256) 98560 \n", 292 | "_________________________________________________________________\n", 293 | "conv1d_14 (Conv1D) (None, None, 256) 196864 \n", 294 | "_________________________________________________________________\n", 295 | "average_pooling1d_3 (Average (None, None, 256) 0 \n", 296 | "_________________________________________________________________\n", 297 | "conv1d_15 (Conv1D) (None, None, 512) 393728 \n", 298 | "_________________________________________________________________\n", 299 | "conv1d_16 (Conv1D) (None, None, 512) 786944 \n", 300 | "_________________________________________________________________\n", 301 | "conv1d_17 (Conv1D) (None, None, 512) 786944 \n", 302 | "_________________________________________________________________\n", 303 | "global_average_pooling1d_2 ( (None, 512) 0 \n", 304 | "_________________________________________________________________\n", 305 | "dropout_2 (Dropout) (None, 512) 0 \n", 306 | "_________________________________________________________________\n", 307 | "dense_2 (Dense) (None, 1) 513 \n", 308 | "=================================================================\n", 309 | "Total params: 2,427,777\n", 310 | "Trainable params: 2,427,777\n", 311 | "Non-trainable params: 0\n", 312 | "_________________________________________________________________\n", 313 | "Train on 107954 samples, validate on 5682 samples\n", 314 | "Epoch 1/40\n", 315 | "107954/107954 
[==============================] - 123s 1ms/step - loss: 0.2772 - acc: 0.8656 - val_loss: 0.1340 - val_acc: 0.9476\n", 316 | "Epoch 2/40\n", 317 | "107954/107954 [==============================] - 125s 1ms/step - loss: 0.1394 - acc: 0.9473 - val_loss: 0.1417 - val_acc: 0.9493\n", 318 | "Epoch 3/40\n", 319 | "107954/107954 [==============================] - 127s 1ms/step - loss: 0.1174 - acc: 0.9570 - val_loss: 0.1152 - val_acc: 0.9585\n", 320 | "Epoch 4/40\n", 321 | "107954/107954 [==============================] - 127s 1ms/step - loss: 0.1041 - acc: 0.9624 - val_loss: 0.1171 - val_acc: 0.9599\n", 322 | "Epoch 5/40\n", 323 | "107954/107954 [==============================] - 127s 1ms/step - loss: 0.0947 - acc: 0.9666 - val_loss: 0.1176 - val_acc: 0.9602\n", 324 | "Epoch 6/40\n", 325 | "107954/107954 [==============================] - 127s 1ms/step - loss: 0.0857 - acc: 0.9699 - val_loss: 0.0898 - val_acc: 0.9694\n", 326 | "Epoch 7/40\n", 327 | "107954/107954 [==============================] - 127s 1ms/step - loss: 0.0787 - acc: 0.9728 - val_loss: 0.0857 - val_acc: 0.9706\n", 328 | "Epoch 8/40\n", 329 | "107954/107954 [==============================] - 128s 1ms/step - loss: 0.0717 - acc: 0.9752 - val_loss: 0.0891 - val_acc: 0.9671\n", 330 | "Epoch 9/40\n", 331 | "107954/107954 [==============================] - 129s 1ms/step - loss: 0.0656 - acc: 0.9772 - val_loss: 0.0808 - val_acc: 0.9715\n", 332 | "Epoch 10/40\n", 333 | "107954/107954 [==============================] - 128s 1ms/step - loss: 0.0609 - acc: 0.9791 - val_loss: 0.0833 - val_acc: 0.9699\n", 334 | "Epoch 11/40\n", 335 | "107954/107954 [==============================] - 128s 1ms/step - loss: 0.0563 - acc: 0.9807 - val_loss: 0.0927 - val_acc: 0.9699\n", 336 | "Epoch 12/40\n", 337 | "107954/107954 [==============================] - 128s 1ms/step - loss: 0.0504 - acc: 0.9829 - val_loss: 0.0856 - val_acc: 0.9715\n", 338 | "Epoch 13/40\n", 339 | "107954/107954 [==============================] - 128s 1ms/step - 
loss: 0.0474 - acc: 0.9839 - val_loss: 0.0834 - val_acc: 0.9720\n", 340 | "Epoch 14/40\n", 341 | "107954/107954 [==============================] - 129s 1ms/step - loss: 0.0426 - acc: 0.9855 - val_loss: 0.0799 - val_acc: 0.9725\n", 342 | "Epoch 15/40\n", 343 | "107954/107954 [==============================] - 129s 1ms/step - loss: 0.0407 - acc: 0.9861 - val_loss: 0.0867 - val_acc: 0.9732\n", 344 | "Epoch 16/40\n", 345 | "107954/107954 [==============================] - 130s 1ms/step - loss: 0.0371 - acc: 0.9876 - val_loss: 0.0823 - val_acc: 0.9734\n", 346 | "Epoch 17/40\n", 347 | "107954/107954 [==============================] - 131s 1ms/step - loss: 0.0351 - acc: 0.9885 - val_loss: 0.1096 - val_acc: 0.9694\n", 348 | "Epoch 18/40\n", 349 | "107954/107954 [==============================] - 131s 1ms/step - loss: 0.0331 - acc: 0.9889 - val_loss: 0.0980 - val_acc: 0.9729\n", 350 | "Epoch 19/40\n", 351 | "107954/107954 [==============================] - 131s 1ms/step - loss: 0.0299 - acc: 0.9902 - val_loss: 0.0858 - val_acc: 0.9752\n", 352 | "Epoch 20/40\n", 353 | "107954/107954 [==============================] - 130s 1ms/step - loss: 0.0283 - acc: 0.9903 - val_loss: 0.0896 - val_acc: 0.9754\n", 354 | "Epoch 21/40\n", 355 | "107954/107954 [==============================] - 131s 1ms/step - loss: 0.0270 - acc: 0.9912 - val_loss: 0.1069 - val_acc: 0.9715\n", 356 | "Epoch 22/40\n", 357 | "107954/107954 [==============================] - 131s 1ms/step - loss: 0.0252 - acc: 0.9915 - val_loss: 0.1134 - val_acc: 0.9727\n", 358 | "Epoch 23/40\n", 359 | "107954/107954 [==============================] - 131s 1ms/step - loss: 0.0238 - acc: 0.9923 - val_loss: 0.0982 - val_acc: 0.9766\n", 360 | "Epoch 24/40\n", 361 | "107954/107954 [==============================] - 130s 1ms/step - loss: 0.0225 - acc: 0.9928 - val_loss: 0.0994 - val_acc: 0.9732\n", 362 | "Epoch 25/40\n", 363 | "107954/107954 [==============================] - 130s 1ms/step - loss: 0.0207 - acc: 0.9933 - val_loss: 
0.1037 - val_acc: 0.9740\n", 364 | "Epoch 26/40\n", 365 | "107954/107954 [==============================] - 130s 1ms/step - loss: 0.0212 - acc: 0.9930 - val_loss: 0.1026 - val_acc: 0.9729\n", 366 | "Epoch 27/40\n", 367 | "107954/107954 [==============================] - 130s 1ms/step - loss: 0.0197 - acc: 0.9937 - val_loss: 0.0839 - val_acc: 0.9759\n", 368 | "Epoch 28/40\n", 369 | "107954/107954 [==============================] - 129s 1ms/step - loss: 0.0188 - acc: 0.9936 - val_loss: 0.1113 - val_acc: 0.9731\n", 370 | "Epoch 29/40\n", 371 | "107954/107954 [==============================] - 129s 1ms/step - loss: 0.0187 - acc: 0.9941 - val_loss: 0.0949 - val_acc: 0.9747\n", 372 | "Epoch 30/40\n", 373 | "107954/107954 [==============================] - 128s 1ms/step - loss: 0.0170 - acc: 0.9948 - val_loss: 0.0790 - val_acc: 0.9768\n", 374 | "Epoch 31/40\n", 375 | "107954/107954 [==============================] - 129s 1ms/step - loss: 0.0168 - acc: 0.9948 - val_loss: 0.0909 - val_acc: 0.9752\n", 376 | "Epoch 32/40\n", 377 | "107954/107954 [==============================] - 128s 1ms/step - loss: 0.0167 - acc: 0.9946 - val_loss: 0.1032 - val_acc: 0.9755\n", 378 | "Epoch 33/40\n", 379 | "107954/107954 [==============================] - 128s 1ms/step - loss: 0.0166 - acc: 0.9949 - val_loss: 0.1187 - val_acc: 0.9748\n", 380 | "Epoch 34/40\n", 381 | "107954/107954 [==============================] - 128s 1ms/step - loss: 0.0156 - acc: 0.9950 - val_loss: 0.1219 - val_acc: 0.9743\n", 382 | "Epoch 35/40\n", 383 | "107954/107954 [==============================] - 128s 1ms/step - loss: 0.0151 - acc: 0.9951 - val_loss: 0.1080 - val_acc: 0.9738\n", 384 | "Epoch 36/40\n", 385 | "107954/107954 [==============================] - 129s 1ms/step - loss: 0.0149 - acc: 0.9955 - val_loss: 0.0951 - val_acc: 0.9778\n", 386 | "Epoch 37/40\n", 387 | "107954/107954 [==============================] - 129s 1ms/step - loss: 0.0137 - acc: 0.9957 - val_loss: 0.1257 - val_acc: 0.9717\n", 388 | "Epoch 
38/40\n", 389 | "107954/107954 [==============================] - 129s 1ms/step - loss: 0.0146 - acc: 0.9953 - val_loss: 0.1072 - val_acc: 0.9736\n", 390 | "Epoch 39/40\n", 391 | "107954/107954 [==============================] - 129s 1ms/step - loss: 0.0140 - acc: 0.9955 - val_loss: 0.1181 - val_acc: 0.9748\n", 392 | "Epoch 40/40\n", 393 | "107954/107954 [==============================] - 129s 1ms/step - loss: 0.0143 - acc: 0.9954 - val_loss: 0.1055 - val_acc: 0.9757\n" 394 | ] 395 | }, 396 | { 397 | "data": { 398 | "text/plain": [ 399 | "" 400 | ] 401 | }, 402 | "execution_count": 10, 403 | "metadata": {}, 404 | "output_type": "execute_result" 405 | } 406 | ], 407 | "source": [ 408 | "# VGG new trial\n", 409 | "del model\n", 410 | "model = Sequential()\n", 411 | "model.add(Embedding(max_features, 64)) #128:no\n", 412 | "model.add(Conv1D(64, 3, activation='relu', strides=1, padding='valid'))\n", 413 | "model.add(Conv1D(64, 3, activation='relu'))\n", 414 | "model.add(AveragePooling1D(3))\n", 415 | "model.add(Conv1D(128, 3, activation='relu')) \n", 416 | "model.add(Conv1D(128, 3, activation='relu'))\n", 417 | "model.add(AveragePooling1D(3))\n", 418 | "model.add(Conv1D(256, 3, activation='relu'))\n", 419 | "model.add(Conv1D(256, 3, activation='relu'))\n", 420 | "model.add(AveragePooling1D(3))\n", 421 | "model.add(Conv1D(512, 3, activation='relu'))\n", 422 | "model.add(Conv1D(512, 3, activation='relu'))\n", 423 | "model.add(Conv1D(512, 3, activation='relu'))\n", 424 | "model.add(GlobalAveragePooling1D())\n", 425 | "model.add(Dropout(0.2))\n", 426 | "model.add(Dense(1, activation='sigmoid')) # tanh:no \n", 427 | "\n", 428 | "model.compile(loss='binary_crossentropy',\n", 429 | " optimizer='adam',\n", 430 | " metrics=['accuracy'])\n", 431 | "model.summary()\n", 432 | "\n", 433 | "model.fit(x_train, y_train, batch_size=batch_size, epochs=40, verbose=1, validation_data=(x_test, y_test))" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 7, 439 | 
"metadata": {}, 440 | "outputs": [ 441 | { 442 | "name": "stdout", 443 | "output_type": "stream", 444 | "text": [ 445 | "Train on 107954 samples, validate on 5682 samples\n", 446 | "Epoch 1/1\n", 447 | "107954/107954 [==============================] - 120s 1ms/step - loss: 0.0128 - acc: 0.9962 - val_loss: 0.1066 - val_acc: 0.9776\n" 448 | ] 449 | }, 450 | { 451 | "data": { 452 | "text/plain": [ 453 | "" 454 | ] 455 | }, 456 | "execution_count": 7, 457 | "metadata": {}, 458 | "output_type": "execute_result" 459 | } 460 | ], 461 | "source": [ 462 | "# model.save('model/CNN_LSTM_model.h5')\n", 463 | "# del model\n", 464 | "# model = load_model('model/CNN_LSTM_model.h5')\n", 465 | "model.fit(x_train, y_train, batch_size=batch_size, epochs=1, verbose=1, validation_data=(x_test, y_test))\n", 466 | "#print(model.metrics_names)\n", 467 | "#score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)\n", 468 | "#print('Test score:', score)\n", 469 | "#print('Test accuracy:', acc)" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": 7, 475 | "metadata": {}, 476 | "outputs": [], 477 | "source": [ 478 | "test = pd.read_csv('data/test.csv', header=None, names = list(range(0,max_features)))\n", 479 | "test = test.fillna(0)\n", 480 | "\n", 481 | "test = test.iloc[:, :]" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 9, 487 | "metadata": {}, 488 | "outputs": [], 489 | "source": [ 490 | "# predict probabilities for the test set and save them\n", 491 | "y_pred = model.predict(test, batch_size=batch_size)\n", 492 | "df = pd.DataFrame(y_pred)\n", 493 | "df.to_csv(\"data/CNN_prob.csv\")" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": null, 499 | "metadata": {}, 500 | "outputs": [], 501 | "source": [ 502 | "# embedding 64 : 0.953537486884343\n", 503 | "# embedding 128 : 0.9592\n", 504 | "# VGG V1 - 512 : 0.9740\n", 505 | "# VGG V1 - 1024: 0.9771\n", 506 | "# VGG V1 - 1024 - maxpooling: 0.9776" 507 | ] 508 | } 509 | ], 510 | 
"metadata": { 511 | "kernelspec": { 512 | "display_name": "Python 3", 513 | "language": "python", 514 | "name": "python3" 515 | }, 516 | "language_info": { 517 | "codemirror_mode": { 518 | "name": "ipython", 519 | "version": 3 520 | }, 521 | "file_extension": ".py", 522 | "mimetype": "text/x-python", 523 | "name": "python", 524 | "nbconvert_exporter": "python", 525 | "pygments_lexer": "ipython3", 526 | "version": "3.5.5" 527 | } 528 | }, 529 | "nbformat": 4, 530 | "nbformat_minor": 2 531 | } 532 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Malware-Detection 2 | XGBoost + CNN to detect malware using PE headers 3 | This is the repository of a project to detect malware using a dataset consisting of 110k+ binary files extracted from the PE headers of exe files. 4 | -------------------------------------------------------------------------------- /XGB+CNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 10, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from __future__ import print_function\n", 10 | "import numpy as np\n", 11 | "import pandas as pd\n", 12 | "import pickle\n", 13 | "from sklearn.preprocessing import minmax_scale\n", 14 | "from keras.models import load_model\n", 15 | "\n", 16 | "# parameters\n", 17 | "max_features = 1024\n", 18 | "batch_size = 64\n", 19 | "\n", 20 | "# load models\n", 21 | "cnn_model = load_model('CNN_model.h5')\n", 22 | "xgb_model = pickle.load(open(\"xgb.model.dat\", \"rb\"))\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# load test data (the CNN prediction code below is commented out)\n", 32 | "test = pd.read_csv('data/test.csv', header=None, names = list(range(0,max_features)))\n", 33 | "\n", 34 | "# cnn_test = test.fillna(0)\n", 35 | "# 
cnn_test = cnn_test.iloc[:, :]\n", 36 | "\n", 37 | "# cnn_pred = cnn_model.predict(cnn_test, batch_size=batch_size)\n", 38 | "# cnn_df = pd.DataFrame(cnn_pred)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# build xgboost input features (NaN counts + per-row describe statistics)\n", 48 | "test_copy = test\n", 49 | "number_of_nan = test_copy.isnull().sum(axis=1)\n", 50 | "test_copy = test_copy.fillna(0)\n", 51 | "test_copy = test_copy.astype(int)\n", 52 | "number_valid = max_features - number_of_nan\n", 53 | "test_describe = test_copy.apply(pd.DataFrame.describe, axis=1)\n", 54 | "test_copy = pd.concat([test_copy, number_of_nan, number_valid, test_describe], axis=1)\n", 55 | "test_copy.columns.values[1024] = \"number_of_nan\"\n", 56 | "test_copy.columns.values[1025] = \"number_valid\"\n" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 5, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "submission_df = pd.read_csv('data/sample_submission.csv')" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 11, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "cnn_df = pd.read_csv('data/sample_submission_cnn.csv')" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 12, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "xgb_pred = xgb_model.predict_proba(test_copy,)[:, 1]\n", 84 | "xgb_df = pd.DataFrame(xgb_pred)\n", 85 | "\n", 86 | "# Ensemble\n", 87 | "probs = 0.5*cnn_df.iloc[:,1] + 0.5*xgb_df.iloc[:,0]\n", 88 | "\n", 89 | "ensemble = pd.DataFrame({\n", 90 | " 'sample_id': submission_df.iloc[:,0],\n", 91 | " 'malware': probs\n", 92 | "}, columns=['sample_id', 'malware'])\n", 93 | "\n", 94 | "ensemble.to_csv(\"data/sample_submission_ensemble.csv\", index=False)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [] 103 | } 104 | ], 105 | "metadata": 
{ 106 | "kernelspec": { 107 | "display_name": "Python 3", 108 | "language": "python", 109 | "name": "python3" 110 | }, 111 | "language_info": { 112 | "codemirror_mode": { 113 | "name": "ipython", 114 | "version": 3 115 | }, 116 | "file_extension": ".py", 117 | "mimetype": "text/x-python", 118 | "name": "python", 119 | "nbconvert_exporter": "python", 120 | "pygments_lexer": "ipython3", 121 | "version": "3.5.5" 122 | } 123 | }, 124 | "nbformat": 4, 125 | "nbformat_minor": 2 126 | } 127 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.12.1 2 | Keras==2.1.5 3 | scipy==1.0.0 4 | pandas==0.22.0 5 | scikit_learn==0.19.1 6 | xgboost==0.60 7 | pickle==0.7.4 8 | tensorflow==1.0.1 9 | -------------------------------------------------------------------------------- /xgb.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "scrolled": true 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stderr", 12 | "output_type": "stream", 13 | "text": [ 14 | "C:\\Anaconda3\\envs\\tensorflow\\lib\\site-packages\\sklearn\\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", 15 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n", 16 | "C:\\Anaconda3\\envs\\tensorflow\\lib\\site-packages\\sklearn\\grid_search.py:42: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. 
This module will be removed in 0.20.\n", 17 | " DeprecationWarning)\n" 18 | ] 19 | } 20 | ], 21 | "source": [ 22 | "import numpy as np\n", 23 | "import scipy as sp\n", 24 | "import pandas as pd\n", 25 | "import sklearn as skl\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "import xgboost as xgb\n", 28 | "import pickle\n", 29 | "from xgboost.sklearn import XGBClassifier\n", 30 | "from sklearn import cross_validation, metrics #Additional sklearn functions\n", 31 | "from sklearn.grid_search import GridSearchCV #Performing grid search\n", 32 | "from sklearn.cross_validation import train_test_split, cross_val_score, KFold\n", 33 | "from sklearn.metrics import log_loss, confusion_matrix, accuracy_score\n", 34 | "from sklearn.preprocessing import minmax_scale\n" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# load training data\n", 44 | "max_features = 1024\n", 45 | "train = pd.read_csv('data/train.csv', header=None, sep=',', usecols = list(range(0,max_features)))\n", 46 | "label = pd.read_csv(\"data/train_label.csv\", sep=',')" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "X = train\n", 56 | "number_of_nan = X.isnull().sum(axis=1)\n", 57 | "X = X.fillna(0)\n", 58 | "X = X.astype(int)\n", 59 | "number_valid = max_features - number_of_nan\n", 60 | "X_describe = X.apply(pd.DataFrame.describe, axis=1)\n", 61 | "X = pd.concat([X, number_of_nan, number_valid, X_describe], axis=1)\n", 62 | "X.columns.values[1024] = \"number_of_nan\"\n", 63 | "X.columns.values[1025] = \"number_valid\"" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 4, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "y = label.iloc[:, -1]\n", 73 | "\n", 74 | "x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)" 75 | ] 76 | }, 77 | { 78 | 
"cell_type": "code", 79 | "execution_count": 11, 80 | "metadata": { 81 | "scrolled": true 82 | }, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "[0]\tvalidation_0-logloss:0.626909\n", 89 | "Will train until validation_0-logloss hasn't improved in 100 rounds.\n", 90 | "[1]\tvalidation_0-logloss:0.57162\n", 91 | "[2]\tvalidation_0-logloss:0.525737\n", 92 | "[3]\tvalidation_0-logloss:0.48707\n", 93 | "[4]\tvalidation_0-logloss:0.453427\n", 94 | "[5]\tvalidation_0-logloss:0.420634\n", 95 | "[6]\tvalidation_0-logloss:0.38918\n", 96 | "[7]\tvalidation_0-logloss:0.366433\n", 97 | "[8]\tvalidation_0-logloss:0.341959\n", 98 | "[9]\tvalidation_0-logloss:0.319798\n", 99 | "[10]\tvalidation_0-logloss:0.300399\n", 100 | "[11]\tvalidation_0-logloss:0.284087\n", 101 | "[12]\tvalidation_0-logloss:0.269005\n", 102 | "[13]\tvalidation_0-logloss:0.255484\n", 103 | "[14]\tvalidation_0-logloss:0.243773\n", 104 | "[15]\tvalidation_0-logloss:0.232777\n", 105 | "[16]\tvalidation_0-logloss:0.223419\n", 106 | "[17]\tvalidation_0-logloss:0.214943\n", 107 | "[18]\tvalidation_0-logloss:0.207696\n", 108 | "[19]\tvalidation_0-logloss:0.199876\n", 109 | "[20]\tvalidation_0-logloss:0.19351\n", 110 | "[21]\tvalidation_0-logloss:0.18786\n", 111 | "[22]\tvalidation_0-logloss:0.182258\n", 112 | "[23]\tvalidation_0-logloss:0.178127\n", 113 | "[24]\tvalidation_0-logloss:0.172549\n", 114 | "[25]\tvalidation_0-logloss:0.169023\n", 115 | "[26]\tvalidation_0-logloss:0.164423\n", 116 | "[27]\tvalidation_0-logloss:0.160208\n", 117 | "[28]\tvalidation_0-logloss:0.157057\n", 118 | "[29]\tvalidation_0-logloss:0.153332\n", 119 | "[30]\tvalidation_0-logloss:0.150322\n", 120 | "[31]\tvalidation_0-logloss:0.148037\n", 121 | "[32]\tvalidation_0-logloss:0.146284\n", 122 | "[33]\tvalidation_0-logloss:0.142861\n", 123 | "[34]\tvalidation_0-logloss:0.141188\n", 124 | "[35]\tvalidation_0-logloss:0.138363\n", 125 | "[36]\tvalidation_0-logloss:0.136485\n", 126 | 
"[37]\tvalidation_0-logloss:0.134169\n", 127 | "[38]\tvalidation_0-logloss:0.133417\n", 128 | "[39]\tvalidation_0-logloss:0.13082\n", 129 | "[40]\tvalidation_0-logloss:0.129569\n", 130 | "[41]\tvalidation_0-logloss:0.128296\n", 131 | "[42]\tvalidation_0-logloss:0.126491\n", 132 | "[43]\tvalidation_0-logloss:0.125229\n", 133 | "[44]\tvalidation_0-logloss:0.122846\n", 134 | "[45]\tvalidation_0-logloss:0.121522\n", 135 | "[46]\tvalidation_0-logloss:0.121007\n", 136 | "[47]\tvalidation_0-logloss:0.120182\n", 137 | "[48]\tvalidation_0-logloss:0.119535\n", 138 | "[49]\tvalidation_0-logloss:0.118456\n", 139 | "[50]\tvalidation_0-logloss:0.117228\n", 140 | "[51]\tvalidation_0-logloss:0.115873\n", 141 | "[52]\tvalidation_0-logloss:0.114542\n", 142 | "[53]\tvalidation_0-logloss:0.114219\n", 143 | "[54]\tvalidation_0-logloss:0.113627\n", 144 | "[55]\tvalidation_0-logloss:0.112969\n", 145 | "[56]\tvalidation_0-logloss:0.112343\n", 146 | "[57]\tvalidation_0-logloss:0.111842\n", 147 | "[58]\tvalidation_0-logloss:0.111364\n", 148 | "[59]\tvalidation_0-logloss:0.110819\n", 149 | "[60]\tvalidation_0-logloss:0.110583\n", 150 | "[61]\tvalidation_0-logloss:0.110044\n", 151 | "[62]\tvalidation_0-logloss:0.109248\n", 152 | "[63]\tvalidation_0-logloss:0.108585\n", 153 | "[64]\tvalidation_0-logloss:0.107083\n", 154 | "[65]\tvalidation_0-logloss:0.106516\n", 155 | "[66]\tvalidation_0-logloss:0.106264\n", 156 | "[67]\tvalidation_0-logloss:0.105676\n", 157 | "[68]\tvalidation_0-logloss:0.104934\n", 158 | "[69]\tvalidation_0-logloss:0.104234\n", 159 | "[70]\tvalidation_0-logloss:0.103694\n", 160 | "[71]\tvalidation_0-logloss:0.103444\n", 161 | "[72]\tvalidation_0-logloss:0.10324\n", 162 | "[73]\tvalidation_0-logloss:0.102519\n", 163 | "[74]\tvalidation_0-logloss:0.10193\n", 164 | "[75]\tvalidation_0-logloss:0.101387\n", 165 | "[76]\tvalidation_0-logloss:0.100915\n", 166 | "[77]\tvalidation_0-logloss:0.100264\n", 167 | "[78]\tvalidation_0-logloss:0.099704\n", 168 | 
"[79]\tvalidation_0-logloss:0.099336\n", 169 | "[80]\tvalidation_0-logloss:0.098777\n", 170 | "[81]\tvalidation_0-logloss:0.098566\n", 171 | "[82]\tvalidation_0-logloss:0.098025\n", 172 | "[83]\tvalidation_0-logloss:0.097759\n", 173 | "[84]\tvalidation_0-logloss:0.097475\n", 174 | "[85]\tvalidation_0-logloss:0.097084\n", 175 | "[86]\tvalidation_0-logloss:0.096921\n", 176 | "[87]\tvalidation_0-logloss:0.096273\n", 177 | "[88]\tvalidation_0-logloss:0.095944\n", 178 | "[89]\tvalidation_0-logloss:0.095366\n", 179 | "[90]\tvalidation_0-logloss:0.094919\n", 180 | "[91]\tvalidation_0-logloss:0.094458\n", 181 | "[92]\tvalidation_0-logloss:0.094183\n", 182 | "[93]\tvalidation_0-logloss:0.09397\n", 183 | "[94]\tvalidation_0-logloss:0.0938\n", 184 | "[95]\tvalidation_0-logloss:0.093517\n", 185 | "[96]\tvalidation_0-logloss:0.093273\n", 186 | "[97]\tvalidation_0-logloss:0.092721\n", 187 | "[98]\tvalidation_0-logloss:0.092356\n", 188 | "[99]\tvalidation_0-logloss:0.092208\n", 189 | "[100]\tvalidation_0-logloss:0.091319\n", 190 | "[101]\tvalidation_0-logloss:0.090723\n", 191 | "[102]\tvalidation_0-logloss:0.090549\n", 192 | "[103]\tvalidation_0-logloss:0.090204\n", 193 | "[104]\tvalidation_0-logloss:0.089576\n", 194 | "[105]\tvalidation_0-logloss:0.0893\n", 195 | "[106]\tvalidation_0-logloss:0.089084\n", 196 | "[107]\tvalidation_0-logloss:0.088869\n", 197 | "[108]\tvalidation_0-logloss:0.088649\n", 198 | "[109]\tvalidation_0-logloss:0.088366\n", 199 | "[110]\tvalidation_0-logloss:0.088174\n", 200 | "[111]\tvalidation_0-logloss:0.087802\n", 201 | "[112]\tvalidation_0-logloss:0.087701\n", 202 | "[113]\tvalidation_0-logloss:0.087099\n", 203 | "[114]\tvalidation_0-logloss:0.086976\n", 204 | "[115]\tvalidation_0-logloss:0.086692\n", 205 | "[116]\tvalidation_0-logloss:0.086579\n", 206 | "[117]\tvalidation_0-logloss:0.086162\n", 207 | "[118]\tvalidation_0-logloss:0.085545\n", 208 | "[119]\tvalidation_0-logloss:0.085389\n", 209 | "[120]\tvalidation_0-logloss:0.085294\n", 210 | 
"[121]\tvalidation_0-logloss:0.084999\n", 211 | "[122]\tvalidation_0-logloss:0.084798\n", 212 | "[123]\tvalidation_0-logloss:0.084646\n", 213 | "[124]\tvalidation_0-logloss:0.084357\n", 214 | "[125]\tvalidation_0-logloss:0.084219\n", 215 | "[126]\tvalidation_0-logloss:0.08373\n", 216 | "[127]\tvalidation_0-logloss:0.083576\n", 217 | "[128]\tvalidation_0-logloss:0.083408\n", 218 | "[129]\tvalidation_0-logloss:0.083109\n", 219 | "[130]\tvalidation_0-logloss:0.082948\n", 220 | "[131]\tvalidation_0-logloss:0.082758\n", 221 | "[132]\tvalidation_0-logloss:0.082707\n", 222 | "[133]\tvalidation_0-logloss:0.082609\n", 223 | "[134]\tvalidation_0-logloss:0.082327\n", 224 | "[135]\tvalidation_0-logloss:0.082246\n", 225 | "[136]\tvalidation_0-logloss:0.081988\n", 226 | "[137]\tvalidation_0-logloss:0.081909\n", 227 | "[138]\tvalidation_0-logloss:0.08177\n", 228 | "[139]\tvalidation_0-logloss:0.08143\n", 229 | "[140]\tvalidation_0-logloss:0.08134\n", 230 | "[141]\tvalidation_0-logloss:0.0812\n", 231 | "[142]\tvalidation_0-logloss:0.080894\n", 232 | "[143]\tvalidation_0-logloss:0.080762\n", 233 | "[144]\tvalidation_0-logloss:0.080602\n", 234 | "[145]\tvalidation_0-logloss:0.080204\n", 235 | "[146]\tvalidation_0-logloss:0.080107\n", 236 | "[147]\tvalidation_0-logloss:0.079836\n", 237 | "[148]\tvalidation_0-logloss:0.079552\n", 238 | "[149]\tvalidation_0-logloss:0.079254\n", 239 | "[150]\tvalidation_0-logloss:0.079011\n", 240 | "[151]\tvalidation_0-logloss:0.078915\n", 241 | "[152]\tvalidation_0-logloss:0.078813\n", 242 | "[153]\tvalidation_0-logloss:0.078567\n", 243 | "[154]\tvalidation_0-logloss:0.078149\n", 244 | "[155]\tvalidation_0-logloss:0.077971\n", 245 | "[156]\tvalidation_0-logloss:0.077753\n", 246 | "[157]\tvalidation_0-logloss:0.077559\n", 247 | "[158]\tvalidation_0-logloss:0.077491\n", 248 | "[159]\tvalidation_0-logloss:0.07737\n", 249 | "[160]\tvalidation_0-logloss:0.077209\n", 250 | "[161]\tvalidation_0-logloss:0.077105\n", 251 | 
"[162]\tvalidation_0-logloss:0.076911\n", 252 | "[163]\tvalidation_0-logloss:0.076653\n", 253 | "[164]\tvalidation_0-logloss:0.076559\n", 254 | "[165]\tvalidation_0-logloss:0.076492\n", 255 | "[166]\tvalidation_0-logloss:0.076412\n", 256 | "[167]\tvalidation_0-logloss:0.07639\n", 257 | "[168]\tvalidation_0-logloss:0.076255\n", 258 | "[169]\tvalidation_0-logloss:0.076203\n", 259 | "[170]\tvalidation_0-logloss:0.075618\n", 260 | "[171]\tvalidation_0-logloss:0.075536\n", 261 | "[172]\tvalidation_0-logloss:0.075454\n", 262 | "[173]\tvalidation_0-logloss:0.075254\n", 263 | "[174]\tvalidation_0-logloss:0.075175\n", 264 | "[175]\tvalidation_0-logloss:0.07506\n", 265 | "[176]\tvalidation_0-logloss:0.07479\n", 266 | "[177]\tvalidation_0-logloss:0.074626\n", 267 | "[178]\tvalidation_0-logloss:0.074471\n", 268 | "[179]\tvalidation_0-logloss:0.074295\n", 269 | "[180]\tvalidation_0-logloss:0.074145\n", 270 | "[181]\tvalidation_0-logloss:0.074031\n", 271 | "[182]\tvalidation_0-logloss:0.073958\n", 272 | "[183]\tvalidation_0-logloss:0.073779\n", 273 | "[184]\tvalidation_0-logloss:0.073502\n", 274 | "[185]\tvalidation_0-logloss:0.073446\n", 275 | "[186]\tvalidation_0-logloss:0.07339\n", 276 | "[187]\tvalidation_0-logloss:0.073218\n", 277 | "[188]\tvalidation_0-logloss:0.073104\n", 278 | "[189]\tvalidation_0-logloss:0.073015\n", 279 | "[190]\tvalidation_0-logloss:0.072926\n", 280 | "[191]\tvalidation_0-logloss:0.07272\n", 281 | "[192]\tvalidation_0-logloss:0.072653\n", 282 | "[193]\tvalidation_0-logloss:0.072603\n", 283 | "[194]\tvalidation_0-logloss:0.072503\n", 284 | "[195]\tvalidation_0-logloss:0.072496\n", 285 | "[196]\tvalidation_0-logloss:0.072426\n", 286 | "[197]\tvalidation_0-logloss:0.072331\n", 287 | "[198]\tvalidation_0-logloss:0.072239\n", 288 | "[199]\tvalidation_0-logloss:0.072045\n", 289 | "[200]\tvalidation_0-logloss:0.071734\n", 290 | "[201]\tvalidation_0-logloss:0.071688\n", 291 | "[202]\tvalidation_0-logloss:0.071265\n", 292 | 
"[203]\tvalidation_0-logloss:0.071092\n", 293 | "[204]\tvalidation_0-logloss:0.070932\n", 294 | "[205]\tvalidation_0-logloss:0.070891\n", 295 | "[206]\tvalidation_0-logloss:0.070739\n", 296 | "[207]\tvalidation_0-logloss:0.070629\n", 297 | "[208]\tvalidation_0-logloss:0.070606\n", 298 | "[209]\tvalidation_0-logloss:0.070438\n", 299 | "[210]\tvalidation_0-logloss:0.070382\n", 300 | "[211]\tvalidation_0-logloss:0.070254\n", 301 | "[212]\tvalidation_0-logloss:0.070041\n", 302 | "[213]\tvalidation_0-logloss:0.070005\n", 303 | "[214]\tvalidation_0-logloss:0.06994\n", 304 | "[215]\tvalidation_0-logloss:0.069814\n", 305 | "[216]\tvalidation_0-logloss:0.069725\n", 306 | "[217]\tvalidation_0-logloss:0.069636\n", 307 | "[218]\tvalidation_0-logloss:0.069139\n", 308 | "[219]\tvalidation_0-logloss:0.069084\n", 309 | "[220]\tvalidation_0-logloss:0.069071\n", 310 | "[221]\tvalidation_0-logloss:0.068929\n", 311 | "[222]\tvalidation_0-logloss:0.068733\n", 312 | "[223]\tvalidation_0-logloss:0.068428\n", 313 | "[224]\tvalidation_0-logloss:0.068384\n", 314 | "[225]\tvalidation_0-logloss:0.06827\n", 315 | "[226]\tvalidation_0-logloss:0.068074\n", 316 | "[227]\tvalidation_0-logloss:0.067944\n", 317 | "[228]\tvalidation_0-logloss:0.067869\n", 318 | "[229]\tvalidation_0-logloss:0.067548\n" 319 | ] 320 | }, 321 | { 322 | "name": "stdout", 323 | "output_type": "stream", 324 | "text": [ 325 | "[230]\tvalidation_0-logloss:0.06753\n", 326 | "[231]\tvalidation_0-logloss:0.067359\n", 327 | "[232]\tvalidation_0-logloss:0.067182\n", 328 | "[233]\tvalidation_0-logloss:0.067147\n", 329 | "[234]\tvalidation_0-logloss:0.06716\n", 330 | "[235]\tvalidation_0-logloss:0.066998\n", 331 | "[236]\tvalidation_0-logloss:0.066893\n", 332 | "[237]\tvalidation_0-logloss:0.066788\n", 333 | "[238]\tvalidation_0-logloss:0.066722\n", 334 | "[239]\tvalidation_0-logloss:0.066689\n", 335 | "[240]\tvalidation_0-logloss:0.066626\n", 336 | "[241]\tvalidation_0-logloss:0.066592\n", 337 | 
"[242]\tvalidation_0-logloss:0.066564\n", 338 | "[243]\tvalidation_0-logloss:0.066558\n", 339 | "[244]\tvalidation_0-logloss:0.066463\n", 340 | "[245]\tvalidation_0-logloss:0.066388\n", 341 | "[246]\tvalidation_0-logloss:0.066326\n", 342 | "[247]\tvalidation_0-logloss:0.066209\n", 343 | "[248]\tvalidation_0-logloss:0.066204\n", 344 | "[249]\tvalidation_0-logloss:0.066175\n", 345 | "[250]\tvalidation_0-logloss:0.065933\n", 346 | "[251]\tvalidation_0-logloss:0.065877\n", 347 | "[252]\tvalidation_0-logloss:0.065697\n", 348 | "[253]\tvalidation_0-logloss:0.065752\n", 349 | "[254]\tvalidation_0-logloss:0.065682\n", 350 | "[255]\tvalidation_0-logloss:0.065387\n", 351 | "[256]\tvalidation_0-logloss:0.065342\n", 352 | "[257]\tvalidation_0-logloss:0.065192\n", 353 | "[258]\tvalidation_0-logloss:0.065093\n", 354 | "[259]\tvalidation_0-logloss:0.065064\n", 355 | "[260]\tvalidation_0-logloss:0.065059\n", 356 | "[261]\tvalidation_0-logloss:0.064936\n", 357 | "[262]\tvalidation_0-logloss:0.064889\n", 358 | "[263]\tvalidation_0-logloss:0.064754\n", 359 | "[264]\tvalidation_0-logloss:0.064707\n", 360 | "[265]\tvalidation_0-logloss:0.064628\n", 361 | "[266]\tvalidation_0-logloss:0.064576\n", 362 | "[267]\tvalidation_0-logloss:0.064469\n", 363 | "[268]\tvalidation_0-logloss:0.064328\n", 364 | "[269]\tvalidation_0-logloss:0.064258\n", 365 | "[270]\tvalidation_0-logloss:0.064231\n", 366 | "[271]\tvalidation_0-logloss:0.064192\n", 367 | "[272]\tvalidation_0-logloss:0.064082\n", 368 | "[273]\tvalidation_0-logloss:0.064025\n", 369 | "[274]\tvalidation_0-logloss:0.06396\n", 370 | "[275]\tvalidation_0-logloss:0.063882\n", 371 | "[276]\tvalidation_0-logloss:0.063832\n", 372 | "[277]\tvalidation_0-logloss:0.063653\n", 373 | "[278]\tvalidation_0-logloss:0.063626\n", 374 | "[279]\tvalidation_0-logloss:0.063563\n", 375 | "[280]\tvalidation_0-logloss:0.063564\n", 376 | "[281]\tvalidation_0-logloss:0.06356\n", 377 | "[282]\tvalidation_0-logloss:0.063569\n", 378 | 
"[283]\tvalidation_0-logloss:0.063502\n", 379 | "[284]\tvalidation_0-logloss:0.063461\n", 380 | "[285]\tvalidation_0-logloss:0.063389\n", 381 | "[286]\tvalidation_0-logloss:0.063267\n", 382 | "[287]\tvalidation_0-logloss:0.063228\n", 383 | "[288]\tvalidation_0-logloss:0.063203\n", 384 | "[289]\tvalidation_0-logloss:0.063172\n", 385 | "[290]\tvalidation_0-logloss:0.062977\n", 386 | "[291]\tvalidation_0-logloss:0.062965\n", 387 | "[292]\tvalidation_0-logloss:0.06293\n", 388 | "[293]\tvalidation_0-logloss:0.062649\n", 389 | "[294]\tvalidation_0-logloss:0.06267\n", 390 | "[295]\tvalidation_0-logloss:0.062568\n", 391 | "[296]\tvalidation_0-logloss:0.062618\n", 392 | "[297]\tvalidation_0-logloss:0.062587\n", 393 | "[298]\tvalidation_0-logloss:0.062579\n", 394 | "[299]\tvalidation_0-logloss:0.062494\n", 395 | "[300]\tvalidation_0-logloss:0.062472\n", 396 | "[301]\tvalidation_0-logloss:0.062379\n", 397 | "[302]\tvalidation_0-logloss:0.062396\n", 398 | "[303]\tvalidation_0-logloss:0.062376\n", 399 | "[304]\tvalidation_0-logloss:0.062323\n", 400 | "[305]\tvalidation_0-logloss:0.062236\n", 401 | "[306]\tvalidation_0-logloss:0.062087\n", 402 | "[307]\tvalidation_0-logloss:0.061985\n", 403 | "[308]\tvalidation_0-logloss:0.061952\n", 404 | "[309]\tvalidation_0-logloss:0.061919\n", 405 | "[310]\tvalidation_0-logloss:0.061824\n", 406 | "[311]\tvalidation_0-logloss:0.061827\n", 407 | "[312]\tvalidation_0-logloss:0.061794\n", 408 | "[313]\tvalidation_0-logloss:0.061821\n", 409 | "[314]\tvalidation_0-logloss:0.061851\n", 410 | "[315]\tvalidation_0-logloss:0.061847\n", 411 | "[316]\tvalidation_0-logloss:0.061819\n", 412 | "[317]\tvalidation_0-logloss:0.061775\n", 413 | "[318]\tvalidation_0-logloss:0.06177\n", 414 | "[319]\tvalidation_0-logloss:0.061706\n", 415 | "[320]\tvalidation_0-logloss:0.061602\n", 416 | "[321]\tvalidation_0-logloss:0.061497\n", 417 | "[322]\tvalidation_0-logloss:0.061491\n", 418 | "[323]\tvalidation_0-logloss:0.061202\n", 419 | 
"[324]\tvalidation_0-logloss:0.061167\n", 420 | "[325]\tvalidation_0-logloss:0.061062\n", 421 | "[326]\tvalidation_0-logloss:0.060971\n", 422 | "[327]\tvalidation_0-logloss:0.060884\n", 423 | "[328]\tvalidation_0-logloss:0.060858\n", 424 | "[329]\tvalidation_0-logloss:0.060867\n", 425 | "[330]\tvalidation_0-logloss:0.06088\n", 426 | "[331]\tvalidation_0-logloss:0.060745\n", 427 | "[332]\tvalidation_0-logloss:0.060744\n", 428 | "[333]\tvalidation_0-logloss:0.06074\n", 429 | "[334]\tvalidation_0-logloss:0.060675\n", 430 | "[335]\tvalidation_0-logloss:0.060613\n", 431 | "[336]\tvalidation_0-logloss:0.060609\n", 432 | "[337]\tvalidation_0-logloss:0.060481\n", 433 | "[338]\tvalidation_0-logloss:0.060404\n", 434 | "[339]\tvalidation_0-logloss:0.060353\n", 435 | "[340]\tvalidation_0-logloss:0.060324\n", 436 | "[341]\tvalidation_0-logloss:0.060308\n", 437 | "[342]\tvalidation_0-logloss:0.06026\n", 438 | "[343]\tvalidation_0-logloss:0.060187\n", 439 | "[344]\tvalidation_0-logloss:0.060157\n", 440 | "[345]\tvalidation_0-logloss:0.060112\n", 441 | "[346]\tvalidation_0-logloss:0.060046\n", 442 | "[347]\tvalidation_0-logloss:0.060001\n", 443 | "[348]\tvalidation_0-logloss:0.060003\n", 444 | "[349]\tvalidation_0-logloss:0.059945\n", 445 | "[350]\tvalidation_0-logloss:0.059927\n", 446 | "[351]\tvalidation_0-logloss:0.05989\n", 447 | "[352]\tvalidation_0-logloss:0.059858\n", 448 | "[353]\tvalidation_0-logloss:0.059866\n", 449 | "[354]\tvalidation_0-logloss:0.059774\n", 450 | "[355]\tvalidation_0-logloss:0.059743\n", 451 | "[356]\tvalidation_0-logloss:0.059712\n", 452 | "[357]\tvalidation_0-logloss:0.059705\n", 453 | "[358]\tvalidation_0-logloss:0.059678\n", 454 | "[359]\tvalidation_0-logloss:0.059641\n", 455 | "[360]\tvalidation_0-logloss:0.05959\n", 456 | "[361]\tvalidation_0-logloss:0.059451\n", 457 | "[362]\tvalidation_0-logloss:0.059391\n", 458 | "[363]\tvalidation_0-logloss:0.059371\n", 459 | "[364]\tvalidation_0-logloss:0.05932\n", 460 | 
"[365]\tvalidation_0-logloss:0.059149\n", 461 | "[366]\tvalidation_0-logloss:0.059151\n", 462 | "[367]\tvalidation_0-logloss:0.059109\n", 463 | "[368]\tvalidation_0-logloss:0.059119\n", 464 | "[369]\tvalidation_0-logloss:0.059001\n", 465 | "[370]\tvalidation_0-logloss:0.058877\n", 466 | "[371]\tvalidation_0-logloss:0.058901\n", 467 | "[372]\tvalidation_0-logloss:0.058783\n", 468 | "[373]\tvalidation_0-logloss:0.058728\n", 469 | "[374]\tvalidation_0-logloss:0.058703\n", 470 | "[375]\tvalidation_0-logloss:0.058611\n", 471 | "[376]\tvalidation_0-logloss:0.058607\n", 472 | "[377]\tvalidation_0-logloss:0.058491\n", 473 | "[378]\tvalidation_0-logloss:0.058484\n", 474 | "[379]\tvalidation_0-logloss:0.058407\n", 475 | "[380]\tvalidation_0-logloss:0.058405\n", 476 | "[381]\tvalidation_0-logloss:0.058309\n", 477 | "[382]\tvalidation_0-logloss:0.058192\n", 478 | "[383]\tvalidation_0-logloss:0.058155\n", 479 | "[384]\tvalidation_0-logloss:0.058168\n", 480 | "[385]\tvalidation_0-logloss:0.058101\n", 481 | "[386]\tvalidation_0-logloss:0.058047\n", 482 | "[387]\tvalidation_0-logloss:0.058077\n", 483 | "[388]\tvalidation_0-logloss:0.058044\n", 484 | "[389]\tvalidation_0-logloss:0.058045\n", 485 | "[390]\tvalidation_0-logloss:0.057997\n", 486 | "[391]\tvalidation_0-logloss:0.057831\n", 487 | "[392]\tvalidation_0-logloss:0.057797\n", 488 | "[393]\tvalidation_0-logloss:0.057776\n", 489 | "[394]\tvalidation_0-logloss:0.057758\n", 490 | "[395]\tvalidation_0-logloss:0.05776\n", 491 | "[396]\tvalidation_0-logloss:0.057781\n", 492 | "[397]\tvalidation_0-logloss:0.057814\n", 493 | "[398]\tvalidation_0-logloss:0.057795\n", 494 | "[399]\tvalidation_0-logloss:0.057724\n", 495 | "[400]\tvalidation_0-logloss:0.057652\n", 496 | "[401]\tvalidation_0-logloss:0.057617\n", 497 | "[402]\tvalidation_0-logloss:0.057598\n", 498 | "[403]\tvalidation_0-logloss:0.057608\n", 499 | "[404]\tvalidation_0-logloss:0.057612\n", 500 | "[405]\tvalidation_0-logloss:0.057502\n", 501 | 
"[406]\tvalidation_0-logloss:0.057479\n", 502 | "[407]\tvalidation_0-logloss:0.057433\n", 503 | "[408]\tvalidation_0-logloss:0.057438\n", 504 | "[409]\tvalidation_0-logloss:0.05742\n", 505 | "[410]\tvalidation_0-logloss:0.057441\n", 506 | "[411]\tvalidation_0-logloss:0.057443\n", 507 | "[412]\tvalidation_0-logloss:0.057318\n", 508 | "[413]\tvalidation_0-logloss:0.057395\n", 509 | "[414]\tvalidation_0-logloss:0.057373\n", 510 | "[415]\tvalidation_0-logloss:0.057194\n", 511 | "[416]\tvalidation_0-logloss:0.057078\n", 512 | "[417]\tvalidation_0-logloss:0.057057\n", 513 | "[418]\tvalidation_0-logloss:0.057004\n", 514 | "[419]\tvalidation_0-logloss:0.056998\n", 515 | "[420]\tvalidation_0-logloss:0.056998\n", 516 | "[421]\tvalidation_0-logloss:0.05701\n", 517 | "[422]\tvalidation_0-logloss:0.056911\n", 518 | "[423]\tvalidation_0-logloss:0.056884\n", 519 | "[424]\tvalidation_0-logloss:0.056866\n", 520 | "[425]\tvalidation_0-logloss:0.056854\n", 521 | "[426]\tvalidation_0-logloss:0.056816\n", 522 | "[427]\tvalidation_0-logloss:0.056672\n", 523 | "[428]\tvalidation_0-logloss:0.056572\n", 524 | "[429]\tvalidation_0-logloss:0.056574\n", 525 | "[430]\tvalidation_0-logloss:0.056533\n", 526 | "[431]\tvalidation_0-logloss:0.056487\n", 527 | "[432]\tvalidation_0-logloss:0.056439\n", 528 | "[433]\tvalidation_0-logloss:0.056455\n", 529 | "[434]\tvalidation_0-logloss:0.056411\n", 530 | "[435]\tvalidation_0-logloss:0.0564\n", 531 | "[436]\tvalidation_0-logloss:0.056396\n", 532 | "[437]\tvalidation_0-logloss:0.056395\n", 533 | "[438]\tvalidation_0-logloss:0.056389\n", 534 | "[439]\tvalidation_0-logloss:0.056365\n", 535 | "[440]\tvalidation_0-logloss:0.056325\n", 536 | "[441]\tvalidation_0-logloss:0.056327\n", 537 | "[442]\tvalidation_0-logloss:0.056336\n", 538 | "[443]\tvalidation_0-logloss:0.056328\n", 539 | "[444]\tvalidation_0-logloss:0.056246\n", 540 | "[445]\tvalidation_0-logloss:0.056237\n", 541 | "[446]\tvalidation_0-logloss:0.056151\n", 542 | 
"[447]\tvalidation_0-logloss:0.05614\n", 543 | "[448]\tvalidation_0-logloss:0.056104\n", 544 | "[449]\tvalidation_0-logloss:0.056051\n", 545 | "[450]\tvalidation_0-logloss:0.056009\n", 546 | "[451]\tvalidation_0-logloss:0.055972\n", 547 | "[452]\tvalidation_0-logloss:0.055907\n", 548 | "[453]\tvalidation_0-logloss:0.055872\n", 549 | "[454]\tvalidation_0-logloss:0.055832\n", 550 | "[455]\tvalidation_0-logloss:0.055762\n", 551 | "[456]\tvalidation_0-logloss:0.055758\n", 552 | "[457]\tvalidation_0-logloss:0.055632\n", 553 | "[458]\tvalidation_0-logloss:0.055618\n" 554 | ] 555 | }, 556 | { 557 | "name": "stdout", 558 | "output_type": "stream", 559 | "text": [ 560 | "[459]\tvalidation_0-logloss:0.055525\n", 561 | "[460]\tvalidation_0-logloss:0.055444\n", 562 | "[461]\tvalidation_0-logloss:0.055394\n", 563 | "[462]\tvalidation_0-logloss:0.055361\n", 564 | "[463]\tvalidation_0-logloss:0.055354\n", 565 | "[464]\tvalidation_0-logloss:0.055277\n", 566 | "[465]\tvalidation_0-logloss:0.055262\n", 567 | "[466]\tvalidation_0-logloss:0.055252\n", 568 | "[467]\tvalidation_0-logloss:0.055259\n", 569 | "[468]\tvalidation_0-logloss:0.05526\n", 570 | "[469]\tvalidation_0-logloss:0.055261\n", 571 | "[470]\tvalidation_0-logloss:0.055205\n", 572 | "[471]\tvalidation_0-logloss:0.055172\n", 573 | "[472]\tvalidation_0-logloss:0.055184\n", 574 | "[473]\tvalidation_0-logloss:0.055186\n", 575 | "[474]\tvalidation_0-logloss:0.055139\n", 576 | "[475]\tvalidation_0-logloss:0.055155\n", 577 | "[476]\tvalidation_0-logloss:0.05514\n", 578 | "[477]\tvalidation_0-logloss:0.055122\n", 579 | "[478]\tvalidation_0-logloss:0.055104\n", 580 | "[479]\tvalidation_0-logloss:0.055072\n", 581 | "[480]\tvalidation_0-logloss:0.055044\n", 582 | "[481]\tvalidation_0-logloss:0.055001\n", 583 | "[482]\tvalidation_0-logloss:0.054955\n", 584 | "[483]\tvalidation_0-logloss:0.054924\n", 585 | "[484]\tvalidation_0-logloss:0.054869\n", 586 | "[485]\tvalidation_0-logloss:0.054867\n", 587 | 
"[486]\tvalidation_0-logloss:0.05483\n", 588 | "[487]\tvalidation_0-logloss:0.054757\n", 589 | "[488]\tvalidation_0-logloss:0.054702\n", 590 | "[489]\tvalidation_0-logloss:0.054703\n", 591 | "[490]\tvalidation_0-logloss:0.054649\n", 592 | "[491]\tvalidation_0-logloss:0.054628\n", 593 | "[492]\tvalidation_0-logloss:0.054583\n", 594 | "[493]\tvalidation_0-logloss:0.054549\n", 595 | "[494]\tvalidation_0-logloss:0.054564\n", 596 | "[495]\tvalidation_0-logloss:0.054583\n", 597 | "[496]\tvalidation_0-logloss:0.054447\n", 598 | "[497]\tvalidation_0-logloss:0.054469\n", 599 | "[498]\tvalidation_0-logloss:0.054438\n", 600 | "[499]\tvalidation_0-logloss:0.054464\n", 601 | "[500]\tvalidation_0-logloss:0.054406\n", 602 | "[501]\tvalidation_0-logloss:0.054369\n", 603 | "[502]\tvalidation_0-logloss:0.054382\n", 604 | "[503]\tvalidation_0-logloss:0.054366\n", 605 | "[504]\tvalidation_0-logloss:0.05434\n", 606 | "[505]\tvalidation_0-logloss:0.054299\n", 607 | "[506]\tvalidation_0-logloss:0.054335\n", 608 | "[507]\tvalidation_0-logloss:0.054328\n", 609 | "[508]\tvalidation_0-logloss:0.054263\n", 610 | "[509]\tvalidation_0-logloss:0.054239\n", 611 | "[510]\tvalidation_0-logloss:0.054215\n", 612 | "[511]\tvalidation_0-logloss:0.054202\n", 613 | "[512]\tvalidation_0-logloss:0.054191\n", 614 | "[513]\tvalidation_0-logloss:0.054147\n", 615 | "[514]\tvalidation_0-logloss:0.054171\n", 616 | "[515]\tvalidation_0-logloss:0.054107\n", 617 | "[516]\tvalidation_0-logloss:0.054061\n", 618 | "[517]\tvalidation_0-logloss:0.05408\n", 619 | "[518]\tvalidation_0-logloss:0.054065\n", 620 | "[519]\tvalidation_0-logloss:0.054014\n", 621 | "[520]\tvalidation_0-logloss:0.054032\n", 622 | "[521]\tvalidation_0-logloss:0.054019\n", 623 | "[522]\tvalidation_0-logloss:0.054021\n", 624 | "[523]\tvalidation_0-logloss:0.053973\n", 625 | "[524]\tvalidation_0-logloss:0.053977\n", 626 | "[525]\tvalidation_0-logloss:0.053948\n", 627 | "[526]\tvalidation_0-logloss:0.053833\n", 628 | 
"[527]\tvalidation_0-logloss:0.053838\n", 629 | "[528]\tvalidation_0-logloss:0.053821\n", 630 | "[529]\tvalidation_0-logloss:0.053801\n", 631 | "[530]\tvalidation_0-logloss:0.053809\n", 632 | "[531]\tvalidation_0-logloss:0.053766\n", 633 | "[532]\tvalidation_0-logloss:0.05375\n", 634 | "[533]\tvalidation_0-logloss:0.053754\n", 635 | "[534]\tvalidation_0-logloss:0.053758\n", 636 | "[535]\tvalidation_0-logloss:0.053755\n", 637 | "[536]\tvalidation_0-logloss:0.053749\n", 638 | "[537]\tvalidation_0-logloss:0.053689\n", 639 | "[538]\tvalidation_0-logloss:0.053659\n", 640 | "[539]\tvalidation_0-logloss:0.053666\n", 641 | "[540]\tvalidation_0-logloss:0.053549\n", 642 | "[541]\tvalidation_0-logloss:0.053551\n", 643 | "[542]\tvalidation_0-logloss:0.053481\n", 644 | "[543]\tvalidation_0-logloss:0.053502\n", 645 | "[544]\tvalidation_0-logloss:0.053481\n", 646 | "[545]\tvalidation_0-logloss:0.053406\n", 647 | "[546]\tvalidation_0-logloss:0.053442\n", 648 | "[547]\tvalidation_0-logloss:0.053442\n", 649 | "[548]\tvalidation_0-logloss:0.053421\n", 650 | "[549]\tvalidation_0-logloss:0.053402\n", 651 | "[550]\tvalidation_0-logloss:0.053347\n", 652 | "[551]\tvalidation_0-logloss:0.053312\n", 653 | "[552]\tvalidation_0-logloss:0.053294\n", 654 | "[553]\tvalidation_0-logloss:0.053308\n", 655 | "[554]\tvalidation_0-logloss:0.053223\n", 656 | "[555]\tvalidation_0-logloss:0.053175\n", 657 | "[556]\tvalidation_0-logloss:0.053199\n", 658 | "[557]\tvalidation_0-logloss:0.053169\n", 659 | "[558]\tvalidation_0-logloss:0.053123\n", 660 | "[559]\tvalidation_0-logloss:0.053144\n", 661 | "[560]\tvalidation_0-logloss:0.053096\n", 662 | "[561]\tvalidation_0-logloss:0.053134\n", 663 | "[562]\tvalidation_0-logloss:0.053177\n", 664 | "[563]\tvalidation_0-logloss:0.053097\n", 665 | "[564]\tvalidation_0-logloss:0.053099\n", 666 | "[565]\tvalidation_0-logloss:0.053048\n", 667 | "[566]\tvalidation_0-logloss:0.053036\n", 668 | "[567]\tvalidation_0-logloss:0.053024\n", 669 | 
"[568]\tvalidation_0-logloss:0.05299\n", 670 | "[569]\tvalidation_0-logloss:0.052932\n", 671 | "[570]\tvalidation_0-logloss:0.052912\n", 672 | "[571]\tvalidation_0-logloss:0.052932\n", 673 | "[572]\tvalidation_0-logloss:0.052946\n", 674 | "[573]\tvalidation_0-logloss:0.052928\n", 675 | "[574]\tvalidation_0-logloss:0.052937\n", 676 | "[575]\tvalidation_0-logloss:0.052934\n", 677 | "[576]\tvalidation_0-logloss:0.052967\n", 678 | "[577]\tvalidation_0-logloss:0.052971\n", 679 | "[578]\tvalidation_0-logloss:0.053004\n", 680 | "[579]\tvalidation_0-logloss:0.053021\n", 681 | "[580]\tvalidation_0-logloss:0.052945\n", 682 | "[581]\tvalidation_0-logloss:0.052974\n", 683 | "[582]\tvalidation_0-logloss:0.052938\n", 684 | "[583]\tvalidation_0-logloss:0.052965\n", 685 | "[584]\tvalidation_0-logloss:0.052938\n", 686 | "[585]\tvalidation_0-logloss:0.052918\n", 687 | "[586]\tvalidation_0-logloss:0.052895\n", 688 | "[587]\tvalidation_0-logloss:0.052914\n", 689 | "[588]\tvalidation_0-logloss:0.052879\n", 690 | "[589]\tvalidation_0-logloss:0.052868\n", 691 | "[590]\tvalidation_0-logloss:0.05287\n", 692 | "[591]\tvalidation_0-logloss:0.052901\n", 693 | "[592]\tvalidation_0-logloss:0.052904\n", 694 | "[593]\tvalidation_0-logloss:0.05289\n", 695 | "[594]\tvalidation_0-logloss:0.052853\n", 696 | "[595]\tvalidation_0-logloss:0.052859\n", 697 | "[596]\tvalidation_0-logloss:0.052861\n", 698 | "[597]\tvalidation_0-logloss:0.052817\n", 699 | "[598]\tvalidation_0-logloss:0.052826\n", 700 | "[599]\tvalidation_0-logloss:0.052811\n", 701 | "[600]\tvalidation_0-logloss:0.052845\n", 702 | "[601]\tvalidation_0-logloss:0.052835\n", 703 | "[602]\tvalidation_0-logloss:0.052836\n", 704 | "[603]\tvalidation_0-logloss:0.052772\n", 705 | "[604]\tvalidation_0-logloss:0.052793\n", 706 | "[605]\tvalidation_0-logloss:0.052779\n", 707 | "[606]\tvalidation_0-logloss:0.052794\n", 708 | "[607]\tvalidation_0-logloss:0.052762\n", 709 | "[608]\tvalidation_0-logloss:0.052738\n", 710 | 
"[609]\tvalidation_0-logloss:0.052731\n", 711 | "[610]\tvalidation_0-logloss:0.052672\n", 712 | "[611]\tvalidation_0-logloss:0.052667\n", 713 | "[612]\tvalidation_0-logloss:0.052662\n", 714 | "[613]\tvalidation_0-logloss:0.052616\n", 715 | "[614]\tvalidation_0-logloss:0.052652\n", 716 | "[615]\tvalidation_0-logloss:0.052661\n", 717 | "[616]\tvalidation_0-logloss:0.052658\n", 718 | "[617]\tvalidation_0-logloss:0.052642\n", 719 | "[618]\tvalidation_0-logloss:0.052638\n", 720 | "[619]\tvalidation_0-logloss:0.052555\n", 721 | "[620]\tvalidation_0-logloss:0.052538\n", 722 | "[621]\tvalidation_0-logloss:0.052526\n", 723 | "[622]\tvalidation_0-logloss:0.052486\n", 724 | "[623]\tvalidation_0-logloss:0.052416\n", 725 | "[624]\tvalidation_0-logloss:0.052436\n", 726 | "[625]\tvalidation_0-logloss:0.052398\n", 727 | "[626]\tvalidation_0-logloss:0.052392\n", 728 | "[627]\tvalidation_0-logloss:0.052395\n", 729 | "[628]\tvalidation_0-logloss:0.052397\n", 730 | "[629]\tvalidation_0-logloss:0.052409\n", 731 | "[630]\tvalidation_0-logloss:0.052435\n", 732 | "[631]\tvalidation_0-logloss:0.052394\n", 733 | "[632]\tvalidation_0-logloss:0.052355\n", 734 | "[633]\tvalidation_0-logloss:0.052344\n", 735 | "[634]\tvalidation_0-logloss:0.052336\n", 736 | "[635]\tvalidation_0-logloss:0.052305\n", 737 | "[636]\tvalidation_0-logloss:0.052199\n", 738 | "[637]\tvalidation_0-logloss:0.052182\n", 739 | "[638]\tvalidation_0-logloss:0.052142\n", 740 | "[639]\tvalidation_0-logloss:0.052124\n", 741 | "[640]\tvalidation_0-logloss:0.052137\n", 742 | "[641]\tvalidation_0-logloss:0.052084\n", 743 | "[642]\tvalidation_0-logloss:0.052103\n", 744 | "[643]\tvalidation_0-logloss:0.052099\n", 745 | "[644]\tvalidation_0-logloss:0.052099\n", 746 | "[645]\tvalidation_0-logloss:0.052116\n", 747 | "[646]\tvalidation_0-logloss:0.052112\n", 748 | "[647]\tvalidation_0-logloss:0.052088\n", 749 | "[648]\tvalidation_0-logloss:0.052097\n", 750 | "[649]\tvalidation_0-logloss:0.052055\n", 751 | 
"[650]\tvalidation_0-logloss:0.052058\n", 752 | "[651]\tvalidation_0-logloss:0.052078\n", 753 | "[652]\tvalidation_0-logloss:0.052001\n", 754 | "[653]\tvalidation_0-logloss:0.052\n", 755 | "[654]\tvalidation_0-logloss:0.051983\n", 756 | "[655]\tvalidation_0-logloss:0.052008\n", 757 | "[656]\tvalidation_0-logloss:0.052013\n", 758 | "[657]\tvalidation_0-logloss:0.052014\n", 759 | "[658]\tvalidation_0-logloss:0.052009\n", 760 | "[659]\tvalidation_0-logloss:0.052029\n", 761 | "[660]\tvalidation_0-logloss:0.052051\n", 762 | "[661]\tvalidation_0-logloss:0.052028\n", 763 | "[662]\tvalidation_0-logloss:0.052074\n", 764 | "[663]\tvalidation_0-logloss:0.05205\n", 765 | "[664]\tvalidation_0-logloss:0.052072\n", 766 | "[665]\tvalidation_0-logloss:0.052077\n", 767 | "[666]\tvalidation_0-logloss:0.05209\n", 768 | "[667]\tvalidation_0-logloss:0.052053\n", 769 | "[668]\tvalidation_0-logloss:0.052061\n", 770 | "[669]\tvalidation_0-logloss:0.05207\n", 771 | "[670]\tvalidation_0-logloss:0.052023\n", 772 | "[671]\tvalidation_0-logloss:0.052007\n", 773 | "[672]\tvalidation_0-logloss:0.052012\n", 774 | "[673]\tvalidation_0-logloss:0.052008\n", 775 | "[674]\tvalidation_0-logloss:0.051954\n", 776 | "[675]\tvalidation_0-logloss:0.051953\n", 777 | "[676]\tvalidation_0-logloss:0.051991\n", 778 | "[677]\tvalidation_0-logloss:0.051958\n", 779 | "[678]\tvalidation_0-logloss:0.05198\n", 780 | "[679]\tvalidation_0-logloss:0.051962\n", 781 | "[680]\tvalidation_0-logloss:0.051914\n", 782 | "[681]\tvalidation_0-logloss:0.051961\n", 783 | "[682]\tvalidation_0-logloss:0.05195\n", 784 | "[683]\tvalidation_0-logloss:0.051941\n", 785 | "[684]\tvalidation_0-logloss:0.051912\n", 786 | "[685]\tvalidation_0-logloss:0.051861\n", 787 | "[686]\tvalidation_0-logloss:0.051883\n", 788 | "[687]\tvalidation_0-logloss:0.051838\n" 789 | ] 790 | }, 791 | { 792 | "name": "stdout", 793 | "output_type": "stream", 794 | "text": [ 795 | "[688]\tvalidation_0-logloss:0.051825\n", 796 | 
"[689]\tvalidation_0-logloss:0.051828\n", 797 | "[690]\tvalidation_0-logloss:0.051813\n", 798 | "[691]\tvalidation_0-logloss:0.051809\n", 799 | "[692]\tvalidation_0-logloss:0.051805\n", 800 | "[693]\tvalidation_0-logloss:0.051802\n", 801 | "[694]\tvalidation_0-logloss:0.051789\n", 802 | "[695]\tvalidation_0-logloss:0.051779\n", 803 | "[696]\tvalidation_0-logloss:0.051771\n", 804 | "[697]\tvalidation_0-logloss:0.051792\n", 805 | "[698]\tvalidation_0-logloss:0.051777\n", 806 | "[699]\tvalidation_0-logloss:0.051755\n", 807 | "[700]\tvalidation_0-logloss:0.051762\n", 808 | "[701]\tvalidation_0-logloss:0.051762\n", 809 | "[702]\tvalidation_0-logloss:0.051826\n", 810 | "[703]\tvalidation_0-logloss:0.051799\n", 811 | "[704]\tvalidation_0-logloss:0.05172\n", 812 | "[705]\tvalidation_0-logloss:0.051718\n", 813 | "[706]\tvalidation_0-logloss:0.051724\n", 814 | "[707]\tvalidation_0-logloss:0.051724\n", 815 | "[708]\tvalidation_0-logloss:0.051711\n", 816 | "[709]\tvalidation_0-logloss:0.051649\n", 817 | "[710]\tvalidation_0-logloss:0.051604\n", 818 | "[711]\tvalidation_0-logloss:0.051583\n", 819 | "[712]\tvalidation_0-logloss:0.051587\n", 820 | "[713]\tvalidation_0-logloss:0.051611\n", 821 | "[714]\tvalidation_0-logloss:0.051559\n", 822 | "[715]\tvalidation_0-logloss:0.051518\n", 823 | "[716]\tvalidation_0-logloss:0.051534\n", 824 | "[717]\tvalidation_0-logloss:0.051541\n", 825 | "[718]\tvalidation_0-logloss:0.051593\n", 826 | "[719]\tvalidation_0-logloss:0.051571\n", 827 | "[720]\tvalidation_0-logloss:0.051518\n", 828 | "[721]\tvalidation_0-logloss:0.051526\n", 829 | "[722]\tvalidation_0-logloss:0.051499\n", 830 | "[723]\tvalidation_0-logloss:0.051467\n", 831 | "[724]\tvalidation_0-logloss:0.051452\n", 832 | "[725]\tvalidation_0-logloss:0.051417\n", 833 | "[726]\tvalidation_0-logloss:0.051336\n", 834 | "[727]\tvalidation_0-logloss:0.051315\n", 835 | "[728]\tvalidation_0-logloss:0.051337\n", 836 | "[729]\tvalidation_0-logloss:0.051345\n", 837 | 
"[730]\tvalidation_0-logloss:0.051331\n", 838 | "[731]\tvalidation_0-logloss:0.05136\n", 839 | "[732]\tvalidation_0-logloss:0.051387\n", 840 | "[733]\tvalidation_0-logloss:0.051389\n", 841 | "[734]\tvalidation_0-logloss:0.051364\n", 842 | "[735]\tvalidation_0-logloss:0.051365\n", 843 | "[736]\tvalidation_0-logloss:0.051364\n", 844 | "[737]\tvalidation_0-logloss:0.051354\n", 845 | "[738]\tvalidation_0-logloss:0.051376\n", 846 | "[739]\tvalidation_0-logloss:0.051378\n", 847 | "[740]\tvalidation_0-logloss:0.05139\n", 848 | "[741]\tvalidation_0-logloss:0.051393\n", 849 | "[742]\tvalidation_0-logloss:0.051361\n", 850 | "[743]\tvalidation_0-logloss:0.051348\n", 851 | "[744]\tvalidation_0-logloss:0.051352\n", 852 | "[745]\tvalidation_0-logloss:0.051371\n", 853 | "[746]\tvalidation_0-logloss:0.051374\n", 854 | "[747]\tvalidation_0-logloss:0.051373\n", 855 | "[748]\tvalidation_0-logloss:0.05134\n", 856 | "[749]\tvalidation_0-logloss:0.051287\n", 857 | "[750]\tvalidation_0-logloss:0.051284\n", 858 | "[751]\tvalidation_0-logloss:0.051233\n", 859 | "[752]\tvalidation_0-logloss:0.051263\n", 860 | "[753]\tvalidation_0-logloss:0.051265\n", 861 | "[754]\tvalidation_0-logloss:0.051236\n", 862 | "[755]\tvalidation_0-logloss:0.051194\n", 863 | "[756]\tvalidation_0-logloss:0.051195\n", 864 | "[757]\tvalidation_0-logloss:0.051184\n", 865 | "[758]\tvalidation_0-logloss:0.051185\n", 866 | "[759]\tvalidation_0-logloss:0.051193\n", 867 | "[760]\tvalidation_0-logloss:0.051189\n", 868 | "[761]\tvalidation_0-logloss:0.05121\n", 869 | "[762]\tvalidation_0-logloss:0.051224\n", 870 | "[763]\tvalidation_0-logloss:0.051215\n", 871 | "[764]\tvalidation_0-logloss:0.051225\n", 872 | "[765]\tvalidation_0-logloss:0.05122\n", 873 | "[766]\tvalidation_0-logloss:0.051214\n", 874 | "[767]\tvalidation_0-logloss:0.051217\n", 875 | "[768]\tvalidation_0-logloss:0.051153\n", 876 | "[769]\tvalidation_0-logloss:0.051175\n", 877 | "[770]\tvalidation_0-logloss:0.051184\n", 878 | 
"[771]\tvalidation_0-logloss:0.051155\n", 879 | "[772]\tvalidation_0-logloss:0.051161\n", 880 | "[773]\tvalidation_0-logloss:0.051173\n", 881 | "[774]\tvalidation_0-logloss:0.051129\n", 882 | "[775]\tvalidation_0-logloss:0.05113\n", 883 | "[776]\tvalidation_0-logloss:0.051133\n", 884 | "[777]\tvalidation_0-logloss:0.05113\n", 885 | "[778]\tvalidation_0-logloss:0.051179\n", 886 | "[779]\tvalidation_0-logloss:0.051184\n", 887 | "[780]\tvalidation_0-logloss:0.05119\n", 888 | "[781]\tvalidation_0-logloss:0.051223\n", 889 | "[782]\tvalidation_0-logloss:0.05124\n", 890 | "[783]\tvalidation_0-logloss:0.051257\n", 891 | "[784]\tvalidation_0-logloss:0.051237\n", 892 | "[785]\tvalidation_0-logloss:0.051253\n", 893 | "[786]\tvalidation_0-logloss:0.051251\n", 894 | "[787]\tvalidation_0-logloss:0.05127\n", 895 | "[788]\tvalidation_0-logloss:0.051229\n", 896 | "[789]\tvalidation_0-logloss:0.051234\n", 897 | "[790]\tvalidation_0-logloss:0.051248\n", 898 | "[791]\tvalidation_0-logloss:0.051239\n", 899 | "[792]\tvalidation_0-logloss:0.051252\n", 900 | "[793]\tvalidation_0-logloss:0.051244\n", 901 | "[794]\tvalidation_0-logloss:0.051216\n", 902 | "[795]\tvalidation_0-logloss:0.051157\n", 903 | "[796]\tvalidation_0-logloss:0.051146\n", 904 | "[797]\tvalidation_0-logloss:0.051136\n", 905 | "[798]\tvalidation_0-logloss:0.051145\n", 906 | "[799]\tvalidation_0-logloss:0.051095\n", 907 | "[800]\tvalidation_0-logloss:0.051038\n", 908 | "[801]\tvalidation_0-logloss:0.051042\n", 909 | "[802]\tvalidation_0-logloss:0.051068\n", 910 | "[803]\tvalidation_0-logloss:0.051057\n", 911 | "[804]\tvalidation_0-logloss:0.051037\n", 912 | "[805]\tvalidation_0-logloss:0.051038\n", 913 | "[806]\tvalidation_0-logloss:0.051051\n", 914 | "[807]\tvalidation_0-logloss:0.051066\n", 915 | "[808]\tvalidation_0-logloss:0.051082\n", 916 | "[809]\tvalidation_0-logloss:0.05111\n", 917 | "[810]\tvalidation_0-logloss:0.051078\n", 918 | "[811]\tvalidation_0-logloss:0.051088\n", 919 | 
"[812]\tvalidation_0-logloss:0.05107\n", 920 | "[813]\tvalidation_0-logloss:0.05109\n", 921 | "[814]\tvalidation_0-logloss:0.051096\n", 922 | "[815]\tvalidation_0-logloss:0.051063\n", 923 | "[816]\tvalidation_0-logloss:0.051078\n", 924 | "[817]\tvalidation_0-logloss:0.05112\n", 925 | "[818]\tvalidation_0-logloss:0.051098\n", 926 | "[819]\tvalidation_0-logloss:0.051084\n", 927 | "[820]\tvalidation_0-logloss:0.051089\n", 928 | "[821]\tvalidation_0-logloss:0.051091\n", 929 | "[822]\tvalidation_0-logloss:0.051126\n", 930 | "[823]\tvalidation_0-logloss:0.051129\n", 931 | "[824]\tvalidation_0-logloss:0.051108\n", 932 | "[825]\tvalidation_0-logloss:0.051081\n", 933 | "[826]\tvalidation_0-logloss:0.051024\n", 934 | "[827]\tvalidation_0-logloss:0.051035\n", 935 | "[828]\tvalidation_0-logloss:0.051073\n", 936 | "[829]\tvalidation_0-logloss:0.051068\n", 937 | "[830]\tvalidation_0-logloss:0.051069\n", 938 | "[831]\tvalidation_0-logloss:0.05107\n", 939 | "[832]\tvalidation_0-logloss:0.051078\n", 940 | "[833]\tvalidation_0-logloss:0.051109\n", 941 | "[834]\tvalidation_0-logloss:0.05114\n", 942 | "[835]\tvalidation_0-logloss:0.051139\n", 943 | "[836]\tvalidation_0-logloss:0.051149\n", 944 | "[837]\tvalidation_0-logloss:0.051157\n", 945 | "[838]\tvalidation_0-logloss:0.051101\n", 946 | "[839]\tvalidation_0-logloss:0.051059\n", 947 | "[840]\tvalidation_0-logloss:0.051035\n", 948 | "[841]\tvalidation_0-logloss:0.051051\n", 949 | "[842]\tvalidation_0-logloss:0.051019\n", 950 | "[843]\tvalidation_0-logloss:0.051027\n", 951 | "[844]\tvalidation_0-logloss:0.051023\n", 952 | "[845]\tvalidation_0-logloss:0.051014\n", 953 | "[846]\tvalidation_0-logloss:0.050978\n", 954 | "[847]\tvalidation_0-logloss:0.050986\n", 955 | "[848]\tvalidation_0-logloss:0.051006\n", 956 | "[849]\tvalidation_0-logloss:0.051006\n", 957 | "[850]\tvalidation_0-logloss:0.050975\n", 958 | "[851]\tvalidation_0-logloss:0.050924\n", 959 | "[852]\tvalidation_0-logloss:0.050981\n", 960 | 
"[853]\tvalidation_0-logloss:0.050986\n", 961 | "[854]\tvalidation_0-logloss:0.050999\n", 962 | "[855]\tvalidation_0-logloss:0.051015\n", 963 | "[856]\tvalidation_0-logloss:0.051002\n", 964 | "[857]\tvalidation_0-logloss:0.051028\n", 965 | "[858]\tvalidation_0-logloss:0.051043\n", 966 | "[859]\tvalidation_0-logloss:0.051069\n", 967 | "[860]\tvalidation_0-logloss:0.05104\n", 968 | "[861]\tvalidation_0-logloss:0.051037\n", 969 | "[862]\tvalidation_0-logloss:0.051034\n", 970 | "[863]\tvalidation_0-logloss:0.05105\n", 971 | "[864]\tvalidation_0-logloss:0.051067\n", 972 | "[865]\tvalidation_0-logloss:0.051106\n", 973 | "[866]\tvalidation_0-logloss:0.051126\n", 974 | "[867]\tvalidation_0-logloss:0.051137\n", 975 | "[868]\tvalidation_0-logloss:0.051116\n", 976 | "[869]\tvalidation_0-logloss:0.051134\n", 977 | "[870]\tvalidation_0-logloss:0.051117\n", 978 | "[871]\tvalidation_0-logloss:0.051115\n", 979 | "[872]\tvalidation_0-logloss:0.051127\n", 980 | "[873]\tvalidation_0-logloss:0.05112\n", 981 | "[874]\tvalidation_0-logloss:0.051123\n", 982 | "[875]\tvalidation_0-logloss:0.051123\n", 983 | "[876]\tvalidation_0-logloss:0.05116\n", 984 | "[877]\tvalidation_0-logloss:0.051147\n", 985 | "[878]\tvalidation_0-logloss:0.051142\n", 986 | "[879]\tvalidation_0-logloss:0.051151\n", 987 | "[880]\tvalidation_0-logloss:0.051158\n", 988 | "[881]\tvalidation_0-logloss:0.051153\n", 989 | "[882]\tvalidation_0-logloss:0.051168\n", 990 | "[883]\tvalidation_0-logloss:0.051183\n", 991 | "[884]\tvalidation_0-logloss:0.051166\n", 992 | "[885]\tvalidation_0-logloss:0.05115\n", 993 | "[886]\tvalidation_0-logloss:0.05119\n", 994 | "[887]\tvalidation_0-logloss:0.05113\n", 995 | "[888]\tvalidation_0-logloss:0.051128\n", 996 | "[889]\tvalidation_0-logloss:0.051138\n", 997 | "[890]\tvalidation_0-logloss:0.051185\n", 998 | "[891]\tvalidation_0-logloss:0.051177\n", 999 | "[892]\tvalidation_0-logloss:0.051123\n", 1000 | "[893]\tvalidation_0-logloss:0.051136\n", 1001 | 
"[894]\tvalidation_0-logloss:0.051123\n", 1002 | "[895]\tvalidation_0-logloss:0.051081\n", 1003 | "[896]\tvalidation_0-logloss:0.051078\n", 1004 | "[897]\tvalidation_0-logloss:0.051074\n", 1005 | "[898]\tvalidation_0-logloss:0.05104\n", 1006 | "[899]\tvalidation_0-logloss:0.051036\n", 1007 | "[900]\tvalidation_0-logloss:0.051008\n", 1008 | "[901]\tvalidation_0-logloss:0.051029\n", 1009 | "[902]\tvalidation_0-logloss:0.05099\n", 1010 | "[903]\tvalidation_0-logloss:0.050992\n", 1011 | "[904]\tvalidation_0-logloss:0.050986\n", 1012 | "[905]\tvalidation_0-logloss:0.050987\n", 1013 | "[906]\tvalidation_0-logloss:0.05101\n", 1014 | "[907]\tvalidation_0-logloss:0.05102\n", 1015 | "[908]\tvalidation_0-logloss:0.051002\n", 1016 | "[909]\tvalidation_0-logloss:0.051003\n", 1017 | "[910]\tvalidation_0-logloss:0.051029\n", 1018 | "[911]\tvalidation_0-logloss:0.05104\n", 1019 | "[912]\tvalidation_0-logloss:0.051014\n", 1020 | "[913]\tvalidation_0-logloss:0.050983\n", 1021 | "[914]\tvalidation_0-logloss:0.050989\n", 1022 | "[915]\tvalidation_0-logloss:0.050972\n", 1023 | "[916]\tvalidation_0-logloss:0.050984\n" 1024 | ] 1025 | }, 1026 | { 1027 | "name": "stdout", 1028 | "output_type": "stream", 1029 | "text": [ 1030 | "[917]\tvalidation_0-logloss:0.050993\n", 1031 | "[918]\tvalidation_0-logloss:0.050991\n", 1032 | "[919]\tvalidation_0-logloss:0.050984\n", 1033 | "[920]\tvalidation_0-logloss:0.051026\n", 1034 | "[921]\tvalidation_0-logloss:0.051019\n", 1035 | "[922]\tvalidation_0-logloss:0.051032\n", 1036 | "[923]\tvalidation_0-logloss:0.051023\n", 1037 | "[924]\tvalidation_0-logloss:0.051015\n", 1038 | "[925]\tvalidation_0-logloss:0.050967\n", 1039 | "[926]\tvalidation_0-logloss:0.050971\n", 1040 | "[927]\tvalidation_0-logloss:0.050952\n", 1041 | "[928]\tvalidation_0-logloss:0.050912\n", 1042 | "[929]\tvalidation_0-logloss:0.050893\n", 1043 | "[930]\tvalidation_0-logloss:0.050879\n", 1044 | "[931]\tvalidation_0-logloss:0.050896\n", 1045 | "[932]\tvalidation_0-logloss:0.050879\n", 
1046 | "[933]\tvalidation_0-logloss:0.05084\n", 1047 | "[934]\tvalidation_0-logloss:0.050841\n", 1048 | "[935]\tvalidation_0-logloss:0.050831\n", 1049 | "[936]\tvalidation_0-logloss:0.050808\n", 1050 | "[937]\tvalidation_0-logloss:0.05077\n", 1051 | "[938]\tvalidation_0-logloss:0.050803\n", 1052 | "[939]\tvalidation_0-logloss:0.050806\n", 1053 | "[940]\tvalidation_0-logloss:0.050853\n", 1054 | "[941]\tvalidation_0-logloss:0.050862\n", 1055 | "[942]\tvalidation_0-logloss:0.050849\n", 1056 | "[943]\tvalidation_0-logloss:0.050879\n", 1057 | "[944]\tvalidation_0-logloss:0.050849\n", 1058 | "[945]\tvalidation_0-logloss:0.05086\n", 1059 | "[946]\tvalidation_0-logloss:0.050848\n", 1060 | "[947]\tvalidation_0-logloss:0.050843\n", 1061 | "[948]\tvalidation_0-logloss:0.050865\n", 1062 | "[949]\tvalidation_0-logloss:0.050871\n", 1063 | "[950]\tvalidation_0-logloss:0.050914\n", 1064 | "[951]\tvalidation_0-logloss:0.050821\n", 1065 | "[952]\tvalidation_0-logloss:0.050855\n", 1066 | "[953]\tvalidation_0-logloss:0.050885\n", 1067 | "[954]\tvalidation_0-logloss:0.050867\n", 1068 | "[955]\tvalidation_0-logloss:0.050838\n", 1069 | "[956]\tvalidation_0-logloss:0.050817\n", 1070 | "[957]\tvalidation_0-logloss:0.050813\n", 1071 | "[958]\tvalidation_0-logloss:0.050788\n", 1072 | "[959]\tvalidation_0-logloss:0.050803\n", 1073 | "[960]\tvalidation_0-logloss:0.050832\n", 1074 | "[961]\tvalidation_0-logloss:0.050808\n", 1075 | "[962]\tvalidation_0-logloss:0.050808\n", 1076 | "[963]\tvalidation_0-logloss:0.05082\n", 1077 | "[964]\tvalidation_0-logloss:0.050837\n", 1078 | "[965]\tvalidation_0-logloss:0.050838\n", 1079 | "[966]\tvalidation_0-logloss:0.050867\n", 1080 | "[967]\tvalidation_0-logloss:0.05089\n", 1081 | "[968]\tvalidation_0-logloss:0.050868\n", 1082 | "[969]\tvalidation_0-logloss:0.050897\n", 1083 | "[970]\tvalidation_0-logloss:0.050888\n", 1084 | "[971]\tvalidation_0-logloss:0.050896\n", 1085 | "[972]\tvalidation_0-logloss:0.050846\n", 1086 | 
"[973]\tvalidation_0-logloss:0.050849\n", 1087 | "[974]\tvalidation_0-logloss:0.050864\n", 1088 | "[975]\tvalidation_0-logloss:0.05087\n", 1089 | "[976]\tvalidation_0-logloss:0.05087\n", 1090 | "[977]\tvalidation_0-logloss:0.050834\n", 1091 | "[978]\tvalidation_0-logloss:0.05086\n", 1092 | "[979]\tvalidation_0-logloss:0.050873\n", 1093 | "[980]\tvalidation_0-logloss:0.050919\n", 1094 | "[981]\tvalidation_0-logloss:0.050921\n", 1095 | "[982]\tvalidation_0-logloss:0.050916\n", 1096 | "[983]\tvalidation_0-logloss:0.050923\n", 1097 | "[984]\tvalidation_0-logloss:0.050928\n", 1098 | "[985]\tvalidation_0-logloss:0.050924\n", 1099 | "[986]\tvalidation_0-logloss:0.050926\n", 1100 | "[987]\tvalidation_0-logloss:0.050944\n", 1101 | "[988]\tvalidation_0-logloss:0.050922\n", 1102 | "[989]\tvalidation_0-logloss:0.05094\n", 1103 | "[990]\tvalidation_0-logloss:0.050944\n", 1104 | "[991]\tvalidation_0-logloss:0.050953\n", 1105 | "[992]\tvalidation_0-logloss:0.051029\n", 1106 | "[993]\tvalidation_0-logloss:0.051029\n", 1107 | "[994]\tvalidation_0-logloss:0.051012\n", 1108 | "[995]\tvalidation_0-logloss:0.051004\n", 1109 | "[996]\tvalidation_0-logloss:0.051006\n", 1110 | "[997]\tvalidation_0-logloss:0.051032\n", 1111 | "[998]\tvalidation_0-logloss:0.051042\n", 1112 | "[999]\tvalidation_0-logloss:0.051023\n", 1113 | "[1000]\tvalidation_0-logloss:0.051003\n", 1114 | "[1001]\tvalidation_0-logloss:0.050997\n", 1115 | "[1002]\tvalidation_0-logloss:0.050996\n", 1116 | "[1003]\tvalidation_0-logloss:0.050995\n", 1117 | "[1004]\tvalidation_0-logloss:0.051002\n", 1118 | "[1005]\tvalidation_0-logloss:0.050969\n", 1119 | "[1006]\tvalidation_0-logloss:0.050973\n", 1120 | "[1007]\tvalidation_0-logloss:0.050965\n", 1121 | "[1008]\tvalidation_0-logloss:0.050963\n", 1122 | "[1009]\tvalidation_0-logloss:0.050916\n", 1123 | "[1010]\tvalidation_0-logloss:0.050933\n", 1124 | "[1011]\tvalidation_0-logloss:0.050879\n", 1125 | "[1012]\tvalidation_0-logloss:0.050873\n", 1126 | 
"[1013]\tvalidation_0-logloss:0.050844\n", 1127 | "[1014]\tvalidation_0-logloss:0.050832\n", 1128 | "[1015]\tvalidation_0-logloss:0.050857\n", 1129 | "[1016]\tvalidation_0-logloss:0.050871\n", 1130 | "[1017]\tvalidation_0-logloss:0.050874\n", 1131 | "[1018]\tvalidation_0-logloss:0.05086\n", 1132 | "[1019]\tvalidation_0-logloss:0.050907\n", 1133 | "[1020]\tvalidation_0-logloss:0.050902\n", 1134 | "[1021]\tvalidation_0-logloss:0.050864\n", 1135 | "[1022]\tvalidation_0-logloss:0.050864\n", 1136 | "[1023]\tvalidation_0-logloss:0.050817\n", 1137 | "[1024]\tvalidation_0-logloss:0.050843\n", 1138 | "[1025]\tvalidation_0-logloss:0.050874\n", 1139 | "[1026]\tvalidation_0-logloss:0.050866\n", 1140 | "[1027]\tvalidation_0-logloss:0.050883\n", 1141 | "[1028]\tvalidation_0-logloss:0.050906\n", 1142 | "[1029]\tvalidation_0-logloss:0.050922\n", 1143 | "[1030]\tvalidation_0-logloss:0.05092\n", 1144 | "[1031]\tvalidation_0-logloss:0.05093\n", 1145 | "[1032]\tvalidation_0-logloss:0.050953\n", 1146 | "[1033]\tvalidation_0-logloss:0.050957\n", 1147 | "[1034]\tvalidation_0-logloss:0.050978\n", 1148 | "[1035]\tvalidation_0-logloss:0.050978\n", 1149 | "[1036]\tvalidation_0-logloss:0.050981\n", 1150 | "[1037]\tvalidation_0-logloss:0.050973\n", 1151 | "Stopping. 
Best iteration:\n", 1152 | "[937]\tvalidation_0-logloss:0.05077\n", 1153 | "\n" 1154 | ] 1155 | }, 1156 | { 1157 | "data": { 1158 | "text/plain": [ 1159 | "XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,\n", 1160 | " gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=8,\n", 1161 | " min_child_weight=1, missing=None, n_estimators=2000, nthread=-1,\n", 1162 | " objective='binary:logistic', reg_alpha=0, reg_lambda=1,\n", 1163 | " scale_pos_weight=1, seed=0, silent=True, subsample=1)" 1164 | ] 1165 | }, 1166 | "execution_count": 11, 1167 | "metadata": {}, 1168 | "output_type": "execute_result" 1169 | } 1170 | ], 1171 | "source": [ 1172 | "model = XGBClassifier(\n", 1173 | " learning_rate =0.1,\n", 1174 | " n_estimators=2000,\n", 1175 | " max_depth=8,\n", 1176 | ")\n", 1177 | "\n", 1178 | "#model.fit(X, y, eval_set=[(X, y), (x_test, y_test)], eval_metric='logloss', verbose=True)\n", 1179 | "model.fit(x_train, y_train, eval_set=[(x_test, y_test)], eval_metric='logloss',early_stopping_rounds=500, verbose=True)" 1180 | ] 1181 | }, 1182 | { 1183 | "cell_type": "code", 1184 | "execution_count": 12, 1185 | "metadata": {}, 1186 | "outputs": [], 1187 | "source": [ 1188 | "\n", 1189 | "pickle.dump(model, open(\"xgb.model.dat\", \"wb\"))\n", 1190 | "#loaded_model = pickle.load(open(\"pima.pickle.dat\", \"rb\"))" 1191 | ] 1192 | }, 1193 | { 1194 | "cell_type": "code", 1195 | "execution_count": 7, 1196 | "metadata": {}, 1197 | "outputs": [], 1198 | "source": [ 1199 | "test = pd.read_csv('data/test.csv', header=None, names = list(range(0,max_features)))\n", 1200 | "\n", 1201 | "number_of_nan = test.isnull().sum(axis=1)\n", 1202 | "test = test.fillna(0)\n", 1203 | "test = test.astype(int)\n", 1204 | "number_valid = max_features - number_of_nan\n", 1205 | "test_describe = test.apply(pd.DataFrame.describe, axis=1)\n", 1206 | "test = pd.concat([test, number_of_nan, number_valid, test_describe], axis=1)\n", 1207 | "test.columns.values[1024] = 
\"number_of_nan\"\n", 1208 | "test.columns.values[1025] = \"number_valid\"" 1209 | ] 1210 | }, 1211 | { 1212 | "cell_type": "code", 1213 | "execution_count": 13, 1214 | "metadata": {}, 1215 | "outputs": [], 1216 | "source": [ 1217 | "best_iter = model.best_iteration\n", 1218 | "y_pred = model.predict_proba(test, ntree_limit = best_iter)[:, 1]\n", 1219 | "\n", 1220 | "df = pd.DataFrame(y_pred)\n", 1221 | "df.to_csv(\"data/sgb_prob.csv\")" 1222 | ] 1223 | }, 1224 | { 1225 | "cell_type": "code", 1226 | "execution_count": 14, 1227 | "metadata": {}, 1228 | "outputs": [ 1229 | { 1230 | "data": { 1231 | "text/plain": [ 1232 | "9561" 1233 | ] 1234 | }, 1235 | "execution_count": 14, 1236 | "metadata": {}, 1237 | "output_type": "execute_result" 1238 | } 1239 | ], 1240 | "source": [ 1241 | "best_iter" 1242 | ] 1243 | }, 1244 | { 1245 | "cell_type": "code", 1246 | "execution_count": null, 1247 | "metadata": {}, 1248 | "outputs": [], 1249 | "source": [] 1250 | } 1251 | ], 1252 | "metadata": { 1253 | "kernelspec": { 1254 | "display_name": "Python 3", 1255 | "language": "python", 1256 | "name": "python3" 1257 | }, 1258 | "language_info": { 1259 | "codemirror_mode": { 1260 | "name": "ipython", 1261 | "version": 3 1262 | }, 1263 | "file_extension": ".py", 1264 | "mimetype": "text/x-python", 1265 | "name": "python", 1266 | "nbconvert_exporter": "python", 1267 | "pygments_lexer": "ipython3", 1268 | "version": "3.6.6" 1269 | } 1270 | }, 1271 | "nbformat": 4, 1272 | "nbformat_minor": 2 1273 | } 1274 | --------------------------------------------------------------------------------