├── .kaggle-cli
│   └── config
├── README.md
└── model_construction.ipynb

/.kaggle-cli/config:
--------------------------------------------------------------------------------
[user]
competition = dogs-vs-cats

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
You can read more about this repository [here](https://medium.com/@radekosmulski/can-we-beat-the-state-of-the-art-from-2013-with-only-0-046-of-training-examples-yes-we-can-18be24b8615f).

Steps to reproduce:
1. Download the data from the dogs-vs-cats Kaggle competition into the root of the repository.
2. Unzip it (the images should end up in the `train` directory under the root of the repository).
3. Run all the cells in the notebook.

Results may vary, but they should be within reason. Apart from the outcome being sensitive to how lucky we get on the draw of the 6 random training images, the 41 epochs of training are sometimes not enough.

--------------------------------------------------------------------------------
/model_construction.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import os, shutil, random, glob\n",
    "import bcolz\n",
    "import keras\n",
    "import keras.preprocessing.image\n",
    "from keras.layers import Input, Flatten, Dense, Dropout, Activation, BatchNormalization, GlobalMaxPooling2D\n",
    "from keras.preprocessing.image import ImageDataGenerator\n",
    "from keras.optimizers import Adam\n",
    "from keras.applications.vgg19 import preprocess_input\n",
    "from keras.applications.vgg19 import VGG19\n",
    "from keras.models import Model\n",
    "%matplotlib inline\n",
    "from matplotlib import pyplot as plt\n",
    "import numpy as np\n",
    "import scipy\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The code below assumes that the train data from the https://www.kaggle.com/c/dogs-vs-cats competition has been downloaded and unzipped into the `train` directory under the root of the repository."
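,
    "\n",
    "\n",
    "A quick sanity check before running anything (a minimal sketch; it assumes the standard 25,000-image train set has been unzipped and not yet rearranged):\n",
    "\n",
    "```python\n",
    "# sketch: the class-sorting cell below expects the raw images here\n",
    "import glob\n",
    "assert len(glob.glob('train/*.jpg')) == 25000\n",
    "```"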
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# sort the unpacked images into one subdirectory per class,\n",
    "# which is the layout flow_from_directory expects\n",
    "files = glob.glob('train/*')\n",
    "fnames = [f.split('/')[1] for f in files]\n",
    "\n",
    "os.makedirs('train/cats', exist_ok=True)\n",
    "os.makedirs('train/dogs', exist_ok=True)\n",
    "\n",
    "for fname in fnames:\n",
    "    dogs_or_cats = 'dogs' if 'dog' in fname else 'cats'\n",
    "    shutil.move(f'train/{fname}', f'train/{dogs_or_cats}/{fname}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 25000 images belonging to 2 classes.\n"
     ]
    }
   ],
   "source": [
    "gen = ImageDataGenerator(preprocessing_function=preprocess_input)\n",
    "train_data = gen.flow_from_directory('train', target_size=(224, 224), batch_size=1, shuffle=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train_filenames = train_data.filenames\n",
    "bcolz.carray(train_filenames, rootdir='train_filenames', mode='w').flush()\n",
    "train_y = keras.utils.to_categorical(train_data.classes)\n",
    "bcolz.carray(train_y, rootdir='train_y', mode='w').flush()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "base_model = VGG19(\n",
    "    include_top=False,\n",
    "    weights='imagenet',\n",
    "    input_shape=(224, 224, 3),\n",
    "    pooling=None\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# batch_size is 1 and shuffle is False above, so n steps visit each\n",
    "# image exactly once and the features stay aligned with train_y\n",
    "train_X = base_model.predict_generator(train_data, steps=train_data.n)\n",
    "bcolz.carray(train_X, rootdir='train_X', mode='w').flush()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# draw 6 unique training examples; everything else is held out\n",
    "trn_ids = np.random.choice(25000, size=6, replace=False)\n",
    "val_ids = np.delete(np.arange(25000), trn_ids)\n",
    "\n",
    "trn_X = train_X[trn_ids, ...]\n",
    "trn_y = train_y[trn_ids]\n",
    "\n",
    "# sample the quick validation subset from the held-out ids so the\n",
    "# 6 training images cannot leak into it\n",
    "random_subset = np.random.choice(val_ids, size=500, replace=False)\n",
    "val_X = train_X[random_subset, ...]\n",
    "val_y = train_y[random_subset]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "inputs = Input(shape=(7, 7, 512))\n",
    "# x = keras.layers.MaxPooling2D(pool_size=(2,2), strides=(2,2))(inputs)\n",
    "# x = Flatten()(x)\n",
    "# x = Dense(4096)(x)\n",
    "\n",
    "x = GlobalMaxPooling2D()(inputs)\n",
    "x = Dense(4096)(x)\n",
    "x = BatchNormalization()(x)\n",
    "x = Activation('relu')(x)\n",
    "x = Dense(2)(x)\n",
    "x = BatchNormalization()(x)\n",
    "predictions = Activation('softmax')(x)\n",
    "\n",
    "model = Model(inputs, predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
"_________________________________________________________________\n", 172 | "Layer (type) Output Shape Param # \n", 173 | "=================================================================\n", 174 | "input_2 (InputLayer) (None, 7, 7, 512) 0 \n", 175 | "_________________________________________________________________\n", 176 | "global_max_pooling2d_1 (Glob (None, 512) 0 \n", 177 | "_________________________________________________________________\n", 178 | "dense_1 (Dense) (None, 4096) 2101248 \n", 179 | "_________________________________________________________________\n", 180 | "batch_normalization_1 (Batch (None, 4096) 16384 \n", 181 | "_________________________________________________________________\n", 182 | "activation_1 (Activation) (None, 4096) 0 \n", 183 | "_________________________________________________________________\n", 184 | "dense_2 (Dense) (None, 2) 8194 \n", 185 | "_________________________________________________________________\n", 186 | "batch_normalization_2 (Batch (None, 2) 8 \n", 187 | "_________________________________________________________________\n", 188 | "activation_2 (Activation) (None, 2) 0 \n", 189 | "=================================================================\n", 190 | "Total params: 2,125,834\n", 191 | "Trainable params: 2,117,638\n", 192 | "Non-trainable params: 8,196\n", 193 | "_________________________________________________________________\n" 194 | ] 195 | } 196 | ], 197 | "source": [ 198 | "model.summary()" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 10, 204 | "metadata": { 205 | "collapsed": true 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "model.compile(Adam(lr=1e-4), 'categorical_crossentropy', metrics=['accuracy'])" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 11, 215 | "metadata": {}, 216 | "outputs": [ 217 | { 218 | "name": "stdout", 219 | "output_type": "stream", 220 | "text": [ 221 | "Train on 6 samples, validate on 500 samples\n", 222 | "Epoch 1/40\n", 223 | "13s - loss: 1.3000 - acc: 0.1667 - val_loss: 3.6826 - val_acc: 0.5820\n", 224 | "Epoch 2/40\n", 225 | "0s - loss: 0.3923 - acc: 1.0000 - val_loss: 2.5259 - val_acc: 0.6480\n", 226 | "Epoch 3/40\n", 227 | "0s - loss: 0.2874 - acc: 1.0000 - val_loss: 1.9335 - val_acc: 0.7020\n", 228 | "Epoch 4/40\n", 229 | "0s - loss: 0.2282 - acc: 1.0000 - val_loss: 1.5719 - val_acc: 0.7280\n", 230 | "Epoch 5/40\n", 231 | "0s - loss: 0.1965 - acc: 1.0000 - val_loss: 1.3259 - val_acc: 0.7400\n", 232 | "Epoch 6/40\n", 233 | "0s - loss: 0.1788 - acc: 1.0000 - val_loss: 1.1501 - val_acc: 0.7540\n", 234 | "Epoch 7/40\n", 235 | "0s - loss: 0.1686 - acc: 1.0000 - val_loss: 1.0168 - val_acc: 0.7640\n", 236 | "Epoch 8/40\n", 237 | "0s - loss: 0.1625 - acc: 1.0000 - val_loss: 0.9130 - val_acc: 0.7720\n", 238 | "Epoch 9/40\n", 239 | "0s - loss: 0.1587 - acc: 1.0000 - val_loss: 0.8284 - val_acc: 0.7800\n", 240 | "Epoch 10/40\n", 241 | "0s - loss: 0.1562 - acc: 1.0000 - val_loss: 0.7584 - val_acc: 0.7860\n", 242 | "Epoch 11/40\n", 243 | "0s - loss: 0.1544 - acc: 1.0000 - val_loss: 0.7007 - val_acc: 0.7920\n", 244 | "Epoch 12/40\n", 245 | "0s - loss: 0.1531 - acc: 1.0000 - val_loss: 0.6527 - val_acc: 0.7980\n", 246 | "Epoch 13/40\n", 247 | "0s - loss: 0.1521 - acc: 1.0000 - val_loss: 0.6126 - val_acc: 0.8000\n", 248 | "Epoch 14/40\n", 249 | "0s - loss: 0.1512 - acc: 1.0000 - val_loss: 0.5793 - val_acc: 0.8000\n", 250 | "Epoch 15/40\n", 251 | "0s - loss: 0.1504 - acc: 1.0000 - val_loss: 0.5514 - val_acc: 0.8040\n", 252 | "Epoch 
16/40\n", 253 | "0s - loss: 0.1496 - acc: 1.0000 - val_loss: 0.5282 - val_acc: 0.8040\n", 254 | "Epoch 17/40\n", 255 | "0s - loss: 0.1488 - acc: 1.0000 - val_loss: 0.5088 - val_acc: 0.8100\n", 256 | "Epoch 18/40\n", 257 | "0s - loss: 0.1480 - acc: 1.0000 - val_loss: 0.4927 - val_acc: 0.8100\n", 258 | "Epoch 19/40\n", 259 | "0s - loss: 0.1472 - acc: 1.0000 - val_loss: 0.4793 - val_acc: 0.8100\n", 260 | "Epoch 20/40\n", 261 | "0s - loss: 0.1464 - acc: 1.0000 - val_loss: 0.4682 - val_acc: 0.8120\n", 262 | "Epoch 21/40\n", 263 | "0s - loss: 0.1456 - acc: 1.0000 - val_loss: 0.4591 - val_acc: 0.8140\n", 264 | "Epoch 22/40\n", 265 | "0s - loss: 0.1448 - acc: 1.0000 - val_loss: 0.4516 - val_acc: 0.8140\n", 266 | "Epoch 23/40\n", 267 | "0s - loss: 0.1441 - acc: 1.0000 - val_loss: 0.4455 - val_acc: 0.8140\n", 268 | "Epoch 24/40\n", 269 | "0s - loss: 0.1433 - acc: 1.0000 - val_loss: 0.4405 - val_acc: 0.8140\n", 270 | "Epoch 25/40\n", 271 | "0s - loss: 0.1425 - acc: 1.0000 - val_loss: 0.4366 - val_acc: 0.8140\n", 272 | "Epoch 26/40\n", 273 | "0s - loss: 0.1418 - acc: 1.0000 - val_loss: 0.4336 - val_acc: 0.8140\n", 274 | "Epoch 27/40\n", 275 | "0s - loss: 0.1411 - acc: 1.0000 - val_loss: 0.4312 - val_acc: 0.8120\n", 276 | "Epoch 28/40\n", 277 | "0s - loss: 0.1404 - acc: 1.0000 - val_loss: 0.4295 - val_acc: 0.8100\n", 278 | "Epoch 29/40\n", 279 | "0s - loss: 0.1397 - acc: 1.0000 - val_loss: 0.4283 - val_acc: 0.8100\n", 280 | "Epoch 30/40\n", 281 | "0s - loss: 0.1391 - acc: 1.0000 - val_loss: 0.4275 - val_acc: 0.8100\n", 282 | "Epoch 31/40\n", 283 | "0s - loss: 0.1385 - acc: 1.0000 - val_loss: 0.4271 - val_acc: 0.8100\n", 284 | "Epoch 32/40\n", 285 | "0s - loss: 0.1379 - acc: 1.0000 - val_loss: 0.4270 - val_acc: 0.8100\n", 286 | "Epoch 33/40\n", 287 | "0s - loss: 0.1374 - acc: 1.0000 - val_loss: 0.4270 - val_acc: 0.8060\n", 288 | "Epoch 34/40\n", 289 | "0s - loss: 0.1369 - acc: 1.0000 - val_loss: 0.4273 - val_acc: 0.8060\n", 290 | "Epoch 35/40\n", 291 | "0s - loss: 0.1364 - acc: 1.0000 - val_loss: 0.4277 - val_acc: 0.8040\n", 292 | "Epoch 36/40\n", 293 | "0s - loss: 0.1359 - acc: 1.0000 - val_loss: 0.4283 - val_acc: 0.8020\n", 294 | "Epoch 37/40\n", 295 | "0s - loss: 0.1355 - acc: 1.0000 - val_loss: 0.4289 - val_acc: 0.8020\n", 296 | "Epoch 38/40\n", 297 | "0s - loss: 0.1351 - acc: 1.0000 - val_loss: 0.4297 - val_acc: 0.8000\n", 298 | "Epoch 39/40\n", 299 | "0s - loss: 0.1347 - acc: 1.0000 - val_loss: 0.4305 - val_acc: 0.8000\n", 300 | "Epoch 40/40\n", 301 | "0s - loss: 0.1343 - acc: 1.0000 - val_loss: 0.4313 - val_acc: 0.8000\n" 302 | ] 303 | }, 304 | { 305 | "data": { 306 | "text/plain": [ 307 | "" 308 | ] 309 | }, 310 | "execution_count": 11, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "model.fit(x=trn_X, y=trn_y, batch_size=6, epochs=40, validation_data=(val_X, val_y), verbose=2)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": {}, 322 | "source": [ 323 | "Let's validate on the entire training set." 
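,
    "\n",
    "\n",
    "The cell below reuses `fit` with `validation_data` to get that score, at the price of one extra training epoch on the 6 images. A close alternative (a minimal sketch, using only names defined above) would score the held-out examples without any further training:\n",
    "\n",
    "```python\n",
    "# sketch: evaluate on the 24,994 held-out feature vectors directly\n",
    "loss, acc = model.evaluate(train_X[val_ids], train_y[val_ids], verbose=0)\n",
    "```"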
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "val_X = train_X[val_ids, ...]\n",
    "val_y = train_y[val_ids]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 6 samples, validate on 24994 samples\n",
      "Epoch 1/1\n",
      "10s - loss: 0.1340 - acc: 1.0000 - val_loss: 0.4262 - val_acc: 0.8204\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       ""
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(x=trn_X, y=trn_y, batch_size=6, epochs=1, validation_data=(val_X, val_y), verbose=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['dogs/dog.9455.jpg',\n",
       " 'cats/cat.4549.jpg',\n",
       " 'cats/cat.10649.jpg',\n",
       " 'dogs/dog.1881.jpg',\n",
       " 'dogs/dog.4863.jpg',\n",
       " 'cats/cat.9190.jpg']"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[train_filenames[idx] for idx in trn_ids]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
--------------------------------------------------------------------------------