├── Examples.ipynb ├── Imagewoof from scratch.ipynb ├── LICENSE ├── Pretrained on ImageWoof.ipynb ├── README.md ├── efficientnet.py └── train.py /Examples.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Runs on ImageWoof dataset" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "lr: 0.003; eff_lr: 0.003; size: 300; alpha: 0.99; mom: 0.9; eps: 1e-06\n" 22 | ] 23 | }, 24 | { 25 | "data": { 26 | "text/html": [], 27 | "text/plain": [ 28 | "" 29 | ] 30 | }, 31 | "metadata": {}, 32 | "output_type": "display_data" 33 | }, 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.\n" 39 | ] 40 | }, 41 | { 42 | "data": { 43 | "image/png": "\n", 44 | "text/plain": [ 45 | "
" 46 | ] 47 | }, 48 | "metadata": { 49 | "needs_background": "light" 50 | }, 51 | "output_type": "display_data" 52 | } 53 | ], 54 | "source": [ 55 | "\n", 56 | "%run train.py --woof 1 --size 300 --bs 32 --mixup 0 --epoch 5 --lr 3e-3 --arch 'efficientnetB3' --lrfinder 1\n" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 3, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "lr: 0.001; eff_lr: 0.001; size: 300; alpha: 0.99; mom: 0.9; eps: 1e-06\n" 69 | ] 70 | }, 71 | { 72 | "data": { 73 | "text/html": [ 74 | "\n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | "
epochtrain_lossvalid_lossaccuracytop_k_accuracytime
02.4254572.2780510.1360000.59600002:55
12.2113192.1462100.2040000.70800002:55
22.0811412.0388900.2680000.80200002:55
31.9548451.9287650.3300000.84600002:55
41.8796871.8830100.3660000.84600002:55
" 128 | ], 129 | "text/plain": [ 130 | "" 131 | ] 132 | }, 133 | "metadata": {}, 134 | "output_type": "display_data" 135 | } 136 | ], 137 | "source": [ 138 | "\n", 139 | "%run train.py --woof 1 --size 300 --bs 32 --mixup 0 --epoch 5 --lr 1e-3 --arch 'efficientnetB3' \n" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 5, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | "lr: 0.001; eff_lr: 0.001; size: 224; alpha: 0.99; mom: 0.9; eps: 1e-06\n" 152 | ] 153 | }, 154 | { 155 | "data": { 156 | "text/html": [ 157 | "\n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | "
epochtrain_lossvalid_lossaccuracytop_k_accuracytime
02.3460462.3009280.1100000.56000000:55
12.2154812.1520910.2160000.71800000:54
22.0801962.0348560.2820000.79800000:54
31.9482251.8847090.3380000.82800000:54
41.8734701.8372160.3660000.85600000:54
" 211 | ], 212 | "text/plain": [ 213 | "" 214 | ] 215 | }, 216 | "metadata": {}, 217 | "output_type": "display_data" 218 | } 219 | ], 220 | "source": [ 221 | "\n", 222 | "%run train.py --woof 1 --size 224 --bs 64 --mixup 0 --epoch 5 --lr 1e-3 --arch 'efficientnetB0' \n" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [] 231 | } 232 | ], 233 | "metadata": { 234 | "kernelspec": { 235 | "display_name": "Python 3", 236 | "language": "python", 237 | "name": "python3" 238 | }, 239 | "language_info": { 240 | "codemirror_mode": { 241 | "name": "ipython", 242 | "version": 3 243 | }, 244 | "file_extension": ".py", 245 | "mimetype": "text/x-python", 246 | "name": "python", 247 | "nbconvert_exporter": "python", 248 | "pygments_lexer": "ipython3", 249 | "version": "3.7.1" 250 | } 251 | }, 252 | "nbformat": 4, 253 | "nbformat_minor": 2 254 | } 255 | -------------------------------------------------------------------------------- /Imagewoof from scratch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Training from scratch on Imagewoof with the efficientnet_pytorch repo" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "https://github.com/lukemelas/EfficientNet-PyTorch" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "name": "stdout", 24 | "output_type": "stream", 25 | "text": [ 26 | "Collecting efficientnet_pytorch\n", 27 | " Downloading https://files.pythonhosted.org/packages/06/ff/881afd965c46b11fc6f3c8316de9e08d37fc3b71056dbab861b76faee6ca/efficientnet_pytorch-0.1.0-py3-none-any.whl\n", 28 | "Requirement already satisfied: torch in /opt/conda/lib/python3.7/site-packages (from efficientnet_pytorch) (1.0.0)\n", 29 | "Installing 
collected packages: efficientnet-pytorch\n", 30 | "Successfully installed efficientnet-pytorch-0.1.0\n", 31 | "\u001b[33mYou are using pip version 10.0.1, however version 19.1.1 is available.\n", 32 | "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "!pip install efficientnet_pytorch" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "from fastai.script import *\n", 47 | "from fastai.vision import *\n", 48 | "from fastai.callbacks import *\n", 49 | "from fastai.distributed import *\n", 50 | "from fastprogress import fastprogress\n", 51 | "from torchvision.models import *\n", 52 | "from efficientnet_pytorch import EfficientNet\n", 53 | "import sys\n", 54 | "\n", 55 | "torch.backends.cudnn.benchmark = True\n", 56 | "fastprogress.MAX_COLS = 80\n", 57 | "\n", 58 | "def get_data(size, woof, bs, workers=None):\n", 59 | " if size<=128: path = URLs.IMAGEWOOF_160 if woof else URLs.IMAGENETTE_160\n", 60 | " elif size<=224: path = URLs.IMAGEWOOF_320 if woof else URLs.IMAGENETTE_320\n", 61 | " else : path = URLs.IMAGEWOOF if woof else URLs.IMAGENETTE\n", 62 | " path = untar_data(path)\n", 63 | "\n", 64 | " n_gpus = num_distrib() or 1\n", 65 | " if workers is None: workers = min(8, num_cpus()//n_gpus)\n", 66 | "\n", 67 | " return (ImageList.from_folder(path).split_by_folder(valid='val')\n", 68 | " .label_from_folder().transform(([flip_lr(p=0.5)], []), size=size)\n", 69 | " .databunch(bs=bs, num_workers=workers)\n", 70 | " .presize(size, scale=(0.35,1))\n", 71 | " .normalize(imagenet_stats))\n", 72 | "\n" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "Change image size and batch size below depending on model:" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 14, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "data = 
get_data(300,1,16) #240, bs=32 for B1, 300, bs=16 for B3" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 15, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "opt_func = partial(optim.Adam, betas=(0.9,0.99), eps=1e-6)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "Pick model below:" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 33, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "m = EfficientNet.from_name('efficientnet-b3')" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 34, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "m._fc = nn.Linear(m._fc.in_features, out_features=10, bias=True)\n", 123 | "nn.init.kaiming_normal_(m._fc.weight);" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 35, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "\n", 133 | "learn = (Learner(data, m, wd=1e-5, opt_func=opt_func,\n", 134 | " metrics=[accuracy,top_k_accuracy],\n", 135 | " bn_wd=False, true_wd=True,\n", 136 | " loss_func = LabelSmoothingCrossEntropy())\n", 137 | " )\n", 138 | " " 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 36, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "mixup = 0\n", 148 | "if mixup: learn = learn.mixup(alpha=mixup)\n", 149 | "learn = learn.to_fp16(dynamic=True)\n", 150 | " \n", 151 | " \n" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 10, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/html": [], 162 | "text/plain": [ 163 | "" 164 | ] 165 | }, 166 | "metadata": {}, 167 | "output_type": "display_data" 168 | }, 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | "text": [ 173 | "LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.\n" 174 | ] 175 | }, 176 | { 177 | "data": 
{ 178 | "image/png": "\n", 179 | "text/plain": [ 180 | "
" 181 | ] 182 | }, 183 | "metadata": { 184 | "needs_background": "light" 185 | }, 186 | "output_type": "display_data" 187 | } 188 | ], 189 | "source": [ 190 | "#B1\n", 191 | "learn.lr_find()\n", 192 | "learn.recorder.plot()" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "## 5 epochs B1 (xresnet50 gets ~ 62%; xresnet50 + self attention gets 67%)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 9, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "data": { 209 | "text/html": [ 210 | "\n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | "
epochtrain_lossvalid_lossaccuracytop_k_accuracytime
02.2430002.3057010.1000000.50000001:57
12.1167342.1529970.2480000.74200001:56
22.0290702.0065470.3100000.79000001:57
31.9578091.9236890.3600000.81200001:56
41.9300431.9213970.3680000.79400001:55
" 264 | ], 265 | "text/plain": [ 266 | "" 267 | ] 268 | }, 269 | "metadata": {}, 270 | "output_type": "display_data" 271 | } 272 | ], 273 | "source": [ 274 | "learn.fit_one_cycle(5, 1e-4, div_factor=10, pct_start=0.3)" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 15, 280 | "metadata": {}, 281 | "outputs": [ 282 | { 283 | "data": { 284 | "text/html": [ 285 | "\n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | "
epochtrain_lossvalid_lossaccuracytop_k_accuracytime
02.1150522.3627080.1000000.50000001:56
11.9551346.9124240.1760000.70200001:58
21.7874051.8315670.4480000.89400001:56
31.6574721.5483860.5260000.93600001:55
41.5593271.4478170.5780000.94600001:55
" 339 | ], 340 | "text/plain": [ 341 | "" 342 | ] 343 | }, 344 | "metadata": {}, 345 | "output_type": "display_data" 346 | } 347 | ], 348 | "source": [ 349 | "# restarted from scratch\n", 350 | "learn.fit_one_cycle(5, 1e-3, div_factor=10, pct_start=0.3)" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 21, 356 | "metadata": {}, 357 | "outputs": [ 358 | { 359 | "data": { 360 | "text/html": [ 361 | "\n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | "
epochtrain_lossvalid_lossaccuracytop_k_accuracytime
02.1660824.7656020.1000000.50200001:55
12.0789465.7777190.1580000.61600001:54
21.9760103.2232280.2140000.68800001:56
31.8469231.9058360.3100000.87800001:58
41.8084021.7176240.4440000.90400001:56
" 415 | ], 416 | "text/plain": [ 417 | "" 418 | ] 419 | }, 420 | "metadata": {}, 421 | "output_type": "display_data" 422 | } 423 | ], 424 | "source": [ 425 | "# restarted from scratch\n", 426 | "learn.fit_one_cycle(5, 5e-3, div_factor=10, pct_start=0.3)" 427 | ] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": {}, 432 | "source": [ 433 | "## 80 epochs B1 (xresnet50 gets 89.9% on 256px)" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 11, 439 | "metadata": {}, 440 | "outputs": [ 441 | { 442 | "data": { 443 | "text/html": [ 444 | "\n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " 
\n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | 
" \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 
| " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 
1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | "
epochtrain_lossvalid_lossaccuracytop_k_accuracytime
02.1844762.3117040.1000000.50000001:38
12.1035702.0550840.3220000.78200001:39
22.0193372.0181850.3200000.82200001:39
31.9769482.1576840.3160000.82600001:40
41.9026791.8773820.3960000.88000001:39
51.8650952.1331090.3660000.85800001:40
61.8079391.9293970.4100000.87600001:38
71.7787911.7039100.4740000.90800001:38
81.7293281.7396150.4620000.91800001:38
91.7006361.5910240.5260000.93400001:39
101.6799441.4771340.5960000.92000001:39
111.6263811.5047800.6100000.92200001:39
121.6034201.3738630.6120000.95200001:38
131.5555031.3307400.6360000.95800001:40
141.5239421.5662080.5720000.93800001:39
151.5107561.4729170.6120000.91600001:39
161.4630481.3547840.6100000.94400001:39
171.4342901.4068970.5940000.96400001:39
181.4114461.6764490.5220000.94400001:40
191.4101151.1852810.7120000.96800001:38
201.4029361.1397410.7320000.96600001:38
211.3449391.2115830.6860000.97400001:39
221.3524691.2579150.6960000.96400001:40
231.3542991.1190910.7300000.96800001:40
241.3160741.1632590.7140000.97600001:39
251.2980491.1635320.7140000.97600001:40
261.2944241.0850450.7520000.96600001:40
271.2715081.1074230.7400000.97400001:40
281.2656701.0028890.7820000.97600001:40
291.2304491.0240710.7820000.97600001:40
301.2112741.0389930.7880000.97400001:39
311.1908480.9551530.8140000.98400001:40
321.1893890.9978740.7940000.98000001:40
331.1994220.8991790.8280000.98400001:40
341.1480740.9553340.8000000.98600001:39
351.1519981.0476160.7980000.98600001:39
361.1489420.9845910.7780000.98400001:39
371.1391430.9381570.8260000.98600001:39
381.1246560.9604300.8140000.98200001:38
391.1148230.9134930.8180000.98400001:38
401.0946100.9022650.8360000.99200001:39
411.0738060.9405590.8140000.98800001:39
421.0682090.8973910.8260000.99600001:39
431.0985970.9288590.8200000.99200001:40
441.0730280.9513750.8180000.99000001:40
451.0530770.9050370.8260000.99600001:38
461.0382160.9340720.8340000.98800001:39
471.0256190.8658270.8380000.99200001:38
481.0247520.8890820.8400000.99000001:40
491.0288490.8797140.8440000.99000001:41
501.0155710.8690140.8500000.98600001:40
510.9766750.8954340.8400000.98800001:39
520.9987980.8928370.8420000.99200001:39
530.9914630.9439280.8420000.98400001:39
540.9547120.8585460.8500000.99000001:40
550.9746460.9579260.8360000.97400001:39
560.9735380.8855350.8520000.99000001:39
570.9580350.9371780.8560000.97200001:41
580.9308640.8739950.8660000.98600001:39
590.9358110.8893350.8460000.98600001:39
600.9057990.8878540.8480000.98600001:39
610.9284360.8865740.8520000.98800001:39
620.9133370.8997560.8600000.98600001:38
630.9171060.8537050.8660000.98800001:39
640.8993100.8603660.8680000.99000001:39
650.9210510.8699420.8700000.98200001:40
660.8950610.8623540.8620000.98400001:39
670.9176060.9048990.8480000.97400001:39
680.9230860.8812840.8620000.98200001:39
690.8742480.8742540.8600000.98400001:39
700.8983910.8608070.8700000.98200001:41
710.9016140.8569320.8600000.98400001:40
720.8985560.8680280.8660000.98200001:39
730.8862240.8528710.8680000.98200001:39
740.8931470.8596240.8660000.98600001:38
750.8823700.8649530.8640000.98200001:39
760.8763450.8618770.8660000.98400001:39
770.8771400.8622450.8700000.98400001:39
780.8806940.8630580.8660000.98400001:39
790.8810460.8633260.8660000.98400001:39
" 1098 | ], 1099 | "text/plain": [ 1100 | "" 1101 | ] 1102 | }, 1103 | "metadata": {}, 1104 | "output_type": "display_data" 1105 | } 1106 | ], 1107 | "source": [ 1108 | "# restarted from scratch, mixup =0.2\n", 1109 | "learn.fit_one_cycle(80, 1e-3, div_factor=10, pct_start=0.3)" 1110 | ] 1111 | }, 1112 | { 1113 | "cell_type": "markdown", 1114 | "metadata": {}, 1115 | "source": [ 1116 | "# 5 epochs B3" 1117 | ] 1118 | }, 1119 | { 1120 | "cell_type": "code", 1121 | "execution_count": 20, 1122 | "metadata": {}, 1123 | "outputs": [ 1124 | { 1125 | "data": { 1126 | "text/html": [], 1127 | "text/plain": [ 1128 | "" 1129 | ] 1130 | }, 1131 | "metadata": {}, 1132 | "output_type": "display_data" 1133 | }, 1134 | { 1135 | "name": "stdout", 1136 | "output_type": "stream", 1137 | "text": [ 1138 | "LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.\n" 1139 | ] 1140 | }, 1141 | { 1142 | "data": { 1143 | "image/png": "\n", 1144 | "text/plain": [ 1145 | "
" 1146 | ] 1147 | }, 1148 | "metadata": { 1149 | "needs_background": "light" 1150 | }, 1151 | "output_type": "display_data" 1152 | } 1153 | ], 1154 | "source": [ 1155 | "learn.lr_find()\n", 1156 | "learn.recorder.plot()" 1157 | ] 1158 | }, 1159 | { 1160 | "cell_type": "code", 1161 | "execution_count": 21, 1162 | "metadata": {}, 1163 | "outputs": [ 1164 | { 1165 | "data": { 1166 | "text/html": [ 1167 | "\n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | "
epochtrain_lossvalid_lossaccuracytop_k_accuracytime
02.2036452.2318690.1820000.64200003:38
12.0791882.1540680.2760000.72800003:39
21.9158071.8419700.4040000.84800003:39
31.8107111.7144420.4500000.86600003:39
41.7166741.6681850.4760000.87600003:37
" 1221 | ], 1222 | "text/plain": [ 1223 | "" 1224 | ] 1225 | }, 1226 | "metadata": {}, 1227 | "output_type": "display_data" 1228 | } 1229 | ], 1230 | "source": [ 1231 | "learn.fit_one_cycle(5, 1e-4, div_factor=10, pct_start=0.3)" 1232 | ] 1233 | }, 1234 | { 1235 | "cell_type": "code", 1236 | "execution_count": 27, 1237 | "metadata": {}, 1238 | "outputs": [ 1239 | { 1240 | "data": { 1241 | "text/html": [ 1242 | "\n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | "
epochtrain_lossvalid_lossaccuracytop_k_accuracytime
02.1644519.3963870.1540000.60800003:38
12.0468088.0890100.1200000.64800003:38
21.9492581.9184080.3300000.87400003:37
31.8545801.7827760.3960000.87800003:39
41.7773211.7114880.4600000.88800003:38
" 1296 | ], 1297 | "text/plain": [ 1298 | "" 1299 | ] 1300 | }, 1301 | "metadata": {}, 1302 | "output_type": "display_data" 1303 | } 1304 | ], 1305 | "source": [ 1306 | "#restart\n", 1307 | "learn.fit_one_cycle(5, 1e-3, div_factor=10, pct_start=0.3)" 1308 | ] 1309 | }, 1310 | { 1311 | "cell_type": "code", 1312 | "execution_count": 37, 1313 | "metadata": {}, 1314 | "outputs": [ 1315 | { 1316 | "data": { 1317 | "text/html": [ 1318 | "\n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | "
epochtrain_lossvalid_lossaccuracytop_k_accuracytime
02.2209252.1885930.2020000.66200003:39
12.1220362.1705640.2320000.73400003:39
22.0377242.0309750.2840000.79600003:39
31.9395141.9340670.3560000.80800003:39
41.9048111.9047350.3660000.82000003:38
" 1372 | ], 1373 | "text/plain": [ 1374 | "" 1375 | ] 1376 | }, 1377 | "metadata": {}, 1378 | "output_type": "display_data" 1379 | } 1380 | ], 1381 | "source": [ 1382 | "#restart\n", 1383 | "learn.fit_one_cycle(5, 5e-5, div_factor=10, pct_start=0.3)" 1384 | ] 1385 | }, 1386 | { 1387 | "cell_type": "code", 1388 | "execution_count": null, 1389 | "metadata": {}, 1390 | "outputs": [], 1391 | "source": [] 1392 | } 1393 | ], 1394 | "metadata": { 1395 | "kernelspec": { 1396 | "display_name": "Python 3", 1397 | "language": "python", 1398 | "name": "python3" 1399 | }, 1400 | "language_info": { 1401 | "codemirror_mode": { 1402 | "name": "ipython", 1403 | "version": 3 1404 | }, 1405 | "file_extension": ".py", 1406 | "mimetype": "text/x-python", 1407 | "name": "python", 1408 | "nbconvert_exporter": "python", 1409 | "pygments_lexer": "ipython3", 1410 | "version": "3.7.1" 1411 | } 1412 | }, 1413 | "nbformat": 4, 1414 | "nbformat_minor": 2 1415 | } 1416 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Pretrained on ImageWoof.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from torch.utils import model_zoo\n", 10 | "from efficientnet import *\n", 11 | "import collections\n", 12 | "from torch import nn\n", 13 | "from fastai.script import *\n", 14 | "from fastai.vision import *\n", 15 | "from fastai.callbacks import *\n", 16 | "from fastai.distributed import *\n", 17 | "from fastprogress import fastprogress\n", 18 | "import torchvision.models as models\n" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "def get_data(size, woof, bs, workers=None):\n", 28 | " if size<=128: path = URLs.IMAGEWOOF_160 if woof else URLs.IMAGENETTE_160\n", 29 | " elif size<=224: path = URLs.IMAGEWOOF_320 if woof else URLs.IMAGENETTE_320\n", 30 | " else : path = URLs.IMAGEWOOF if woof else 
URLs.IMAGENETTE\n", 31 | " path = untar_data(path)\n", 32 | "\n", 33 | " n_gpus = num_distrib() or 1\n", 34 | " if workers is None: workers = min(8, num_cpus()//n_gpus)\n", 35 | "\n", 36 | " return (ImageList.from_folder(path).split_by_folder(valid='val')\n", 37 | " .label_from_folder().transform(([flip_lr(p=0.5)], []), size=size)\n", 38 | " .databunch(bs=bs, num_workers=workers)\n", 39 | " .presize(size, scale=(0.35,1))\n", 40 | " .normalize(imagenet_stats))" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "data = get_data(size=300, woof=1, bs=16)\n", 50 | "opt_func = partial(optim.Adam, betas=(0.9,0.99), eps=1e-6)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "# Efficient Net Transfer Learning" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 12, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "# from https://github.com/lukemelas/EfficientNet-PyTorch - Thank you!\n", 67 | "url_map = {\n", 68 | " 'efficientnetB0': 'http://storage.googleapis.com/public-models/efficientnet-b0-08094119.pth',\n", 69 | " 'efficientnetB1': 'http://storage.googleapis.com/public-models/efficientnet-b1-dbc7070a.pth',\n", 70 | " 'efficientnetB2': 'http://storage.googleapis.com/public-models/efficientnet-b2-27687264.pth',\n", 71 | " 'efficientnetB3': 'http://storage.googleapis.com/public-models/efficientnet-b3-c8376fa2.pth',\n", 72 | "}" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "Note the default resolution for each model\n", 80 | "\n", 81 | "(width_coefficient, depth_coefficient, resolution, dropout_rate)\n", 82 | "\n", 83 | " 'efficientnet-b0': (1.0, 1.0, 224, 0.2),\n", 84 | " 'efficientnet-b1': (1.0, 1.1, 240, 0.2),\n", 85 | " 'efficientnet-b2': (1.1, 1.2, 260, 0.3),\n", 86 | " 'efficientnet-b3': (1.2, 1.4, 300, 0.3),\n", 87 | " 'efficientnet-b4': (1.4, 1.8, 
380, 0.4),\n", 88 | " 'efficientnet-b5': (1.6, 2.2, 456, 0.4),\n", 89 | " 'efficientnet-b6': (1.8, 2.6, 528, 0.5),\n", 90 | " 'efficientnet-b7': (2.0, 3.1, 600, 0.5)," 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 24, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "name = 'efficientnetB3'\n", 100 | "m = globals()[name]()" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 25, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "#load pretrained weights\n", 110 | "# layers are named differently\n", 111 | "state_dict_load = model_zoo.load_url(url_map[name])\n", 112 | "keys_new=list(state_dict_load)\n", 113 | "keys_curr = list(m.state_dict())\n", 114 | "\n", 115 | "state_dict_combined = collections.OrderedDict()\n", 116 | "\n", 117 | "for i in range(len(keys_new)):\n", 118 | " state_dict_combined[keys_curr[i]] = state_dict_load[keys_new[i]]\n", 119 | "\n", 120 | " \n", 121 | "m.load_state_dict(state_dict_combined)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 26, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# change the last FC layer\n", 131 | "c_out = 10\n", 132 | "m[-1] = nn.Linear(m[-1].in_features,c_out)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 27, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "learn = Learner(data, m, wd=1e-5, opt_func=opt_func,metrics=[accuracy,top_k_accuracy],\n", 149 | " bn_wd=False, true_wd=True,\n", 150 | " loss_func = LabelSmoothingCrossEntropy())" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 28, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "learn.model;" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 29, 165 | "metadata": {}, 166 
| "outputs": [], 167 | "source": [ 168 | "learn.layer_groups;" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 30, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "# Pick a layer to freeze the model to\n", 178 | "# cf learn.layer_groups\n", 179 | "# will differ depending on which model you use\n", 180 | "\n", 181 | "learn.freeze_to(139) " 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 32, 187 | "metadata": {}, 188 | "outputs": [ 189 | { 190 | "data": { 191 | "text/html": [], 192 | "text/plain": [ 193 | "" 194 | ] 195 | }, 196 | "metadata": {}, 197 | "output_type": "display_data" 198 | }, 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | "LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.\n" 204 | ] 205 | }, 206 | { 207 | "data": { 208 | "image/png": "\n", 209 | "text/plain": [ 210 | "
" 211 | ] 212 | }, 213 | "metadata": { 214 | "needs_background": "light" 215 | }, 216 | "output_type": "display_data" 217 | } 218 | ], 219 | "source": [ 220 | "learn.lr_find(wd=1e-5)\n", 221 | "learn.recorder.plot()" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 33, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/html": [ 232 | "\n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | "
epochtrain_lossvalid_lossaccuracytop_k_accuracytime
01.1393331.7084080.3960000.90800002:09
11.1824511.7386080.4940000.88600002:09
21.1072181.2412320.6800000.97200002:10
30.9724690.8656670.8480000.99800002:09
40.9182140.8161040.8640000.99800002:10
" 286 | ], 287 | "text/plain": [ 288 | "" 289 | ] 290 | }, 291 | "metadata": {}, 292 | "output_type": "display_data" 293 | } 294 | ], 295 | "source": [ 296 | "learn.fit_one_cycle(5, 5e-2,wd=1e-5, div_factor=10, pct_start=0.3)" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "# Resnet 152 baseline" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 8, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "learn = cnn_learner(data, models.resnet152, metrics=[accuracy,top_k_accuracy],loss_func = LabelSmoothingCrossEntropy())" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 9, 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "learn.freeze()" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 6, 327 | "metadata": {}, 328 | "outputs": [ 329 | { 330 | "data": { 331 | "text/html": [], 332 | "text/plain": [ 333 | "" 334 | ] 335 | }, 336 | "metadata": {}, 337 | "output_type": "display_data" 338 | }, 339 | { 340 | "name": "stdout", 341 | "output_type": "stream", 342 | "text": [ 343 | "LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.\n" 344 | ] 345 | }, 346 | { 347 | "data": { 348 | "image/png": "\n", 349 | "text/plain": [ 350 | "
" 351 | ] 352 | }, 353 | "metadata": { 354 | "needs_background": "light" 355 | }, 356 | "output_type": "display_data" 357 | } 358 | ], 359 | "source": [ 360 | "learn.lr_find(wd=1e-2)\n", 361 | "learn.recorder.plot()" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 7, 367 | "metadata": {}, 368 | "outputs": [ 369 | { 370 | "data": { 371 | "text/html": [ 372 | "\n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | "
epochtrain_lossvalid_lossaccuracytop_k_accuracytime
00.8666130.9000080.8400000.99200003:10
10.9194140.8166540.8900000.98600003:09
20.8312100.7214640.9080000.99600003:08
30.7264210.6758390.9320001.00000003:08
40.6897860.6865000.9300000.99800003:09
" 426 | ], 427 | "text/plain": [ 428 | "" 429 | ] 430 | }, 431 | "metadata": {}, 432 | "output_type": "display_data" 433 | } 434 | ], 435 | "source": [ 436 | "learn.fit_one_cycle(5, 3e-3,wd=1e-2, div_factor=10, pct_start=0.3)" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": null, 442 | "metadata": {}, 443 | "outputs": [], 444 | "source": [] 445 | } 446 | ], 447 | "metadata": { 448 | "kernelspec": { 449 | "display_name": "Python 3", 450 | "language": "python", 451 | "name": "python3" 452 | }, 453 | "language_info": { 454 | "codemirror_mode": { 455 | "name": "ipython", 456 | "version": 3 457 | }, 458 | "file_extension": ".py", 459 | "mimetype": "text/x-python", 460 | "name": "python", 461 | "nbconvert_exporter": "python", 462 | "pygments_lexer": "ipython3", 463 | "version": "3.7.1" 464 | } 465 | }, 466 | "nbformat": 4, 467 | "nbformat_minor": 2 468 | } 469 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EfficientNet (Working but not validated) 2 | The objective of this repository is to convert EfficientNet to Pytorch for use with fastai. 3 | 4 | This is still work in progress. We currently have a functioning attempt at replicating EfficientNet-B0 to EfficientNet-B7, which still needs to be validated and tested. 
# ==============================================================================
# /efficientnet.py
# ==============================================================================
"""EfficientNet (B0-B7) building blocks in PyTorch, intended for use with fastai.

The module exposes the ``EfficientNet`` class plus ready-made factories
``efficientnetB0`` ... ``efficientnetB7`` (see ``__all__``).
"""
import math
import sys
from functools import partial

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo


__all__ = ['EfficientNet', 'efficientnetB0', 'efficientnetB1', 'efficientnetB2', 'efficientnetB3',
           'efficientnetB4', 'efficientnetB5', 'efficientnetB6', 'efficientnetB7']


class Swish(nn.Module):
    """Swish activation: ``x * sigmoid(x)``."""

    def forward(self, x):
        # nn.functional.sigmoid is deprecated, use torch.sigmoid instead.
        return x * torch.sigmoid(x)


# Single stateless activation instance reused by every layer
# (alternative: nn.ReLU(inplace=True)).
act_fn = Swish()


# From https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/utils.py
class Conv2dSamePadding(nn.Conv2d):
    """2D convolution that pads like TensorFlow's ``padding='same'``.

    The padding is derived from the actual input size on every forward pass so
    that the output spatial size is ``ceil(input / stride)``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
        # The built-in padding is fixed to 0; forward() pads explicitly.
        super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
        self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2

    def forward(self, x):
        ih, iw = x.size()[-2:]
        kh, kw = self.weight.size()[-2:]
        sh, sw = self.stride
        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
        pad_h = max((oh - 1) * sh + (kh - 1) * self.dilation[0] + 1 - ih, 0)
        pad_w = max((ow - 1) * sw + (kw - 1) * self.dilation[1] + 1 - iw, 0)
        if pad_h > 0 or pad_w > 0:
            # F.pad order is (left, right, top, bottom); an odd remainder goes
            # to the right/bottom side.
            x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
        return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)


# `groups` is needed for depthwise convolutions:
# "The configuration when groups == in_channels and out_channels = K * in_channels
#  where K is a positive integer is termed in literature as depthwise convolution."
def conv(ni, nf, ks=3, stride=1, groups=1, bias=False):
    """Return a 'same'-padded convolution from ``ni`` to ``nf`` channels.

    A simpler alternative with ``ks//2`` padding would be
    ``nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2, groups=groups, bias=bias)``.
    """
    return Conv2dSamePadding(ni, nf, kernel_size=ks, stride=stride, groups=groups, bias=bias)


def noop(x):
    """Identity function used where an optional stage is disabled."""
    return x


def init_cnn(m):
    """Recursively zero every bias and Kaiming-normal-init conv/linear weights."""
    if getattr(m, 'bias', None) is not None:
        nn.init.constant_(m.bias, 0)
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.kaiming_normal_(m.weight)
    for child in m.children():
        init_cnn(child)


def _mask_and_rescale(x, keep_prob):
    """Zero whole samples of ``x`` at random and rescale the rest by ``1/keep_prob``."""
    batch_size = x.shape[0]
    # keep_prob + U[0,1) floors to 1 with probability keep_prob, otherwise 0.
    random_tensor = keep_prob + torch.rand([batch_size, 1, 1, 1], dtype=x.dtype, device=x.device)
    binary_tensor = torch.floor(random_tensor)
    return x / keep_prob * binary_tensor


# Not compatible with fp16 training (the dtype passed to torch.rand causes issues).
class Drop_Connect(nn.Module):
    """Module form of drop connect: one random keep/drop mask entry per sample.

    Active only in training mode; in eval mode the input is returned unchanged.
    """

    def __init__(self, drop_ratio=0):
        super().__init__()
        self.keep_percent = 1.0 - drop_ratio

    def forward(self, x):
        if not self.training:
            return x
        return _mask_and_rescale(x, self.keep_percent)


def drop_connect(inputs, p, training):
    """Functional drop connect: drop each sample with probability ``p`` when training."""
    if not training:
        return inputs
    return _mask_and_rescale(inputs, 1 - p)


# `groups` is needed for depthwise convolutions.
# Batch-norm momentum is (1 - TensorFlow value).
def conv_layer(ni, nf, ks=3, stride=1, groups=1, zero_bn=False, act=True, eps=1e-03, momentum=0.01):
    """Return ``Sequential(conv, batchnorm[, activation])``.

    ``zero_bn`` initialises the batch-norm weight to 0 instead of 1;
    ``act=False`` omits the activation.
    """
    bn = nn.BatchNorm2d(nf, eps=eps, momentum=momentum)
    nn.init.constant_(bn.weight, 0. if zero_bn else 1.)
    layers = [conv(ni, nf, ks, stride=stride, groups=groups), bn]
    if act:
        layers.append(act_fn)
    return nn.Sequential(*layers)


class SqueezeEx(nn.Module):
    """Squeeze-and-excitation: rescale channels by a gate computed from pooled features."""

    def __init__(self, ni, ns):
        super().__init__()
        # ns may arrive as a float (channels * ratio); use at least one channel.
        ns = max(1, int(ns))
        self.layers = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            conv(ni, ns, ks=1, bias=True),
            act_fn,
            conv(ns, ni, ks=1, bias=True),
            nn.Sigmoid(),
        )

    def forward(self, x):
        return x * self.layers(x)


class MBConv(nn.Module):
    """Mobile inverted bottleneck block: expand -> depthwise -> SE -> project.

    Args:
        ni: input channels.
        nf: output channels.
        expand_ratio: channel multiplier for the expansion stage.
        ks: depthwise kernel size.
        stride: depthwise stride.
        se: squeeze-excitation ratio (applied to ``ni``); falsy disables SE.
        skip: allow the residual connection (used only if stride==1 and ni==nf).
        drop_connect_rate: drop probability applied to the residual branch.
    """

    def __init__(self, ni, nf, expand_ratio, ks=3, stride=2, se=None, skip=True, drop_connect_rate=None):
        super().__init__()
        self.drop_connect_rate = drop_connect_rate

        # Expansion (only if expand ratio > 1).
        ne = ni * expand_ratio
        self.conv_exp = noop if ni == ne else conv_layer(ni, ne, ks=1)

        # Depthwise convolution (implemented with groups == channels); this is
        # where ks and stride are used. depth_multiplier=1 is the default in the
        # original TF code, so the number of channels is kept.
        self.dw_conv = conv_layer(ne, ne, ks=ks, stride=stride, groups=ne)

        # Squeeze and excitation (if se ratio is specified); the ratio applies
        # to ni and not ne.
        self.se = SqueezeEx(ne, ni * se) if se else noop

        # Output projection (no activation).
        self.conv_out = conv_layer(ne, nf, ks=1, act=False)

        # The skip connection is only valid when input and output shapes match.
        self.skip = skip and stride == 1 and ni == nf

    def forward(self, x):
        out = self.conv_out(self.se(self.dw_conv(self.conv_exp(x))))
        if self.skip:
            if self.drop_connect_rate:
                out = drop_connect(out, p=self.drop_connect_rate, training=self.training)
            out = out + x
        return out


class Flatten(nn.Module):
    """Flatten every dimension after the batch dimension."""

    def forward(self, x):
        return x.view(x.size(0), -1)


class EfficientNet(nn.Sequential):
    """EfficientNet as a flat ``nn.Sequential``: stem, MBConv stages, head.

    Args:
        channels: base channel counts; ``channels[0]`` is the stem width,
            ``channels[i+1]`` the width of stage ``i`` and ``channels[-1]`` the
            width of the final 1x1 conv.
        repeat: base number of blocks per stage.
        ks: depthwise kernel size per stage.
        stride: stride of the first block of each stage.
        expand: expansion ratio per stage.
        w_mult: width multiplier (passed to ``round_filters``).
        d_mult: depth multiplier applied to ``repeat`` (rounded up).
        se: squeeze-excitation ratio, or None to disable.
        drop_connect_rate: drop-connect probability given to every block.
        dropout_rate: dropout before the classifier; falsy disables it.
        c_in: number of input image channels.
        c_out: number of output classes.
    """

    def __init__(self, channels, repeat, ks, stride, expand, w_mult=1.0, d_mult=1.0, se=None,
                 drop_connect_rate=None, dropout_rate=None, c_in=3, c_out=1000):
        repeat = [int(math.ceil(r * d_mult)) for r in repeat]
        channels = round_filters(channels, w_mult)

        stem = [conv_layer(c_in, channels[0], ks=3, stride=2)]

        blocks = []
        for i, n_blocks in enumerate(repeat):
            # The first block of a stage takes care of stride and channel change.
            blocks.append(MBConv(channels[i], channels[i + 1], expand[i], ks=ks[i], stride=stride[i],
                                 se=se, drop_connect_rate=drop_connect_rate))
            # Every remaining block must be its own instance: repeating one
            # instance (list multiplication) would make the blocks share weights.
            blocks.extend(
                MBConv(channels[i + 1], channels[i + 1], expand[i], ks=ks[i], stride=1,
                       se=se, drop_connect_rate=drop_connect_rate)
                for _ in range(n_blocks - 1)
            )

        head = [conv_layer(channels[-2], channels[-1], ks=1, stride=1), nn.AdaptiveAvgPool2d(1), Flatten()]
        # nn.Sequential only accepts Modules, so dropout is added only when
        # enabled (a plain function placeholder would raise TypeError).
        if dropout_rate:
            head.append(nn.Dropout(p=dropout_rate))
        head.append(nn.Linear(channels[-1], c_out))

        super().__init__(*stem, *blocks, *head)

        init_cnn(self)


def round_filters(filters, d_mult, divisor=8, min_depth=None):
    """Scale each filter count by ``d_mult`` and round to a multiple of ``divisor``.

    ``EfficientNet`` passes its width multiplier as ``d_mult``. A falsy
    multiplier returns ``filters`` unchanged. Results are never smaller than
    ``min_depth`` (default: ``divisor``) and never more than 10% below the
    scaled value.
    """
    if not d_mult:
        return filters

    min_depth = min_depth or divisor
    rounded = []
    for f in filters:
        scaled = f * d_mult
        new_f = max(min_depth, int(scaled + divisor / 2) // divisor * divisor)
        # Prevent rounding down by more than 10%.
        if new_f < 0.9 * scaled:
            new_f += divisor
        rounded.append(int(new_f))
    return rounded


# Base (B0) configuration.
me = sys.modules[__name__]
c = [32, 16, 24, 40, 80, 112, 192, 320, 1280]
r = [1, 2, 2, 3, 3, 4, 1]
ks = [3, 3, 5, 3, 5, 5, 3]
strides = [1, 2, 2, 2, 1, 2, 1]  # formerly named `str`, which shadowed the builtin
exp = [1, 6, 6, 6, 6, 6, 6]
se = 0.25
do = 0.2
dc = 0.2


# Base factory without multipliers and dropout.
efficientnet = partial(EfficientNet, channels=c, repeat=r, ks=ks, stride=strides, expand=exp, se=se,
                       drop_connect_rate=dc)

# (number, width_coefficient, depth_coefficient, dropout_rate)
for _n, _wm, _dm, _do in [
    [0, 1.0, 1.0, 0.2],
    [1, 1.0, 1.1, 0.2],
    [2, 1.1, 1.2, 0.3],
    [3, 1.2, 1.4, 0.3],
    [4, 1.4, 1.8, 0.4],
    [5, 1.6, 2.2, 0.4],
    [6, 1.8, 2.6, 0.5],
    [7, 2.0, 3.1, 0.5],
]:
    # Registers efficientnetB0 ... efficientnetB7 as module attributes.
    setattr(me, f'efficientnetB{_n}', partial(efficientnet, d_mult=_dm, w_mult=_wm, dropout_rate=_do))


# ==============================================================================
# /train.py
# ==============================================================================
from fastai.script import *
from fastai.vision import *
from fastai.callbacks import *
from fastai.distributed import *
from fastprogress import fastprogress
from torchvision.models import *
from efficientnet import *
import sys

torch.backends.cudnn.benchmark = True
fastprogress.MAX_COLS = 80


def get_data(size, woof, bs, workers=None):
    """Download Imagewoof/Imagenette for ``size`` and return a normalized databunch.

    The dataset variant (160px, 320px or full size) is chosen from ``size``;
    ``woof`` selects Imagewoof over Imagenette. ``workers`` defaults to
    ``min(8, cpus // gpus)``.
    """
    if size <= 128:
        path = URLs.IMAGEWOOF_160 if woof else URLs.IMAGENETTE_160
    elif size <= 224:
        path = URLs.IMAGEWOOF_320 if woof else URLs.IMAGENETTE_320
    else:
        path = URLs.IMAGEWOOF if woof else URLs.IMAGENETTE
    path = untar_data(path)

    n_gpus = num_distrib() or 1
    if workers is None:
        workers = min(8, num_cpus() // n_gpus)

    return (ImageList.from_folder(path).split_by_folder(valid='val')
            .label_from_folder().transform(([flip_lr(p=0.5)], []), size=size)
            .databunch(bs=bs, num_workers=workers)
            .presize(size, scale=(0.35, 1))
            .normalize(imagenet_stats))


@call_parse
def main(
        gpu:Param("GPU to run on", str)=None,
        woof: Param("Use imagewoof (otherwise imagenette)", int)=0,
        lr: Param("Learning rate", float)=1e-3,
        size: Param("Size (px: 128,192,224)", int)=128,
        alpha: Param("Alpha", float)=0.99,
        mom: Param("Momentum", float)=0.9,
        eps: Param("epsilon", float)=1e-6,
        epochs: Param("Number of epochs", int)=5,
        bs: Param("Batch size", int)=256,
        mixup: Param("Mixup", float)=0.,
        opt: Param("Optimizer (adam,rms,sgd)", str)='adam',
        arch: Param("Architecture (efficientnetB0)", str)='efficientnetB0',
        #sa: Param("Self-attention", int)=0,
        #sym: Param("Symmetry for self-attention", int)=0,
        dump: Param("Print model; don't train", int)=0,
        lrfinder: Param("Run learning rate finder; don't train", int)=0,
        wd: Param("weight decay", float)=1e-5,
        ):
    "Distributed training of Imagenette."

    bs_one_gpu = bs
    gpu = setup_distrib(gpu)
    # Without an explicit GPU the model is wrapped for data-parallel training, so
    # the batch is scaled by the device count (at least 1, so that a machine
    # without CUDA devices does not end up with a batch size of 0).
    if gpu is None:
        bs *= max(1, torch.cuda.device_count())

    if opt == 'adam':
        opt_func = partial(optim.Adam, betas=(mom, alpha), eps=eps)
    elif opt == 'rms':
        opt_func = partial(optim.RMSprop, alpha=alpha, eps=eps)
    elif opt == 'sgd':
        opt_func = partial(optim.SGD, momentum=mom)
    else:
        # Fail early instead of hitting an unbound `opt_func` further down.
        raise ValueError(f"Unknown optimizer {opt!r}; expected one of: adam, rms, sgd")

    data = get_data(size, woof, bs)
    # Scale the learning rate with the effective batch size.
    bs_rat = bs / bs_one_gpu  # originally bs/256
    if gpu is not None:
        bs_rat *= num_distrib()
    if not gpu:
        print(f'lr: {lr}; eff_lr: {lr*bs_rat}; size: {size}; alpha: {alpha}; mom: {mom}; eps: {eps}')
    lr *= bs_rat

    # The architecture is looked up by name among the star-imported factories.
    m = globals()[arch]
    learn = Learner(data, m(c_out=10), wd=wd, opt_func=opt_func,
                    metrics=[accuracy, top_k_accuracy],
                    bn_wd=False, true_wd=True,
                    loss_func=LabelSmoothingCrossEntropy())
    if dump:
        print(learn.model)
        sys.exit()
    if mixup:
        learn = learn.mixup(alpha=mixup)
    learn = learn.to_fp16(dynamic=True)
    if gpu is None:
        learn.to_parallel()
    elif num_distrib() > 1:
        learn.to_distributed(gpu)  # Requires `-m fastai.launch`

    if lrfinder:
        # Run the learning rate finder instead of training.
        learn.lr_find(wd=wd)
        learn.recorder.plot()
    else:
        learn.fit_one_cycle(epochs, lr, div_factor=10, pct_start=0.3)