├── .gitignore ├── EfficientDet.ipynb ├── LICENSE ├── README.md ├── anchors.py ├── bifpn.py ├── coco_eval.py ├── csv_eval.py ├── dataloader.py ├── efficientdet.py ├── images ├── 1.jpg ├── 3.jpg ├── 4.jpg ├── 5.jpg ├── 6.jpg ├── 7.jpg └── 8.jpg ├── losses.py ├── oid_dataset.py ├── opt ├── nms_wrapper.py ├── soft_nms_cpu.pyx └── src │ ├── nms_cpu.cpp │ ├── nms_cuda.cpp │ ├── nms_kernel.cu │ └── soft_nms_cpu.pyx ├── retinanet.py ├── timeitdec.py ├── train.py ├── utils.py └── visualize.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *.cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # Jupyter Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # SageMath parsed files 79 | *.sage.py 80 | 81 | # dotenv 82 | .env 83 | 84 | # virtualenv 85 | .venv 86 | venv/ 87 | ENV/ 88 | 89 | # Spyder project settings 90 | .spyderproject 91 | .spyproject 92 | 93 | # Rope project settings 94 | .ropeproject 95 | 96 | # mkdocs documentation 97 | /site 98 | 99 | # mypy 100 | .mypy_cache/ 101 | 102 | *.zip 103 | *.pt 104 | -------------------------------------------------------------------------------- /EfficientDet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# EfficientDet" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 49, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "CUDA available: True\n", 20 | "loading annotations into memory...\n", 21 | "Done (t=16.03s)\n", 22 | "creating index...\n", 23 | "index created!\n", 24 | "loading annotations into memory...\n", 25 | "Done (t=0.61s)\n", 26 | "creating index...\n", 27 | "index created!\n", 28 | "Loaded pretrained weights for efficientnet-b0\n", 29 | "DataParallel(\n", 30 | " 2.009 M, 100.000% Params, 3.742 GMac, 100.000% MACs, \n", 31 | " (module): EfficientDet(\n", 32 | " 2.009 M, 100.000% Params, 3.742 GMac, 100.000% MACs, \n", 33 | " (efficientnet): Sequential(\n", 34 | " 1.114 M, 55.476% Params, 0.065 GMac, 1.726% MACs, \n", 35 | " (0): Conv2dStaticSamePadding(\n", 36 | " 0.001 M, 0.043% Params, 0.0 GMac, 0.000% MACs, 3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False\n", 37 | " (static_padding): ZeroPad2d(0.0 M, 0.000% 
Params, 0.0 GMac, 0.000% MACs, padding=(0, 1, 0, 1), value=0.0)\n", 38 | " )\n", 39 | " (1): BatchNorm2d(0.0 M, 0.003% Params, 0.004 GMac, 0.112% MACs, 32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 40 | " (2): MBConvBlock(\n", 41 | " 0.001 M, 0.072% Params, 0.006 GMac, 0.168% MACs, \n", 42 | " (_depthwise_conv): Conv2dStaticSamePadding(\n", 43 | " 0.0 M, 0.014% Params, 0.0 GMac, 0.000% MACs, 32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False\n", 44 | " (static_padding): ZeroPad2d(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, padding=(1, 1, 1, 1), value=0.0)\n", 45 | " )\n", 46 | " (_bn1): BatchNorm2d(0.0 M, 0.003% Params, 0.004 GMac, 0.112% MACs, 32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 47 | " (_se_reduce): Conv2dStaticSamePadding(\n", 48 | " 0.0 M, 0.013% Params, 0.0 GMac, 0.000% MACs, 32, 8, kernel_size=(1, 1), stride=(1, 1)\n", 49 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 50 | " )\n", 51 | " (_se_expand): Conv2dStaticSamePadding(\n", 52 | " 0.0 M, 0.014% Params, 0.0 GMac, 0.000% MACs, 8, 32, kernel_size=(1, 1), stride=(1, 1)\n", 53 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 54 | " )\n", 55 | " (_project_conv): Conv2dStaticSamePadding(\n", 56 | " 0.001 M, 0.025% Params, 0.0 GMac, 0.000% MACs, 32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 57 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 58 | " )\n", 59 | " (_bn2): BatchNorm2d(0.0 M, 0.002% Params, 0.002 GMac, 0.056% MACs, 16, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 60 | " (_swish): MemoryEfficientSwish(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 61 | " )\n", 62 | " (3): MBConvBlock(\n", 63 | " 0.006 M, 0.299% Params, 0.017 GMac, 0.441% MACs, \n", 64 | " (_expand_conv): Conv2dStaticSamePadding(\n", 65 | " 0.002 M, 0.076% Params, 0.0 GMac, 0.000% MACs, 16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 66 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 67 | " )\n", 68 | " (_bn0): BatchNorm2d(0.0 M, 0.010% Params, 0.013 GMac, 0.336% MACs, 96, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 69 | " (_depthwise_conv): Conv2dStaticSamePadding(\n", 70 | " 0.001 M, 0.043% Params, 0.0 GMac, 0.000% MACs, 96, 96, kernel_size=(3, 3), stride=[2, 2], groups=96, bias=False\n", 71 | " (static_padding): ZeroPad2d(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, padding=(0, 1, 0, 1), value=0.0)\n", 72 | " )\n", 73 | " (_bn1): BatchNorm2d(0.0 M, 0.010% Params, 0.003 GMac, 0.084% MACs, 96, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 74 | " (_se_reduce): Conv2dStaticSamePadding(\n", 75 | " 0.0 M, 0.019% Params, 0.0 GMac, 0.000% MACs, 96, 4, kernel_size=(1, 1), stride=(1, 1)\n", 76 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 77 | " )\n", 78 | " (_se_expand): Conv2dStaticSamePadding(\n", 79 | " 0.0 M, 0.024% Params, 0.0 GMac, 0.000% MACs, 4, 96, kernel_size=(1, 1), stride=(1, 1)\n", 80 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 81 | " )\n", 82 | " (_project_conv): Conv2dStaticSamePadding(\n", 83 | " 0.002 M, 0.115% Params, 0.0 GMac, 0.000% MACs, 96, 24, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 84 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 85 | " 
)\n", 86 | " (_bn2): BatchNorm2d(0.0 M, 0.002% Params, 0.001 GMac, 0.021% MACs, 24, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 87 | " (_swish): MemoryEfficientSwish(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 88 | " )\n", 89 | " (4): MBConvBlock(\n", 90 | " 0.011 M, 0.533% Params, 0.01 GMac, 0.273% MACs, \n", 91 | " (_expand_conv): Conv2dStaticSamePadding(\n", 92 | " 0.003 M, 0.172% Params, 0.0 GMac, 0.000% MACs, 24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 93 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 94 | " )\n", 95 | " (_bn0): BatchNorm2d(0.0 M, 0.014% Params, 0.005 GMac, 0.126% MACs, 144, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 96 | " (_depthwise_conv): Conv2dStaticSamePadding(\n", 97 | " 0.001 M, 0.065% Params, 0.0 GMac, 0.000% MACs, 144, 144, kernel_size=(3, 3), stride=(1, 1), groups=144, bias=False\n", 98 | " (static_padding): ZeroPad2d(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, padding=(1, 1, 1, 1), value=0.0)\n", 99 | " )\n", 100 | " (_bn1): BatchNorm2d(0.0 M, 0.014% Params, 0.005 GMac, 0.126% MACs, 144, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 101 | " (_se_reduce): Conv2dStaticSamePadding(\n", 102 | " 0.001 M, 0.043% Params, 0.0 GMac, 0.000% MACs, 144, 6, kernel_size=(1, 1), stride=(1, 1)\n", 103 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 104 | " )\n", 105 | " (_se_expand): Conv2dStaticSamePadding(\n", 106 | " 0.001 M, 0.050% Params, 0.0 GMac, 0.000% MACs, 6, 144, kernel_size=(1, 1), stride=(1, 1)\n", 107 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 108 | " )\n", 109 | " (_project_conv): Conv2dStaticSamePadding(\n", 110 | " 0.003 M, 0.172% Params, 0.0 GMac, 0.000% MACs, 144, 24, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 111 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 112 | " )\n", 113 | " (_bn2): BatchNorm2d(0.0 M, 0.002% Params, 0.001 GMac, 0.021% MACs, 24, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 114 | " (_swish): MemoryEfficientSwish(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 115 | " )\n", 116 | " (5): MBConvBlock(\n", 117 | " 0.015 M, 0.764% Params, 0.006 GMac, 0.166% MACs, \n", 118 | " (_expand_conv): Conv2dStaticSamePadding(\n", 119 | " 0.003 M, 0.172% Params, 0.0 GMac, 0.000% MACs, 24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 120 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 121 | " )\n", 122 | " (_bn0): BatchNorm2d(0.0 M, 0.014% Params, 0.005 GMac, 0.126% MACs, 144, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 123 | " (_depthwise_conv): Conv2dStaticSamePadding(\n", 124 | " 0.004 M, 0.179% Params, 0.0 GMac, 0.000% MACs, 144, 144, kernel_size=(5, 5), stride=[2, 2], groups=144, bias=False\n", 125 | " (static_padding): ZeroPad2d(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, padding=(1, 2, 1, 2), value=0.0)\n", 126 | " )\n", 127 | " (_bn1): BatchNorm2d(0.0 M, 0.014% Params, 0.001 GMac, 0.032% MACs, 144, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 128 | " (_se_reduce): Conv2dStaticSamePadding(\n", 129 | " 0.001 M, 0.043% Params, 0.0 GMac, 0.000% MACs, 144, 6, kernel_size=(1, 1), stride=(1, 1)\n", 130 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 131 | " 
)\n", 132 | " (_se_expand): Conv2dStaticSamePadding(\n", 133 | " 0.001 M, 0.050% Params, 0.0 GMac, 0.000% MACs, 6, 144, kernel_size=(1, 1), stride=(1, 1)\n", 134 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 135 | " )\n", 136 | " (_project_conv): Conv2dStaticSamePadding(\n", 137 | " 0.006 M, 0.287% Params, 0.0 GMac, 0.000% MACs, 144, 40, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 138 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 139 | " )\n", 140 | " (_bn2): BatchNorm2d(0.0 M, 0.004% Params, 0.0 GMac, 0.009% MACs, 40, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 141 | " (_swish): MemoryEfficientSwish(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 142 | " )\n", 143 | " (6): MBConvBlock(\n", 144 | " 0.031 M, 1.558% Params, 0.004 GMac, 0.114% MACs, \n", 145 | " (_expand_conv): Conv2dStaticSamePadding(\n", 146 | " 0.01 M, 0.478% Params, 0.0 GMac, 0.000% MACs, 40, 240, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 147 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 148 | " )\n", 149 | " (_bn0): BatchNorm2d(0.0 M, 0.024% Params, 0.002 GMac, 0.053% MACs, 240, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 150 | " (_depthwise_conv): Conv2dStaticSamePadding(\n", 151 | " 0.006 M, 0.299% Params, 0.0 GMac, 0.000% MACs, 240, 240, kernel_size=(5, 5), stride=(1, 1), groups=240, bias=False\n", 152 | " (static_padding): ZeroPad2d(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, padding=(2, 2, 2, 2), value=0.0)\n", 153 | " )\n", 154 | " (_bn1): BatchNorm2d(0.0 M, 0.024% Params, 0.002 GMac, 0.053% MACs, 240, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 155 | " (_se_reduce): Conv2dStaticSamePadding(\n", 156 | " 0.002 M, 0.120% Params, 0.0 GMac, 0.000% MACs, 240, 10, kernel_size=(1, 1), stride=(1, 1)\n", 157 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 158 | " )\n", 159 | " (_se_expand): Conv2dStaticSamePadding(\n", 160 | " 0.003 M, 0.131% Params, 0.0 GMac, 0.000% MACs, 10, 240, kernel_size=(1, 1), stride=(1, 1)\n", 161 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 162 | " )\n", 163 | " (_project_conv): Conv2dStaticSamePadding(\n", 164 | " 0.01 M, 0.478% Params, 0.0 GMac, 0.000% MACs, 240, 40, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 165 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 166 | " )\n", 167 | " (_bn2): BatchNorm2d(0.0 M, 0.004% Params, 0.0 GMac, 0.009% MACs, 40, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 168 | " (_swish): MemoryEfficientSwish(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 169 | " )\n", 170 | " (7): MBConvBlock(\n", 171 | " 0.037 M, 1.849% Params, 0.003 GMac, 0.070% MACs, \n", 172 | " (_expand_conv): Conv2dStaticSamePadding(\n", 173 | " 0.01 M, 0.478% Params, 0.0 GMac, 0.000% MACs, 40, 240, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 174 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 175 | " )\n", 176 | " (_bn0): BatchNorm2d(0.0 M, 0.024% Params, 0.002 GMac, 0.053% MACs, 240, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 177 | " (_depthwise_conv): Conv2dStaticSamePadding(\n", 178 | " 0.002 M, 0.108% Params, 0.0 GMac, 0.000% MACs, 240, 240, kernel_size=(3, 3), stride=[2, 2], groups=240, bias=False\n", 179 | " 
(static_padding): ZeroPad2d(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, padding=(0, 1, 0, 1), value=0.0)\n", 180 | " )\n", 181 | " (_bn1): BatchNorm2d(0.0 M, 0.024% Params, 0.0 GMac, 0.013% MACs, 240, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 182 | " (_se_reduce): Conv2dStaticSamePadding(\n", 183 | " 0.002 M, 0.120% Params, 0.0 GMac, 0.000% MACs, 240, 10, kernel_size=(1, 1), stride=(1, 1)\n", 184 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 185 | " )\n", 186 | " (_se_expand): Conv2dStaticSamePadding(\n", 187 | " 0.003 M, 0.131% Params, 0.0 GMac, 0.000% MACs, 10, 240, kernel_size=(1, 1), stride=(1, 1)\n", 188 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 189 | " )\n", 190 | " (_project_conv): Conv2dStaticSamePadding(\n", 191 | " 0.019 M, 0.956% Params, 0.0 GMac, 0.000% MACs, 240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 192 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 193 | " )\n", 194 | " (_bn2): BatchNorm2d(0.0 M, 0.008% Params, 0.0 GMac, 0.004% MACs, 80, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 195 | " (_swish): MemoryEfficientSwish(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 196 | " )\n", 197 | " (8): MBConvBlock(\n", 198 | " 0.103 M, 5.123% Params, 0.002 GMac, 0.057% MACs, \n", 199 | " (_expand_conv): Conv2dStaticSamePadding(\n", 200 | " 0.038 M, 1.912% Params, 0.0 GMac, 0.000% MACs, 80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 201 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 202 | " )\n", 203 | " (_bn0): BatchNorm2d(0.001 M, 0.048% Params, 0.001 GMac, 0.026% MACs, 480, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 204 | " (_depthwise_conv): Conv2dStaticSamePadding(\n", 205 | " 0.004 M, 0.215% Params, 0.0 GMac, 0.000% MACs, 480, 480, kernel_size=(3, 3), stride=(1, 1), groups=480, bias=False\n", 206 | " (static_padding): ZeroPad2d(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, padding=(1, 1, 1, 1), value=0.0)\n", 207 | " )\n", 208 | " (_bn1): BatchNorm2d(0.001 M, 0.048% Params, 0.001 GMac, 0.026% MACs, 480, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 209 | " (_se_reduce): Conv2dStaticSamePadding(\n", 210 | " 0.01 M, 0.479% Params, 0.0 GMac, 0.000% MACs, 480, 20, kernel_size=(1, 1), stride=(1, 1)\n", 211 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 212 | " )\n", 213 | " (_se_expand): Conv2dStaticSamePadding(\n", 214 | " 0.01 M, 0.502% Params, 0.0 GMac, 0.000% MACs, 20, 480, kernel_size=(1, 1), stride=(1, 1)\n", 215 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 216 | " )\n", 217 | " (_project_conv): Conv2dStaticSamePadding(\n", 218 | " 0.038 M, 1.912% Params, 0.0 GMac, 0.000% MACs, 480, 80, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 219 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 220 | " )\n", 221 | " (_bn2): BatchNorm2d(0.0 M, 0.008% Params, 0.0 GMac, 0.004% MACs, 80, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 222 | " (_swish): MemoryEfficientSwish(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 223 | " )\n", 224 | " (9): MBConvBlock(\n", 225 | " 0.103 M, 5.123% Params, 0.002 GMac, 0.057% MACs, \n", 226 | " (_expand_conv): Conv2dStaticSamePadding(\n", 227 | " 0.038 M, 1.912% Params, 0.0 
GMac, 0.000% MACs, 80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 228 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 229 | " )\n", 230 | " (_bn0): BatchNorm2d(0.001 M, 0.048% Params, 0.001 GMac, 0.026% MACs, 480, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 231 | " (_depthwise_conv): Conv2dStaticSamePadding(\n", 232 | " 0.004 M, 0.215% Params, 0.0 GMac, 0.000% MACs, 480, 480, kernel_size=(3, 3), stride=(1, 1), groups=480, bias=False\n", 233 | " (static_padding): ZeroPad2d(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, padding=(1, 1, 1, 1), value=0.0)\n", 234 | " )\n", 235 | " (_bn1): BatchNorm2d(0.001 M, 0.048% Params, 0.001 GMac, 0.026% MACs, 480, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 236 | " (_se_reduce): Conv2dStaticSamePadding(\n", 237 | " 0.01 M, 0.479% Params, 0.0 GMac, 0.000% MACs, 480, 20, kernel_size=(1, 1), stride=(1, 1)\n", 238 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 239 | " )\n", 240 | " (_se_expand): Conv2dStaticSamePadding(\n", 241 | " 0.01 M, 0.502% Params, 0.0 GMac, 0.000% MACs, 20, 480, kernel_size=(1, 1), stride=(1, 1)\n", 242 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 243 | " )\n", 244 | " (_project_conv): Conv2dStaticSamePadding(\n", 245 | " 0.038 M, 1.912% Params, 0.0 GMac, 0.000% MACs, 480, 80, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 246 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 247 | " )\n", 248 | " (_bn2): BatchNorm2d(0.0 M, 0.008% Params, 0.0 GMac, 0.004% MACs, 80, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 249 | " (_swish): MemoryEfficientSwish(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 250 | " )\n", 251 | " (10): MBConvBlock(\n", 252 | " 0.126 M, 6.273% Params, 0.002 GMac, 0.059% MACs, \n", 253 | " (_expand_conv): Conv2dStaticSamePadding(\n", 254 | " 0.038 M, 1.912% Params, 0.0 GMac, 0.000% MACs, 80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 255 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 256 | " )\n", 257 | " (_bn0): BatchNorm2d(0.001 M, 0.048% Params, 0.001 GMac, 0.026% MACs, 480, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 258 | " (_depthwise_conv): Conv2dStaticSamePadding(\n", 259 | " 0.012 M, 0.597% Params, 0.0 GMac, 0.000% MACs, 480, 480, kernel_size=(5, 5), stride=[1, 1], groups=480, bias=False\n", 260 | " (static_padding): ZeroPad2d(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, padding=(2, 2, 2, 2), value=0.0)\n", 261 | " )\n", 262 | " (_bn1): BatchNorm2d(0.001 M, 0.048% Params, 0.001 GMac, 0.026% MACs, 480, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 263 | " (_se_reduce): Conv2dStaticSamePadding(\n", 264 | " 0.01 M, 0.479% Params, 0.0 GMac, 0.000% MACs, 480, 20, kernel_size=(1, 1), stride=(1, 1)\n", 265 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 266 | " )\n", 267 | " (_se_expand): Conv2dStaticSamePadding(\n", 268 | " 0.01 M, 0.502% Params, 0.0 GMac, 0.000% MACs, 20, 480, kernel_size=(1, 1), stride=(1, 1)\n", 269 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 270 | " )\n", 271 | " (_project_conv): Conv2dStaticSamePadding(\n", 272 | " 0.054 M, 2.676% Params, 0.0 GMac, 0.000% MACs, 480, 112, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 273 | " 
(static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 274 | " )\n", 275 | " (_bn2): BatchNorm2d(0.0 M, 0.011% Params, 0.0 GMac, 0.006% MACs, 112, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 276 | " (_swish): MemoryEfficientSwish(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 277 | " )\n", 278 | " (11): MBConvBlock(\n", 279 | " 0.209 M, 10.384% Params, 0.003 GMac, 0.080% MACs, \n", 280 | " (_expand_conv): Conv2dStaticSamePadding(\n", 281 | " 0.075 M, 3.747% Params, 0.0 GMac, 0.000% MACs, 112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 282 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 283 | " )\n", 284 | " (_bn0): BatchNorm2d(0.001 M, 0.067% Params, 0.001 GMac, 0.037% MACs, 672, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 285 | " (_depthwise_conv): Conv2dStaticSamePadding(\n", 286 | " 0.017 M, 0.836% Params, 0.0 GMac, 0.000% MACs, 672, 672, kernel_size=(5, 5), stride=(1, 1), groups=672, bias=False\n", 287 | " (static_padding): ZeroPad2d(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, padding=(2, 2, 2, 2), value=0.0)\n", 288 | " )\n", 289 | " (_bn1): BatchNorm2d(0.001 M, 0.067% Params, 0.001 GMac, 0.037% MACs, 672, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 290 | " (_se_reduce): Conv2dStaticSamePadding(\n", 291 | " 0.019 M, 0.938% Params, 0.0 GMac, 0.000% MACs, 672, 28, kernel_size=(1, 1), stride=(1, 1)\n", 292 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 293 | " )\n", 294 | " (_se_expand): Conv2dStaticSamePadding(\n", 295 | " 0.019 M, 0.970% Params, 0.0 GMac, 0.000% MACs, 28, 672, kernel_size=(1, 1), stride=(1, 1)\n", 296 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 297 | " )\n", 298 | " (_project_conv): Conv2dStaticSamePadding(\n", 299 | " 0.075 M, 3.747% Params, 0.0 GMac, 0.000% MACs, 672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 300 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 301 | " )\n", 302 | " (_bn2): BatchNorm2d(0.0 M, 0.011% Params, 0.0 GMac, 0.006% MACs, 112, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 303 | " (_swish): MemoryEfficientSwish(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 304 | " )\n", 305 | " (12): MBConvBlock(\n", 306 | " 0.209 M, 10.384% Params, 0.003 GMac, 0.080% MACs, \n", 307 | " (_expand_conv): Conv2dStaticSamePadding(\n", 308 | " 0.075 M, 3.747% Params, 0.0 GMac, 0.000% MACs, 112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 309 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 310 | " )\n", 311 | " (_bn0): BatchNorm2d(0.001 M, 0.067% Params, 0.001 GMac, 0.037% MACs, 672, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 312 | " (_depthwise_conv): Conv2dStaticSamePadding(\n", 313 | " 0.017 M, 0.836% Params, 0.0 GMac, 0.000% MACs, 672, 672, kernel_size=(5, 5), stride=(1, 1), groups=672, bias=False\n", 314 | " (static_padding): ZeroPad2d(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, padding=(2, 2, 2, 2), value=0.0)\n", 315 | " )\n", 316 | " (_bn1): BatchNorm2d(0.001 M, 0.067% Params, 0.001 GMac, 0.037% MACs, 672, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 317 | " (_se_reduce): Conv2dStaticSamePadding(\n", 318 | " 0.019 M, 0.938% Params, 0.0 GMac, 0.000% MACs, 672, 28, kernel_size=(1, 
1), stride=(1, 1)\n", 319 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 320 | " )\n", 321 | " (_se_expand): Conv2dStaticSamePadding(\n", 322 | " 0.019 M, 0.970% Params, 0.0 GMac, 0.000% MACs, 28, 672, kernel_size=(1, 1), stride=(1, 1)\n", 323 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 324 | " )\n", 325 | " (_project_conv): Conv2dStaticSamePadding(\n", 326 | " 0.075 M, 3.747% Params, 0.0 GMac, 0.000% MACs, 672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 327 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 328 | " )\n", 329 | " (_bn2): BatchNorm2d(0.0 M, 0.011% Params, 0.0 GMac, 0.006% MACs, 112, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 330 | " (_swish): MemoryEfficientSwish(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 331 | " )\n", 332 | " (13): MBConvBlock(\n", 333 | " 0.262 M, 13.068% Params, 0.002 GMac, 0.049% MACs, \n", 334 | " (_expand_conv): Conv2dStaticSamePadding(\n", 335 | " 0.075 M, 3.747% Params, 0.0 GMac, 0.000% MACs, 112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 336 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 337 | " )\n", 338 | " (_bn0): BatchNorm2d(0.001 M, 0.067% Params, 0.001 GMac, 0.037% MACs, 672, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 339 | " (_depthwise_conv): Conv2dStaticSamePadding(\n", 340 | " 0.017 M, 0.836% Params, 0.0 GMac, 0.000% MACs, 672, 672, kernel_size=(5, 5), stride=[2, 2], groups=672, bias=False\n", 341 | " (static_padding): ZeroPad2d(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, padding=(1, 2, 1, 2), value=0.0)\n", 342 | " )\n", 343 | " (_bn1): BatchNorm2d(0.001 M, 0.067% Params, 0.0 GMac, 0.009% MACs, 672, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 344 | " (_se_reduce): Conv2dStaticSamePadding(\n", 345 | " 0.019 M, 0.938% Params, 0.0 GMac, 0.000% MACs, 672, 28, kernel_size=(1, 1), stride=(1, 1)\n", 346 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 347 | " )\n", 348 | " (_se_expand): Conv2dStaticSamePadding(\n", 349 | " 0.019 M, 0.970% Params, 0.0 GMac, 0.000% MACs, 28, 672, kernel_size=(1, 1), stride=(1, 1)\n", 350 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 351 | " )\n", 352 | " (_project_conv): Conv2dStaticSamePadding(\n", 353 | " 0.129 M, 6.424% Params, 0.0 GMac, 0.000% MACs, 672, 192, kernel_size=(1, 1), stride=(1, 1), bias=False\n", 354 | " (static_padding): Identity(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 355 | " )\n", 356 | " (_bn2): BatchNorm2d(0.0 M, 0.019% Params, 0.0 GMac, 0.003% MACs, 192, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)\n", 357 | " (_swish): MemoryEfficientSwish(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 358 | " )\n", 359 | " )\n", 360 | " (fpn): BiFPN(\n", 361 | " 0.237 M, 11.775% Params, 0.086 GMac, 2.301% MACs, \n", 362 | " (p3): Conv2d(0.003 M, 0.131% Params, 0.011 GMac, 0.287% MACs, 40, 64, kernel_size=(1, 1), stride=(1, 1))\n", 363 | " (p4): Conv2d(0.005 M, 0.258% Params, 0.005 GMac, 0.142% MACs, 80, 64, kernel_size=(1, 1), stride=(1, 1))\n", 364 | " (p5): Conv2d(0.012 M, 0.615% Params, 0.003 GMac, 0.085% MACs, 192, 64, kernel_size=(1, 1), stride=(1, 1))\n", 365 | " (p6): Conv2d(0.111 M, 5.509% Params, 0.007 GMac, 0.189% MACs, 192, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", 366 
| " (p7): ConvBlock(\n", 367 | " 0.037 M, 1.845% Params, 0.001 GMac, 0.016% MACs, \n", 368 | " (conv): Conv2d(0.037 M, 1.838% Params, 0.001 GMac, 0.016% MACs, 64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", 369 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.0 GMac, 0.000% MACs, 64, eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 370 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 371 | " )\n", 372 | " (bifpn): Sequential(\n", 373 | " 0.069 M, 3.418% Params, 0.059 GMac, 1.582% MACs, \n", 374 | " (0): BiFPNBlock(\n", 375 | " 0.034 M, 1.709% Params, 0.03 GMac, 0.791% MACs, \n", 376 | " (p3_td): DepthwiseConvBlock(\n", 377 | " 0.004 M, 0.213% Params, 0.018 GMac, 0.476% MACs, \n", 378 | " (depthwise): Conv2d(0.0 M, 0.003% Params, 0.0 GMac, 0.007% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)\n", 379 | " (pointwise): Conv2d(0.004 M, 0.204% Params, 0.017 GMac, 0.448% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 380 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.001 GMac, 0.014% MACs, 64, eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 381 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.007% MACs, )\n", 382 | " )\n", 383 | " (p4_td): DepthwiseConvBlock(\n", 384 | " 0.004 M, 0.213% Params, 0.004 GMac, 0.119% MACs, \n", 385 | " (depthwise): Conv2d(0.0 M, 0.003% Params, 0.0 GMac, 0.002% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)\n", 386 | " (pointwise): Conv2d(0.004 M, 0.204% Params, 0.004 GMac, 0.112% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 387 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.0 GMac, 0.004% MACs, 64, eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 388 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.002% MACs, )\n", 389 | " )\n", 390 | " (p5_td): DepthwiseConvBlock(\n", 391 | " 0.004 M, 0.213% Params, 0.001 GMac, 0.030% MACs, \n", 392 | " (depthwise): Conv2d(0.0 M, 0.003% Params, 0.0 GMac, 0.000% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)\n", 393 | " (pointwise): Conv2d(0.004 M, 0.204% Params, 0.001 GMac, 0.028% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 394 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.0 GMac, 0.001% MACs, 64, eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 395 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 396 | " )\n", 397 | " (p6_td): DepthwiseConvBlock(\n", 398 | " 0.004 M, 0.213% Params, 0.0 GMac, 0.007% MACs, \n", 399 | " (depthwise): Conv2d(0.0 M, 0.003% Params, 0.0 GMac, 0.000% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)\n", 400 | " (pointwise): Conv2d(0.004 M, 0.204% Params, 0.0 GMac, 0.007% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 401 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.0 GMac, 0.000% MACs, 64, eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 402 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 403 | " )\n", 404 | " (p4_out): DepthwiseConvBlock(\n", 405 | " 0.004 M, 0.213% Params, 0.004 GMac, 0.119% MACs, \n", 406 | " (depthwise): Conv2d(0.0 M, 0.003% Params, 0.0 GMac, 0.002% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)\n", 407 | " (pointwise): Conv2d(0.004 M, 0.204% Params, 0.004 GMac, 0.112% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 408 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.0 GMac, 0.004% MACs, 64, 
eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 409 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.002% MACs, )\n", 410 | " )\n", 411 | " (p5_out): DepthwiseConvBlock(\n", 412 | " 0.004 M, 0.213% Params, 0.001 GMac, 0.030% MACs, \n", 413 | " (depthwise): Conv2d(0.0 M, 0.003% Params, 0.0 GMac, 0.000% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)\n", 414 | " (pointwise): Conv2d(0.004 M, 0.204% Params, 0.001 GMac, 0.028% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 415 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.0 GMac, 0.001% MACs, 64, eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 416 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 417 | " )\n", 418 | " (p6_out): DepthwiseConvBlock(\n", 419 | " 0.004 M, 0.213% Params, 0.0 GMac, 0.007% MACs, \n", 420 | " (depthwise): Conv2d(0.0 M, 0.003% Params, 0.0 GMac, 0.000% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)\n", 421 | " (pointwise): Conv2d(0.004 M, 0.204% Params, 0.0 GMac, 0.007% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 422 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.0 GMac, 0.000% MACs, 64, eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 423 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 424 | " )\n", 425 | " (p7_out): DepthwiseConvBlock(\n", 426 | " 0.004 M, 0.213% Params, 0.0 GMac, 0.002% MACs, \n", 427 | " (depthwise): Conv2d(0.0 M, 0.003% Params, 0.0 GMac, 0.000% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)\n", 428 | " (pointwise): Conv2d(0.004 M, 0.204% Params, 0.0 GMac, 0.002% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 429 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.0 GMac, 0.000% MACs, 64, eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 430 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 431 | " )\n", 432 | " (w1_relu): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 433 | " (w2_relu): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 434 | " )\n", 435 | " (1): BiFPNBlock(\n", 436 | " 0.034 M, 1.709% Params, 0.03 GMac, 0.791% MACs, \n", 437 | " (p3_td): DepthwiseConvBlock(\n", 438 | " 0.004 M, 0.213% Params, 0.018 GMac, 0.476% MACs, \n", 439 | " (depthwise): Conv2d(0.0 M, 0.003% Params, 0.0 GMac, 0.007% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)\n", 440 | " (pointwise): Conv2d(0.004 M, 0.204% Params, 0.017 GMac, 0.448% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 441 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.001 GMac, 0.014% MACs, 64, eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 442 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.007% MACs, )\n", 443 | " )\n", 444 | " (p4_td): DepthwiseConvBlock(\n", 445 | " 0.004 M, 0.213% Params, 0.004 GMac, 0.119% MACs, \n", 446 | " (depthwise): Conv2d(0.0 M, 0.003% Params, 0.0 GMac, 0.002% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)\n", 447 | " (pointwise): Conv2d(0.004 M, 0.204% Params, 0.004 GMac, 0.112% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 448 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.0 GMac, 0.004% MACs, 64, eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 449 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.002% MACs, )\n", 450 | " )\n", 451 | " (p5_td): DepthwiseConvBlock(\n", 452 | " 0.004 M, 0.213% Params, 0.001 
GMac, 0.030% MACs, \n", 453 | " (depthwise): Conv2d(0.0 M, 0.003% Params, 0.0 GMac, 0.000% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)\n", 454 | " (pointwise): Conv2d(0.004 M, 0.204% Params, 0.001 GMac, 0.028% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 455 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.0 GMac, 0.001% MACs, 64, eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 456 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 457 | " )\n", 458 | " (p6_td): DepthwiseConvBlock(\n", 459 | " 0.004 M, 0.213% Params, 0.0 GMac, 0.007% MACs, \n", 460 | " (depthwise): Conv2d(0.0 M, 0.003% Params, 0.0 GMac, 0.000% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)\n", 461 | " (pointwise): Conv2d(0.004 M, 0.204% Params, 0.0 GMac, 0.007% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 462 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.0 GMac, 0.000% MACs, 64, eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 463 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 464 | " )\n", 465 | " (p4_out): DepthwiseConvBlock(\n", 466 | " 0.004 M, 0.213% Params, 0.004 GMac, 0.119% MACs, \n", 467 | " (depthwise): Conv2d(0.0 M, 0.003% Params, 0.0 GMac, 0.002% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)\n", 468 | " (pointwise): Conv2d(0.004 M, 0.204% Params, 0.004 GMac, 0.112% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 469 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.0 GMac, 0.004% MACs, 64, eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 470 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.002% MACs, )\n", 471 | " )\n", 472 | " (p5_out): DepthwiseConvBlock(\n", 473 | " 0.004 M, 0.213% Params, 0.001 GMac, 0.030% MACs, \n", 474 | " (depthwise): Conv2d(0.0 M, 0.003% Params, 0.0 GMac, 0.000% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)\n", 475 | " (pointwise): Conv2d(0.004 M, 0.204% Params, 0.001 GMac, 0.028% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 476 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.0 GMac, 0.001% MACs, 64, eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 477 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 478 | " )\n", 479 | " (p6_out): DepthwiseConvBlock(\n", 480 | " 0.004 M, 0.213% Params, 0.0 GMac, 0.007% MACs, \n", 481 | " (depthwise): Conv2d(0.0 M, 0.003% Params, 0.0 GMac, 0.000% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)\n", 482 | " (pointwise): Conv2d(0.004 M, 0.204% Params, 0.0 GMac, 0.007% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 483 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.0 GMac, 0.000% MACs, 64, eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 484 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 485 | " )\n", 486 | " (p7_out): DepthwiseConvBlock(\n", 487 | " 0.004 M, 0.213% Params, 0.0 GMac, 0.002% MACs, \n", 488 | " (depthwise): Conv2d(0.0 M, 0.003% Params, 0.0 GMac, 0.000% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), groups=64, bias=False)\n", 489 | " (pointwise): Conv2d(0.004 M, 0.204% Params, 0.0 GMac, 0.002% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 490 | " (bn): BatchNorm2d(0.0 M, 0.006% Params, 0.0 GMac, 0.000% MACs, 64, eps=4e-05, momentum=0.9997, affine=True, track_running_stats=True)\n", 491 | " (act): ReLU(0.0 M, 0.000% Params, 0.0 
GMac, 0.000% MACs, )\n", 492 | " )\n", 493 | " (w1_relu): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 494 | " (w2_relu): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 495 | " )\n", 496 | " )\n", 497 | " )\n", 498 | " (regressionModel): RegressionModel(\n", 499 | " 0.132 M, 6.550% Params, 0.719 GMac, 19.212% MACs, \n", 500 | " (prediction_net): Sequential(\n", 501 | " 0.111 M, 5.515% Params, 0.605 GMac, 16.183% MACs, \n", 502 | " (0): Conv2d(0.037 M, 1.838% Params, 0.201 GMac, 5.385% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 503 | " (1): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.009% MACs, )\n", 504 | " (2): Conv2d(0.037 M, 1.838% Params, 0.201 GMac, 5.385% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 505 | " (3): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.009% MACs, )\n", 506 | " (4): Conv2d(0.037 M, 1.838% Params, 0.201 GMac, 5.385% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 507 | " (5): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.009% MACs, )\n", 508 | " )\n", 509 | " (output): Conv2d(0.021 M, 1.034% Params, 0.113 GMac, 3.029% MACs, 64, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 510 | " )\n", 511 | " (classificationModel): ClassificationModel(\n", 512 | " 0.526 M, 26.199% Params, 2.872 GMac, 76.762% MACs, \n", 513 | " (classification_net): Sequential(\n", 514 | " 0.111 M, 5.515% Params, 0.605 GMac, 16.183% MACs, \n", 515 | " (0): Conv2d(0.037 M, 1.838% Params, 0.201 GMac, 5.385% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 516 | " (1): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.009% MACs, )\n", 517 | " (2): Conv2d(0.037 M, 1.838% Params, 0.201 GMac, 5.385% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 518 | " (3): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.009% MACs, )\n", 519 | " (4): Conv2d(0.037 M, 1.838% Params, 0.201 GMac, 5.385% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 520 | " (5): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.009% MACs, )\n", 521 | " )\n", 522 | " (output): Conv2d(0.415 M, 20.683% Params, 2.267 GMac, 60.580% MACs, 64, 720, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 523 | " (output_act): Sigmoid(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 524 | " )\n", 525 | " (anchors): Anchors(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 526 | " (regressBoxes): BBoxTransform(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 527 | " (clipBoxes): ClipBoxes(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 528 | " (focalLoss): FocalLoss(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, )\n", 529 | " )\n", 530 | ")\n", 531 | "Computational complexity: 3.74 GMac\n", 532 | "Number of parameters: 2.01 M \n", 533 | "Num training images: 118287\n", 534 | " 0%| | 0/3697 [00:00\n", 540 | " main()\n", 541 | " File \"train.py\", line 153, in main\n", 542 | " classification_loss, regression_loss = model([data['img'].cuda().float(), data['annot']])\n", 543 | " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py\", line 541, in __call__\n", 544 | " result = self.forward(*input, **kwargs)\n", 545 | " File \"/usr/local/lib/python3.7/site-packages/torch/nn/parallel/data_parallel.py\", line 150, in forward\n", 546 | " return self.module(*inputs[0], **kwargs[0])\n", 547 | " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py\", line 541, in __call__\n", 548 | " result = self.forward(*input, **kwargs)\n", 549 | " File 
\"/mnt/synology/pelvis/projects/tristan/Repositories/Efficientdet-PT/efficientdet.py\", line 174, in forward\n", 550 | " return self.focalLoss(classification, regression, anchors, annotations)\n", 551 | " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py\", line 541, in __call__\n", 552 | " result = self.forward(*input, **kwargs)\n", 553 | " File \"/mnt/synology/pelvis/projects/tristan/Repositories/Efficientdet-PT/timeitdec.py\", line 6, in timed\n", 554 | " result = method(*args, **kw)\n", 555 | " File \"/mnt/synology/pelvis/projects/tristan/Repositories/Efficientdet-PT/losses.py\", line 87, in forward\n", 556 | " alpha_factor = torch.ones(targets.shape).cuda() * alpha\n", 557 | "KeyboardInterrupt\n" 558 | ] 559 | } 560 | ], 561 | "source": [ 562 | "!python3.7 train.py --dataset coco --coco_path ../../Datasets/COCO2017 --efficientdet --batch-size 32 --scaling-compound 0 --print-model-complexity" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": 12, 568 | "metadata": {}, 569 | "outputs": [ 570 | { 571 | "name": "stdout", 572 | "output_type": "stream", 573 | "text": [ 574 | "Requirement already satisfied: cython in /home/user/.local/lib/python3.7/site-packages (0.29.14)\n", 575 | "\u001b[33mYou are using pip version 18.1, however version 19.3.1 is available.\n", 576 | "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n", 577 | "Requirement already satisfied: pycocotools in /home/user/.local/lib/python3.7/site-packages (2.0.0)\n", 578 | "\u001b[33mYou are using pip version 18.1, however version 19.3.1 is available.\n", 579 | "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n", 580 | "Requirement already satisfied: efficientnet_pytorch in /home/user/.local/lib/python3.7/site-packages (0.5.1)\n", 581 | "Requirement already satisfied: torch in /usr/local/lib/python3.7/site-packages (from efficientnet_pytorch) (1.3.0)\n", 582 | "Requirement already satisfied: numpy in /usr/local/lib/python3.7/site-packages (from torch->efficientnet_pytorch) (1.15.3)\n", 583 | "\u001b[33mYou are using pip version 18.1, however version 19.3.1 is available.\n", 584 | "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n", 585 | "Requirement already satisfied: pthflops in /home/user/.local/lib/python3.7/site-packages (0.2.1)\n", 586 | "Requirement already satisfied: torch in /usr/local/lib/python3.7/site-packages (from pthflops) (1.3.0)\n", 587 | "Requirement already satisfied: numpy in /usr/local/lib/python3.7/site-packages (from torch->pthflops) (1.15.3)\n", 588 | "\u001b[33mYou are using pip version 18.1, however version 19.3.1 is available.\n", 589 | "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n" 590 | ] 591 | } 592 | ], 593 | "source": [ 594 | "!pip3 install cython --user\n", 595 | "!pip3 install pycocotools --user\n", 596 | "!pip3 install efficientnet_pytorch --user\n", 597 | "!pip3 install pthflops --user\n" 598 | ] 599 | }, 600 | { 601 | "cell_type": "code", 602 | "execution_count": null, 603 | "metadata": {}, 604 | "outputs": [], 605 | "source": [] 606 | } 607 | ], 608 | "metadata": { 609 | "kernelspec": { 610 | "display_name": "Python 3", 611 | "language": "python", 612 | "name": "python3" 613 | }, 614 | "language_info": { 615 | "codemirror_mode": { 616 | "name": "ipython", 617 | "version": 3 618 | }, 619 | "file_extension": ".py", 620 | "mimetype": "text/x-python", 621 | "name": "python", 622 | 
"nbconvert_exporter": "python", 623 | "pygments_lexer": "ipython3", 624 | "version": "3.7.1" 625 | } 626 | }, 627 | "nbformat": 4, 628 | "nbformat_minor": 4 629 | } 630 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyTorch EfficientDet 2 | Here we implement [EfficientDet](https://arxiv.org/abs/1911.09070). The code is based on a RetinaNet implementation by [yhenon/pytorch-retinanet](https://github.com/yhenon/pytorch-retinanet). We use the EfficientNet backend by [rwightman/gen-efficientnet-pytorch](https://github.com/rwightman/gen-efficientnet-pytorch). 
3 | 4 | ## Current status 5 | The current implementation is able to run. I'll update this document as soon as I have some preliminary results. The paper by Tan et al. gives a few more details, which we would like to implement and report on: 6 | * Use exponential moving average with decay 0.9998. 7 | * Initialize convolution layers. 8 | * Train the model using the SGD optimizer with momentum 0.9 and weight decay 4e-5. 9 | * Implement the described learning-rate schedule, which is first increased linearly from 0 to 0.08 during the initial 5% of warm-up training steps and then annealed down using a cosine decay rule. 10 | * Report performance. 11 | 12 | If you have other issues that need my attention, feel free to make a pull request or leave an [issue](https://github.com/tristandb/EfficientDet-PyTorch/issues). 13 | 14 | ## Results 15 | 16 | Model | mAP | #Params | #FLOPS 17 | --- | --- | --- | --- 18 | 19 | ## Installation 20 | 21 | 1) Clone this repo. 22 | 23 | 2) Install the required packages: 24 | 25 | ``` 26 | apt-get install tk-dev python-tk 27 | ``` 28 | 29 | 3) Install the Python packages: 30 | 31 | ``` 32 | 33 | pip install pandas 34 | 35 | pip install pycocotools 36 | 37 | pip install cython 38 | 39 | pip install opencv-python 40 | 41 | pip install requests 42 | 43 | pip install efficientnet_pytorch 44 | 45 | ``` 46 | 47 | Note that you may have to edit line 14 of `build.sh` if you want to change which version of Python you are building the extension for. 48 | 49 | ## Training 50 | 51 | The network can be trained using the `train.py` script. Currently, two dataloaders are available: COCO and CSV. For training on COCO, use 52 | 53 | ``` 54 | python3 train.py --efficientnet --dataset coco --coco_path ../../Datasets/COCO2017 --scaling-compound 0 --batch-size 8 55 | ``` 56 | 57 | For training on a custom dataset, with annotations in CSV format (see the CSV dataset format section below), use 58 | 59 | ``` 60 | python train.py --dataset csv --csv_train <path/to/train_annotations.csv> --csv_classes <path/to/class_list.csv> --csv_val <path/to/val_annotations.csv> 61 | ``` 62 | 63 | Note that the `--csv_val` argument is optional; if it is omitted, no validation will be performed. 64 | 65 | ## Acknowledgements 66 | - The code is based on a RetinaNet implementation by [yhenon/pytorch-retinanet](https://github.com/yhenon/pytorch-retinanet). 67 | - Significant amounts of code are borrowed from the [keras retinanet implementation](https://github.com/fizyr/keras-retinanet). 68 | - The NMS module used is from the [pytorch faster-rcnn implementation](https://github.com/ruotianluo/pytorch-faster-rcnn). 69 | - We use the EfficientNet backend by [rwightman/gen-efficientnet-pytorch](https://github.com/rwightman/gen-efficientnet-pytorch).
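## CSV dataset format

The CSV dataloader (`CSVDataset` in `dataloader.py`) expects two files. The annotations file has one object per line in the format `img_file,x1,y1,x2,y2,class_name`, with integer pixel coordinates and `x2 > x1`, `y2 > y1`; a line containing only an image path (`img_file,,,,,`) marks an image without annotations. The class list file maps `class_name,class_id`, with ids expected to start at 0. The contents below are only an illustration with made-up paths and classes.

`annotations.csv`:

```
images/0001.jpg,162,84,305,241,dog
images/0001.jpg,10,40,110,130,cat
images/0002.jpg,,,,,
```

`classes.csv`:

```
cat,0
dog,1
```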
-------------------------------------------------------------------------------- /anchors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class Anchors(nn.Module): 7 | def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None): 8 | super(Anchors, self).__init__() 9 | 10 | if pyramid_levels is None: 11 | self.pyramid_levels = [3, 4, 5, 6, 7] 12 | if strides is None: 13 | self.strides = [2 ** x for x in self.pyramid_levels] 14 | if sizes is None: 15 | self.sizes = [2 ** (x + 2) for x in self.pyramid_levels] 16 | if ratios is None: 17 | self.ratios = np.array([0.5, 1, 2]) 18 | if scales is None: 19 | self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) 20 | 21 | def forward(self, image): 22 | 23 | image_shape = image.shape[2:] 24 | image_shape = np.array(image_shape) 25 | image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels] 26 | 27 | # compute anchors over all pyramid levels 28 | all_anchors = np.zeros((0, 4)).astype(np.float32) 29 | 30 | for idx, p in enumerate(self.pyramid_levels): 31 | anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales) 32 | shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors) 33 | all_anchors = np.append(all_anchors, shifted_anchors, axis=0) 34 | 35 | all_anchors = np.expand_dims(all_anchors, axis=0) 36 | 37 | return torch.from_numpy(all_anchors.astype(np.float32)).cuda() 38 | 39 | def generate_anchors(base_size=16, ratios=None, scales=None): 40 | """ 41 | Generate anchor (reference) windows by enumerating aspect ratios X 42 | scales w.r.t. a reference window. 43 | """ 44 | 45 | if ratios is None: 46 | ratios = np.array([0.5, 1, 2]) 47 | 48 | if scales is None: 49 | scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) 50 | 51 | num_anchors = len(ratios) * len(scales) 52 | 53 | # initialize output anchors 54 | anchors = np.zeros((num_anchors, 4)) 55 | 56 | # scale base_size 57 | anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T 58 | 59 | # compute areas of anchors 60 | areas = anchors[:, 2] * anchors[:, 3] 61 | 62 | # correct for ratios 63 | anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales))) 64 | anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales)) 65 | 66 | # transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2) 67 | anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T 68 | anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T 69 | 70 | return anchors 71 | 72 | def compute_shape(image_shape, pyramid_levels): 73 | """Compute shapes based on pyramid levels. 
74 | 75 | :param image_shape: 76 | :param pyramid_levels: 77 | :return: 78 | """ 79 | image_shape = np.array(image_shape[:2]) 80 | image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels] 81 | return image_shapes 82 | 83 | 84 | def anchors_for_shape( 85 | image_shape, 86 | pyramid_levels=None, 87 | ratios=None, 88 | scales=None, 89 | strides=None, 90 | sizes=None, 91 | shapes_callback=None, 92 | ): 93 | 94 | image_shapes = compute_shape(image_shape, pyramid_levels) 95 | 96 | # compute anchors over all pyramid levels 97 | all_anchors = np.zeros((0, 4)) 98 | for idx, p in enumerate(pyramid_levels): 99 | anchors = generate_anchors(base_size=sizes[idx], ratios=ratios, scales=scales) 100 | shifted_anchors = shift(image_shapes[idx], strides[idx], anchors) 101 | all_anchors = np.append(all_anchors, shifted_anchors, axis=0) 102 | 103 | return all_anchors 104 | 105 | 106 | def shift(shape, stride, anchors): 107 | shift_x = (np.arange(0, shape[1]) + 0.5) * stride 108 | shift_y = (np.arange(0, shape[0]) + 0.5) * stride 109 | 110 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 111 | 112 | shifts = np.vstack(( 113 | shift_x.ravel(), shift_y.ravel(), 114 | shift_x.ravel(), shift_y.ravel() 115 | )).transpose() 116 | 117 | # add A anchors (1, A, 4) to 118 | # cell K shifts (K, 1, 4) to get 119 | # shift anchors (K, A, 4) 120 | # reshape to (K*A, 4) shifted anchors 121 | A = anchors.shape[0] 122 | K = shifts.shape[0] 123 | all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) 124 | all_anchors = all_anchors.reshape((K * A, 4)) 125 | 126 | return all_anchors 127 | 128 | -------------------------------------------------------------------------------- /bifpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from torch.autograd import Variable 6 | 7 | class DepthwiseConvBlock(nn.Module): 8 | """ 9 | Depthwise seperable convolution. 10 | 11 | 12 | """ 13 | def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, freeze_bn=False): 14 | super(DepthwiseConvBlock,self).__init__() 15 | self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size, stride, 16 | padding, dilation, groups=in_channels, bias=False) 17 | self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, 18 | stride=1, padding=0, dilation=1, groups=1, bias=False) 19 | 20 | 21 | self.bn = nn.BatchNorm2d(out_channels, momentum=0.9997, eps=4e-5) 22 | self.act = nn.ReLU() 23 | 24 | def forward(self, inputs): 25 | x = self.depthwise(inputs) 26 | x = self.pointwise(x) 27 | x = self.bn(x) 28 | return self.act(x) 29 | 30 | class ConvBlock(nn.Module): 31 | """ 32 | Convolution block with Batch Normalization and ReLU activation. 
33 | 34 | """ 35 | def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, freeze_bn=False): 36 | super(ConvBlock,self).__init__() 37 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 38 | self.bn = nn.BatchNorm2d(out_channels, momentum=0.9997, eps=4e-5) 39 | self.act = nn.ReLU() 40 | 41 | def forward(self, inputs): 42 | x = self.conv(inputs) 43 | x = self.bn(x) 44 | return self.act(x) 45 | 46 | class BiFPNBlock(nn.Module): 47 | """ 48 | Bi-directional Feature Pyramid Network 49 | """ 50 | def __init__(self, feature_size=64, epsilon=0.0001): 51 | super(BiFPNBlock, self).__init__() 52 | self.epsilon = epsilon 53 | 54 | self.p3_td = DepthwiseConvBlock(feature_size, feature_size) 55 | self.p4_td = DepthwiseConvBlock(feature_size, feature_size) 56 | self.p5_td = DepthwiseConvBlock(feature_size, feature_size) 57 | self.p6_td = DepthwiseConvBlock(feature_size, feature_size) 58 | 59 | self.p4_out = DepthwiseConvBlock(feature_size, feature_size) 60 | self.p5_out = DepthwiseConvBlock(feature_size, feature_size) 61 | self.p6_out = DepthwiseConvBlock(feature_size, feature_size) 62 | self.p7_out = DepthwiseConvBlock(feature_size, feature_size) 63 | 64 | # TODO: Init weights 65 | self.w1 = nn.Parameter(torch.Tensor(2, 4)) 66 | self.w1_relu = nn.ReLU() 67 | self.w2 = nn.Parameter(torch.Tensor(3, 4)) 68 | self.w2_relu = nn.ReLU() 69 | 70 | def forward(self, inputs): 71 | p3_x, p4_x, p5_x, p6_x, p7_x = inputs 72 | 73 | # Calculate Top-Down Pathway 74 | w1 = self.w1_relu(self.w1) 75 | w1 /= torch.sum(w1, dim=0) + self.epsilon 76 | w2 = self.w2_relu(self.w2) 77 | w2 /= torch.sum(w2, dim=0) + self.epsilon 78 | 79 | p7_td = p7_x 80 | p6_td = self.p6_td(w1[0, 0] * p6_x + w1[1, 0] * F.interpolate(p7_td, scale_factor=2)) 81 | p5_td = self.p5_td(w1[0, 1] * p5_x + w1[1, 1] * F.interpolate(p6_td, scale_factor=2)) 82 | p4_td = self.p4_td(w1[0, 2] * p4_x + w1[1, 2] * F.interpolate(p5_td, scale_factor=2)) 83 | p3_td = self.p3_td(w1[0, 3] * p3_x + w1[1, 3] * F.interpolate(p4_td, scale_factor=2)) 84 | 85 | # Calculate Bottom-Up Pathway 86 | p3_out = p3_td 87 | p4_out = self.p4_out(w2[0, 0] * p4_x + w2[1, 0] * p4_td + w2[2, 0] * nn.Upsample(scale_factor=0.5)(p3_out)) 88 | p5_out = self.p5_out(w2[0, 1] * p5_x + w2[1, 1] * p5_td + w2[2, 1] * nn.Upsample(scale_factor=0.5)(p4_out)) 89 | p6_out = self.p6_out(w2[0, 2] * p6_x + w2[1, 2] * p6_td + w2[2, 2] * nn.Upsample(scale_factor=0.5)(p5_out)) 90 | p7_out = self.p7_out(w2[0, 3] * p7_x + w2[1, 3] * p7_td + w2[2, 3] * nn.Upsample(scale_factor=0.5)(p6_out)) 91 | 92 | return [p3_out, p4_out, p5_out, p6_out, p7_out] 93 | 94 | class BiFPN(nn.Module): 95 | def __init__(self, size, feature_size=64, num_layers=2, epsilon=0.0001): 96 | super(BiFPN, self).__init__() 97 | self.p3 = nn.Conv2d(size[0], feature_size, kernel_size=1, stride=1, padding=0) 98 | self.p4 = nn.Conv2d(size[1], feature_size, kernel_size=1, stride=1, padding=0) 99 | self.p5 = nn.Conv2d(size[2], feature_size, kernel_size=1, stride=1, padding=0) 100 | 101 | # p6 is obtained via a 3x3 stride-2 conv on C5 102 | self.p6 = nn.Conv2d(size[2], feature_size, kernel_size=3, stride=2, padding=1) 103 | 104 | # p7 is computed by applying ReLU followed by a 3x3 stride-2 conv on p6 105 | self.p7 = ConvBlock(feature_size, feature_size, kernel_size=3, stride=2, padding=1) 106 | 107 | bifpns = [] 108 | for _ in range(num_layers): 109 | bifpns.append(BiFPNBlock(feature_size)) 110 | self.bifpn = nn.Sequential(*bifpns) 111 | 112 | def forward(self, 
inputs): 113 | c3, c4, c5 = inputs 114 | 115 | # Calculate the input column of BiFPN 116 | p3_x = self.p3(c3) 117 | p4_x = self.p4(c4) 118 | p5_x = self.p5(c5) 119 | p6_x = self.p6(c5) 120 | p7_x = self.p7(p6_x) 121 | 122 | features = [p3_x, p4_x, p5_x, p6_x, p7_x] 123 | return self.bifpn(features) 124 | -------------------------------------------------------------------------------- /coco_eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from pycocotools.coco import COCO 4 | from pycocotools.cocoeval import COCOeval 5 | 6 | import numpy as np 7 | import json 8 | import os 9 | 10 | import torch 11 | 12 | def evaluate_coco(dataset, model, threshold=0.05): 13 | 14 | model.eval() 15 | 16 | with torch.no_grad(): 17 | 18 | # start collecting results 19 | results = [] 20 | image_ids = [] 21 | 22 | for index in range(len(dataset)): 23 | data = dataset[index] 24 | scale = data['scale'] 25 | 26 | # run network 27 | scores, labels, boxes = model(data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0)) 28 | scores = scores.cpu() 29 | labels = labels.cpu() 30 | boxes = boxes.cpu() 31 | 32 | # correct boxes for image scale 33 | boxes /= scale 34 | 35 | if boxes.shape[0] > 0: 36 | # change to (x, y, w, h) (MS COCO standard) 37 | boxes[:, 2] -= boxes[:, 0] 38 | boxes[:, 3] -= boxes[:, 1] 39 | 40 | # compute predicted labels and scores 41 | #for box, score, label in zip(boxes[0], scores[0], labels[0]): 42 | for box_id in range(boxes.shape[0]): 43 | score = float(scores[box_id]) 44 | label = int(labels[box_id]) 45 | box = boxes[box_id, :] 46 | 47 | # scores are sorted, so we can break 48 | if score < threshold: 49 | break 50 | 51 | # append detection for each positively labeled class 52 | image_result = { 53 | 'image_id' : dataset.image_ids[index], 54 | 'category_id' : dataset.label_to_coco_label(label), 55 | 'score' : float(score), 56 | 'bbox' : box.tolist(), 57 | } 58 | 59 | # append detection to results 60 | results.append(image_result) 61 | 62 | # append image to list of processed images 63 | image_ids.append(dataset.image_ids[index]) 64 | 65 | # print progress 66 | print('{}/{}'.format(index, len(dataset)), end='\r') 67 | 68 | if not len(results): 69 | return 70 | 71 | # write output 72 | json.dump(results, open('{}_bbox_results.json'.format(dataset.set_name), 'w'), indent=4) 73 | 74 | # load results in COCO evaluation tool 75 | coco_true = dataset.coco 76 | coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(dataset.set_name)) 77 | 78 | # run COCO evaluation 79 | coco_eval = COCOeval(coco_true, coco_pred, 'bbox') 80 | coco_eval.params.imgIds = image_ids 81 | coco_eval.evaluate() 82 | coco_eval.accumulate() 83 | coco_eval.summarize() 84 | 85 | model.train() 86 | 87 | return 88 | -------------------------------------------------------------------------------- /csv_eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import json 5 | import os 6 | 7 | import torch 8 | 9 | 10 | 11 | def compute_overlap(a, b): 12 | """ 13 | Parameters 14 | ---------- 15 | a: (N, 4) ndarray of float 16 | b: (K, 4) ndarray of float 17 | Returns 18 | ------- 19 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 20 | """ 21 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 22 | 23 | iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0]) 24 | ih = 
np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1]) 25 | 26 | iw = np.maximum(iw, 0) 27 | ih = np.maximum(ih, 0) 28 | 29 | ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih 30 | 31 | ua = np.maximum(ua, np.finfo(float).eps) 32 | 33 | intersection = iw * ih 34 | 35 | return intersection / ua 36 | 37 | 38 | def _compute_ap(recall, precision): 39 | """ Compute the average precision, given the recall and precision curves. 40 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 41 | # Arguments 42 | recall: The recall curve (list). 43 | precision: The precision curve (list). 44 | # Returns 45 | The average precision as computed in py-faster-rcnn. 46 | """ 47 | # correct AP calculation 48 | # first append sentinel values at the end 49 | mrec = np.concatenate(([0.], recall, [1.])) 50 | mpre = np.concatenate(([0.], precision, [0.])) 51 | 52 | # compute the precision envelope 53 | for i in range(mpre.size - 1, 0, -1): 54 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 55 | 56 | # to calculate area under PR curve, look for points 57 | # where X axis (recall) changes value 58 | i = np.where(mrec[1:] != mrec[:-1])[0] 59 | 60 | # and sum (\Delta recall) * prec 61 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 62 | return ap 63 | 64 | 65 | def _get_detections(dataset, retinanet, score_threshold=0.05, max_detections=100, save_path=None): 66 | """ Get the detections from the retinanet using the generator. 67 | The result is a list of lists such that the size is: 68 | all_detections[num_images][num_classes] = detections[num_detections, 4 + num_classes] 69 | # Arguments 70 | dataset : The generator used to run images through the retinanet. 71 | retinanet : The retinanet to run on the images. 72 | score_threshold : The score confidence threshold to use. 73 | max_detections : The maximum number of detections to use per image. 74 | save_path : The path to save the images with visualized detections to. 75 | # Returns 76 | A list of lists containing the detections for each image in the generator. 
77 | """ 78 | all_detections = [[None for i in range(dataset.num_classes())] for j in range(len(dataset))] 79 | 80 | retinanet.eval() 81 | 82 | with torch.no_grad(): 83 | 84 | for index in range(len(dataset)): 85 | data = dataset[index] 86 | scale = data['scale'] 87 | 88 | # run network 89 | scores, labels, boxes = retinanet(data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0)) 90 | scores = scores.cpu().numpy() 91 | labels = labels.cpu().numpy() 92 | boxes = boxes.cpu().numpy() 93 | 94 | # correct boxes for image scale 95 | boxes /= scale 96 | 97 | # select indices which have a score above the threshold 98 | indices = np.where(scores > score_threshold)[0] 99 | if indices.shape[0] > 0: 100 | # select those scores 101 | scores = scores[indices] 102 | 103 | # find the order with which to sort the scores 104 | scores_sort = np.argsort(-scores)[:max_detections] 105 | 106 | # select detections 107 | image_boxes = boxes[indices[scores_sort], :] 108 | image_scores = scores[scores_sort] 109 | image_labels = labels[indices[scores_sort]] 110 | image_detections = np.concatenate([image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1) 111 | 112 | # copy detections to all_detections 113 | for label in range(dataset.num_classes()): 114 | all_detections[index][label] = image_detections[image_detections[:, -1] == label, :-1] 115 | else: 116 | # copy detections to all_detections 117 | for label in range(dataset.num_classes()): 118 | all_detections[index][label] = np.zeros((0, 5)) 119 | 120 | print('{}/{}'.format(index + 1, len(dataset)), end='\r') 121 | 122 | return all_detections 123 | 124 | 125 | def _get_annotations(generator): 126 | """ Get the ground truth annotations from the generator. 127 | The result is a list of lists such that the size is: 128 | all_detections[num_images][num_classes] = annotations[num_detections, 5] 129 | # Arguments 130 | generator : The generator used to retrieve ground truth annotations. 131 | # Returns 132 | A list of lists containing the annotations for each image in the generator. 133 | """ 134 | all_annotations = [[None for i in range(generator.num_classes())] for j in range(len(generator))] 135 | 136 | for i in range(len(generator)): 137 | # load the annotations 138 | annotations = generator.load_annotations(i) 139 | 140 | # copy detections to all_annotations 141 | for label in range(generator.num_classes()): 142 | all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy() 143 | 144 | print('{}/{}'.format(i + 1, len(generator)), end='\r') 145 | 146 | return all_annotations 147 | 148 | 149 | def evaluate( 150 | generator, 151 | retinanet, 152 | iou_threshold=0.5, 153 | score_threshold=0.05, 154 | max_detections=100, 155 | save_path=None 156 | ): 157 | """ Evaluate a given dataset using a given retinanet. 158 | # Arguments 159 | generator : The generator that represents the dataset to evaluate. 160 | retinanet : The retinanet to evaluate. 161 | iou_threshold : The threshold used to consider when a detection is positive or negative. 162 | score_threshold : The score confidence threshold to use for detections. 163 | max_detections : The maximum number of detections to use per image. 164 | save_path : The path to save images with visualized detections to. 165 | # Returns 166 | A dict mapping class names to mAP scores. 
167 | """ 168 | 169 | 170 | 171 | # gather all detections and annotations 172 | 173 | all_detections = _get_detections(generator, retinanet, score_threshold=score_threshold, max_detections=max_detections, save_path=save_path) 174 | all_annotations = _get_annotations(generator) 175 | 176 | average_precisions = {} 177 | 178 | for label in range(generator.num_classes()): 179 | false_positives = np.zeros((0,)) 180 | true_positives = np.zeros((0,)) 181 | scores = np.zeros((0,)) 182 | num_annotations = 0.0 183 | 184 | for i in range(len(generator)): 185 | detections = all_detections[i][label] 186 | annotations = all_annotations[i][label] 187 | num_annotations += annotations.shape[0] 188 | detected_annotations = [] 189 | 190 | for d in detections: 191 | scores = np.append(scores, d[4]) 192 | 193 | if annotations.shape[0] == 0: 194 | false_positives = np.append(false_positives, 1) 195 | true_positives = np.append(true_positives, 0) 196 | continue 197 | 198 | overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations) 199 | assigned_annotation = np.argmax(overlaps, axis=1) 200 | max_overlap = overlaps[0, assigned_annotation] 201 | 202 | if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations: 203 | false_positives = np.append(false_positives, 0) 204 | true_positives = np.append(true_positives, 1) 205 | detected_annotations.append(assigned_annotation) 206 | else: 207 | false_positives = np.append(false_positives, 1) 208 | true_positives = np.append(true_positives, 0) 209 | 210 | # no annotations -> AP for this class is 0 (is this correct?) 211 | if num_annotations == 0: 212 | average_precisions[label] = 0, 0 213 | continue 214 | 215 | # sort by score 216 | indices = np.argsort(-scores) 217 | false_positives = false_positives[indices] 218 | true_positives = true_positives[indices] 219 | 220 | # compute false positives and true positives 221 | false_positives = np.cumsum(false_positives) 222 | true_positives = np.cumsum(true_positives) 223 | 224 | # compute recall and precision 225 | recall = true_positives / num_annotations 226 | precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps) 227 | 228 | # compute average precision 229 | average_precision = _compute_ap(recall, precision) 230 | average_precisions[label] = average_precision, num_annotations 231 | 232 | print('\nmAP:') 233 | for label in range(generator.num_classes()): 234 | label_name = generator.label_to_name(label) 235 | print('{}: {}'.format(label_name, average_precisions[label][0])) 236 | 237 | return average_precisions 238 | 239 | -------------------------------------------------------------------------------- /dataloader.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import sys 3 | import os 4 | import torch 5 | import numpy as np 6 | import random 7 | import csv 8 | 9 | from torch.utils.data import Dataset, DataLoader 10 | from torchvision import transforms, utils 11 | from torch.utils.data.sampler import Sampler 12 | 13 | from pycocotools.coco import COCO 14 | 15 | import skimage.io 16 | import skimage.transform 17 | import skimage.color 18 | import skimage 19 | 20 | from PIL import Image 21 | 22 | 23 | class CocoDataset(Dataset): 24 | """Coco dataset.""" 25 | 26 | def __init__(self, root_dir, set_name='train2017', transform=None): 27 | """ 28 | Args: 29 | root_dir (string): COCO directory. 
30 | transform (callable, optional): Optional transform to be applied 31 | on a sample. 32 | """ 33 | self.root_dir = root_dir 34 | self.set_name = set_name 35 | self.transform = transform 36 | 37 | self.coco = COCO(os.path.join(self.root_dir, 'annotations', 'instances_' + self.set_name + '.json')) 38 | self.image_ids = self.coco.getImgIds() 39 | 40 | self.load_classes() 41 | 42 | def load_classes(self): 43 | # load class names (name -> label) 44 | categories = self.coco.loadCats(self.coco.getCatIds()) 45 | categories.sort(key=lambda x: x['id']) 46 | 47 | self.classes = {} 48 | self.coco_labels = {} 49 | self.coco_labels_inverse = {} 50 | for c in categories: 51 | self.coco_labels[len(self.classes)] = c['id'] 52 | self.coco_labels_inverse[c['id']] = len(self.classes) 53 | self.classes[c['name']] = len(self.classes) 54 | 55 | # also load the reverse (label -> name) 56 | self.labels = {} 57 | for key, value in self.classes.items(): 58 | self.labels[value] = key 59 | 60 | def __len__(self): 61 | return len(self.image_ids) 62 | 63 | def __getitem__(self, idx): 64 | 65 | img = self.load_image(idx) 66 | annot = self.load_annotations(idx) 67 | sample = {'img': img, 'annot': annot} 68 | if self.transform: 69 | sample = self.transform(sample) 70 | 71 | return sample 72 | 73 | def load_image(self, image_index): 74 | image_info = self.coco.loadImgs(self.image_ids[image_index])[0] 75 | path = os.path.join(self.root_dir, 'images', self.set_name, image_info['file_name']) 76 | img = skimage.io.imread(path) 77 | 78 | if len(img.shape) == 2: 79 | img = skimage.color.gray2rgb(img) 80 | 81 | return img.astype(np.float32)/255.0 82 | 83 | def load_annotations(self, image_index): 84 | # get ground truth annotations 85 | annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False) 86 | annotations = np.zeros((0, 5)) 87 | 88 | # some images appear to miss annotations (like image with id 257034) 89 | if len(annotations_ids) == 0: 90 | return annotations 91 | 92 | # parse annotations 93 | coco_annotations = self.coco.loadAnns(annotations_ids) 94 | for idx, a in enumerate(coco_annotations): 95 | 96 | # some annotations have basically no width / height, skip them 97 | if a['bbox'][2] < 1 or a['bbox'][3] < 1: 98 | continue 99 | 100 | annotation = np.zeros((1, 5)) 101 | annotation[0, :4] = a['bbox'] 102 | annotation[0, 4] = self.coco_label_to_label(a['category_id']) 103 | annotations = np.append(annotations, annotation, axis=0) 104 | 105 | # transform from [x, y, w, h] to [x1, y1, x2, y2] 106 | annotations[:, 2] = annotations[:, 0] + annotations[:, 2] 107 | annotations[:, 3] = annotations[:, 1] + annotations[:, 3] 108 | 109 | return annotations 110 | 111 | def coco_label_to_label(self, coco_label): 112 | return self.coco_labels_inverse[coco_label] 113 | 114 | 115 | def label_to_coco_label(self, label): 116 | return self.coco_labels[label] 117 | 118 | def image_aspect_ratio(self, image_index): 119 | image = self.coco.loadImgs(self.image_ids[image_index])[0] 120 | return float(image['width']) / float(image['height']) 121 | 122 | def num_classes(self): 123 | return 80 124 | 125 | 126 | class CSVDataset(Dataset): 127 | """CSV dataset.""" 128 | 129 | def __init__(self, train_file, class_list, transform=None): 130 | """ 131 | Args: 132 | train_file (string): CSV file with training annotations 133 | annotations (string): CSV file with class list 134 | test_file (string, optional): CSV file with testing annotations 135 | """ 136 | self.train_file = train_file 137 | self.class_list = class_list 138 | 
self.transform = transform 139 | 140 | # parse the provided class file 141 | try: 142 | with self._open_for_csv(self.class_list) as file: 143 | self.classes = self.load_classes(csv.reader(file, delimiter=',')) 144 | except ValueError as e: 145 | raise_from(ValueError('invalid CSV class file: {}: {}'.format(self.class_list, e)), None) 146 | 147 | self.labels = {} 148 | for key, value in self.classes.items(): 149 | self.labels[value] = key 150 | 151 | # csv with img_path, x1, y1, x2, y2, class_name 152 | try: 153 | with self._open_for_csv(self.train_file) as file: 154 | self.image_data = self._read_annotations(csv.reader(file, delimiter=','), self.classes) 155 | except ValueError as e: 156 | raise_from(ValueError('invalid CSV annotations file: {}: {}'.format(self.train_file, e)), None) 157 | self.image_names = list(self.image_data.keys()) 158 | 159 | def _parse(self, value, function, fmt): 160 | """ 161 | Parse a string into a value, and format a nice ValueError if it fails. 162 | Returns `function(value)`. 163 | Any `ValueError` raised is catched and a new `ValueError` is raised 164 | with message `fmt.format(e)`, where `e` is the caught `ValueError`. 165 | """ 166 | try: 167 | return function(value) 168 | except ValueError as e: 169 | raise_from(ValueError(fmt.format(e)), None) 170 | 171 | def _open_for_csv(self, path): 172 | """ 173 | Open a file with flags suitable for csv.reader. 174 | This is different for python2 it means with mode 'rb', 175 | for python3 this means 'r' with "universal newlines". 176 | """ 177 | if sys.version_info[0] < 3: 178 | return open(path, 'rb') 179 | else: 180 | return open(path, 'r', newline='') 181 | 182 | 183 | def load_classes(self, csv_reader): 184 | result = {} 185 | 186 | for line, row in enumerate(csv_reader): 187 | line += 1 188 | 189 | try: 190 | class_name, class_id = row 191 | except ValueError: 192 | raise_from(ValueError('line {}: format should be \'class_name,class_id\''.format(line)), None) 193 | class_id = self._parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line)) 194 | 195 | if class_name in result: 196 | raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name)) 197 | result[class_name] = class_id 198 | return result 199 | 200 | 201 | def __len__(self): 202 | return len(self.image_names) 203 | 204 | def __getitem__(self, idx): 205 | 206 | img = self.load_image(idx) 207 | annot = self.load_annotations(idx) 208 | sample = {'img': img, 'annot': annot} 209 | if self.transform: 210 | sample = self.transform(sample) 211 | 212 | return sample 213 | 214 | def load_image(self, image_index): 215 | img = skimage.io.imread(self.image_names[image_index]) 216 | 217 | if len(img.shape) == 2: 218 | img = skimage.color.gray2rgb(img) 219 | 220 | return img.astype(np.float32)/255.0 221 | 222 | def load_annotations(self, image_index): 223 | # get ground truth annotations 224 | annotation_list = self.image_data[self.image_names[image_index]] 225 | annotations = np.zeros((0, 5)) 226 | 227 | # some images appear to miss annotations (like image with id 257034) 228 | if len(annotation_list) == 0: 229 | return annotations 230 | 231 | # parse annotations 232 | for idx, a in enumerate(annotation_list): 233 | # some annotations have basically no width / height, skip them 234 | x1 = a['x1'] 235 | x2 = a['x2'] 236 | y1 = a['y1'] 237 | y2 = a['y2'] 238 | 239 | if (x2-x1) < 1 or (y2-y1) < 1: 240 | continue 241 | 242 | annotation = np.zeros((1, 5)) 243 | 244 | annotation[0, 0] = x1 245 | annotation[0, 1] = y1 246 | annotation[0, 2] = 
x2 247 | annotation[0, 3] = y2 248 | 249 | annotation[0, 4] = self.name_to_label(a['class']) 250 | annotations = np.append(annotations, annotation, axis=0) 251 | 252 | return annotations 253 | 254 | def _read_annotations(self, csv_reader, classes): 255 | result = {} 256 | for line, row in enumerate(csv_reader): 257 | line += 1 258 | 259 | try: 260 | img_file, x1, y1, x2, y2, class_name = row[:6] 261 | except ValueError: 262 | raise_from(ValueError('line {}: format should be \'img_file,x1,y1,x2,y2,class_name\' or \'img_file,,,,,\''.format(line)), None) 263 | 264 | if img_file not in result: 265 | result[img_file] = [] 266 | 267 | # If a row contains only an image path, it's an image without annotations. 268 | if (x1, y1, x2, y2, class_name) == ('', '', '', '', ''): 269 | continue 270 | 271 | x1 = self._parse(x1, int, 'line {}: malformed x1: {{}}'.format(line)) 272 | y1 = self._parse(y1, int, 'line {}: malformed y1: {{}}'.format(line)) 273 | x2 = self._parse(x2, int, 'line {}: malformed x2: {{}}'.format(line)) 274 | y2 = self._parse(y2, int, 'line {}: malformed y2: {{}}'.format(line)) 275 | 276 | # Check that the bounding box is valid. 277 | if x2 <= x1: 278 | raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1)) 279 | if y2 <= y1: 280 | raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1)) 281 | 282 | # check if the current class name is correctly present 283 | if class_name not in classes: 284 | raise ValueError('line {}: unknown class name: \'{}\' (classes: {})'.format(line, class_name, classes)) 285 | 286 | result[img_file].append({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name}) 287 | return result 288 | 289 | def name_to_label(self, name): 290 | return self.classes[name] 291 | 292 | def label_to_name(self, label): 293 | return self.labels[label] 294 | 295 | def num_classes(self): 296 | return max(self.classes.values()) + 1 297 | 298 | def image_aspect_ratio(self, image_index): 299 | image = Image.open(self.image_names[image_index]) 300 | return float(image.width) / float(image.height) 301 | 302 | 303 | def collater(data): 304 | 305 | imgs = [s['img'] for s in data] 306 | annots = [s['annot'] for s in data] 307 | scales = [s['scale'] for s in data] 308 | 309 | widths = [int(s.shape[0]) for s in imgs] 310 | heights = [int(s.shape[1]) for s in imgs] 311 | batch_size = len(imgs) 312 | 313 | max_width = np.array(widths).max() 314 | max_height = np.array(heights).max() 315 | 316 | padded_imgs = torch.zeros(batch_size, max_width, max_height, 3) 317 | 318 | for i in range(batch_size): 319 | img = imgs[i] 320 | padded_imgs[i, :int(img.shape[0]), :int(img.shape[1]), :] = img 321 | 322 | max_num_annots = max(annot.shape[0] for annot in annots) 323 | 324 | if max_num_annots > 0: 325 | 326 | annot_padded = torch.ones((len(annots), max_num_annots, 5)) * -1 327 | 328 | if max_num_annots > 0: 329 | for idx, annot in enumerate(annots): 330 | #print(annot.shape) 331 | if annot.shape[0] > 0: 332 | annot_padded[idx, :annot.shape[0], :] = annot 333 | else: 334 | annot_padded = torch.ones((len(annots), 1, 5)) * -1 335 | 336 | 337 | padded_imgs = padded_imgs.permute(0, 3, 1, 2) 338 | 339 | return {'img': padded_imgs, 'annot': annot_padded, 'scale': scales} 340 | 341 | class Resizer(object): 342 | """Convert ndarrays in sample to Tensors.""" 343 | def __init__(self, img_size): 344 | self.img_size = img_size 345 | 346 | def __call__(self, sample): 347 | image, annots = sample['img'], sample['annot'] 348 | 349 | rows, cols, cns = 
image.shape 350 | 351 | largest_side = max(rows, cols) 352 | 353 | scale = self.img_size / largest_side 354 | 355 | # resize the image with the computed scale 356 | image = skimage.transform.resize(image, (int(round(rows*scale)), int(round((cols*scale))))) 357 | rows, cols, cns = image.shape 358 | 359 | new_image = np.zeros((self.img_size, self.img_size, cns)).astype(np.float32) 360 | new_image[:rows, :cols, :] = image.astype(np.float32) 361 | 362 | annots[:, :4] *= scale 363 | 364 | return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots), 'scale': scale} 365 | 366 | 367 | class Augmenter(object): 368 | """Convert ndarrays in sample to Tensors.""" 369 | 370 | def __call__(self, sample, flip_x=0.5): 371 | 372 | if np.random.rand() < flip_x: 373 | image, annots = sample['img'], sample['annot'] 374 | image = image[:, ::-1, :] 375 | 376 | rows, cols, channels = image.shape 377 | 378 | x1 = annots[:, 0].copy() 379 | x2 = annots[:, 2].copy() 380 | 381 | x_tmp = x1.copy() 382 | 383 | annots[:, 0] = cols - x2 384 | annots[:, 2] = cols - x_tmp 385 | 386 | sample = {'img': image, 'annot': annots} 387 | 388 | return sample 389 | 390 | 391 | class Normalizer(object): 392 | 393 | def __init__(self): 394 | self.mean = np.array([[[0.485, 0.456, 0.406]]]) 395 | self.std = np.array([[[0.229, 0.224, 0.225]]]) 396 | 397 | def __call__(self, sample): 398 | 399 | image, annots = sample['img'], sample['annot'] 400 | 401 | return {'img':((image.astype(np.float32)-self.mean)/self.std), 'annot': annots} 402 | 403 | class UnNormalizer(object): 404 | def __init__(self, mean=None, std=None): 405 | if mean == None: 406 | self.mean = [0.485, 0.456, 0.406] 407 | else: 408 | self.mean = mean 409 | if std == None: 410 | self.std = [0.229, 0.224, 0.225] 411 | else: 412 | self.std = std 413 | 414 | def __call__(self, tensor): 415 | """ 416 | Args: 417 | tensor (Tensor): Tensor image of size (C, H, W) to be normalized. 418 | Returns: 419 | Tensor: Normalized image. 
420 | """ 421 | for t, m, s in zip(tensor, self.mean, self.std): 422 | t.mul_(s).add_(m) 423 | return tensor 424 | 425 | 426 | class AspectRatioBasedSampler(Sampler): 427 | 428 | def __init__(self, data_source, batch_size, drop_last): 429 | self.data_source = data_source 430 | self.batch_size = batch_size 431 | self.drop_last = drop_last 432 | self.groups = self.group_images() 433 | 434 | def __iter__(self): 435 | random.shuffle(self.groups) 436 | for group in self.groups: 437 | yield group 438 | 439 | def __len__(self): 440 | if self.drop_last: 441 | return len(self.data_source) // self.batch_size 442 | else: 443 | return (len(self.data_source) + self.batch_size - 1) // self.batch_size 444 | 445 | def group_images(self): 446 | # determine the order of the images 447 | order = list(range(len(self.data_source))) 448 | order.sort(key=lambda x: self.data_source.image_aspect_ratio(x)) 449 | 450 | # divide into groups, one group = one batch 451 | return [[order[x % len(order)] for x in range(i, i + self.batch_size)] for i in range(0, len(order), self.batch_size)] 452 | -------------------------------------------------------------------------------- /efficientdet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import math 4 | import time 5 | import torch.utils.model_zoo as model_zoo 6 | from utils import BasicBlock, Bottleneck, BBoxTransform, ClipBoxes 7 | from anchors import Anchors 8 | import losses 9 | from torchvision.ops import nms 10 | 11 | from efficientnet_pytorch import EfficientNet 12 | 13 | from bifpn import BiFPN 14 | 15 | from timeitdec import timeit 16 | 17 | w_bifpn = [64, 88, 112, 160, 224, 288, 384, 384] 18 | 19 | class RegressionModel(nn.Module): 20 | def __init__(self, num_features_in, d_class=3, num_anchors=9, feature_size=64): 21 | super(RegressionModel, self).__init__() 22 | 23 | prediction_net = [] 24 | for _ in range(d_class): 25 | prediction_net.append(nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)) 26 | prediction_net.append(nn.ReLU()) 27 | num_features_in = feature_size 28 | self.prediction_net = nn.Sequential(*prediction_net) 29 | 30 | self.output = nn.Conv2d(feature_size, num_anchors*4, kernel_size=3, padding=1) 31 | 32 | def forward(self, x): 33 | out = self.prediction_net(x) 34 | out = self.output(out) 35 | 36 | # out is B x C x W x H, with C = 4*num_anchors 37 | out = out.permute(0, 2, 3, 1) 38 | 39 | return out.contiguous().view(out.shape[0], -1, 4) 40 | 41 | class ClassificationModel(nn.Module): 42 | def __init__(self, num_features_in, num_anchors=9, d_class=3, num_classes=80, prior=0.01, feature_size=64): 43 | super(ClassificationModel, self).__init__() 44 | 45 | self.num_classes = num_classes 46 | self.num_anchors = num_anchors 47 | 48 | classification_net = [] 49 | for _ in range(d_class): 50 | classification_net.append(nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)) 51 | classification_net.append(nn.ReLU()) 52 | num_features_in = feature_size 53 | self.classification_net = nn.Sequential(*classification_net) 54 | 55 | self.output = nn.Conv2d(feature_size, num_anchors*num_classes, kernel_size=3, padding=1) 56 | self.output_act = nn.Sigmoid() 57 | 58 | def forward(self, x): 59 | 60 | out = self.classification_net(x) 61 | 62 | out = self.output(out) 63 | out = self.output_act(out) 64 | 65 | # out is B x C x W x H, with C = n_classes + n_anchors 66 | out1 = out.permute(0, 2, 3, 1) 67 | 68 | batch_size, width, height, channels = out1.shape 69 
| 70 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes) 71 | 72 | return out2.contiguous().view(x.shape[0], -1, self.num_classes) 73 | 74 | class EfficientDet(nn.Module): 75 | 76 | def __init__(self, num_classes, block, pretrained=False, phi=0): 77 | self.inplanes = w_bifpn[phi] 78 | super(EfficientDet, self).__init__() 79 | efficientnet = EfficientNet.from_pretrained(f'efficientnet-b{phi}') 80 | blocks = [] 81 | count = 0 82 | fpn_sizes = [] 83 | for block in efficientnet._blocks: 84 | blocks.append(block) 85 | if block._depthwise_conv.stride == [2, 2]: 86 | count += 1 87 | fpn_sizes.append(block._project_conv.out_channels) 88 | if len(fpn_sizes) >= 4: 89 | break 90 | 91 | self.efficientnet = nn.Sequential(efficientnet._conv_stem, efficientnet._bn0, *blocks) 92 | num_layers = min(phi+2, 8) 93 | self.fpn = BiFPN(fpn_sizes[1:], feature_size=w_bifpn[phi], num_layers=num_layers) 94 | 95 | d_class = 3 + (phi // 3) 96 | self.regressionModel = RegressionModel(w_bifpn[phi], feature_size=w_bifpn[phi], d_class=d_class) 97 | self.classificationModel = ClassificationModel(w_bifpn[phi], feature_size=w_bifpn[phi], d_class=d_class, num_classes=num_classes) 98 | 99 | self.anchors = Anchors() 100 | 101 | self.regressBoxes = BBoxTransform() 102 | 103 | self.clipBoxes = ClipBoxes() 104 | 105 | self.focalLoss = losses.FocalLoss().cuda() 106 | 107 | for m in self.modules(): 108 | if isinstance(m, nn.Conv2d): 109 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 110 | m.weight.data.normal_(0, math.sqrt(2. / n)) 111 | elif isinstance(m, nn.BatchNorm2d): 112 | m.weight.data.fill_(1) 113 | m.bias.data.zero_() 114 | 115 | prior = 0.01 116 | 117 | self.classificationModel.output.weight.data.fill_(0) 118 | self.classificationModel.output.bias.data.fill_(-math.log((1.0-prior)/prior)) 119 | 120 | self.regressionModel.output.weight.data.fill_(0) 121 | self.regressionModel.output.bias.data.fill_(0) 122 | 123 | self.freeze_bn() 124 | 125 | def _make_layer(self, block, planes, blocks, stride=1): 126 | downsample = None 127 | if stride != 1 or self.inplanes != planes * block.expansion: 128 | downsample = nn.Sequential( 129 | nn.Conv2d(self.inplanes, planes * block.expansion, 130 | kernel_size=1, stride=stride, bias=False), 131 | nn.BatchNorm2d(planes * block.expansion), 132 | ) 133 | 134 | layers = [] 135 | layers.append(block(self.inplanes, planes, stride, downsample)) 136 | self.inplanes = planes * block.expansion 137 | for i in range(1, blocks): 138 | layers.append(block(self.inplanes, planes)) 139 | 140 | return nn.Sequential(*layers) 141 | 142 | def freeze_bn(self): 143 | '''Freeze BatchNorm layers.''' 144 | for layer in self.modules(): 145 | if isinstance(layer, nn.BatchNorm2d): 146 | layer.eval() 147 | 148 | def forward(self, inputs): 149 | 150 | if self.training: 151 | img_batch, annotations = inputs 152 | else: 153 | img_batch = inputs 154 | 155 | x = self.efficientnet[0](img_batch) 156 | x = self.efficientnet[1](x) 157 | 158 | # Forward batch trough backbone 159 | features = [] 160 | for block in self.efficientnet[2:]: 161 | x = block(x) 162 | if block._depthwise_conv.stride == [2, 2]: 163 | features.append(x) 164 | 165 | features = self.fpn(features[1:]) 166 | 167 | regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1) 168 | 169 | classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1) 170 | 171 | anchors = self.anchors(img_batch) 172 | 173 | if self.training: 174 | return 
self.focalLoss(classification, regression, anchors, annotations) 175 | else: 176 | transformed_anchors = self.regressBoxes(anchors, regression) 177 | transformed_anchors = self.clipBoxes(transformed_anchors, img_batch) 178 | 179 | scores = torch.max(classification, dim=2, keepdim=True)[0] 180 | 181 | scores_over_thresh = (scores>0.05)[0, :, 0] 182 | 183 | if scores_over_thresh.sum() == 0: 184 | # no boxes to NMS, just return 185 | return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)] 186 | 187 | classification = classification[:, scores_over_thresh, :] 188 | transformed_anchors = transformed_anchors[:, scores_over_thresh, :] 189 | scores = scores[:, scores_over_thresh, :] 190 | # torchvision's nms expects boxes of shape [N, 4] and scores of shape [N], so drop the batch dimension 191 | anchors_nms_idx = nms(transformed_anchors[0, :, :], scores[0, :, 0], 0.5) 192 | 193 | nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1) 194 | 195 | return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]] 196 | 197 | 198 | def efficientdet(num_classes, pretrained=True, **kwargs): 199 | """Constructs an EfficientDet model. 200 | Args: 201 | pretrained (bool): If True, uses an EfficientNet backbone pre-trained on ImageNet. 202 | """ 203 | model = EfficientDet(num_classes, Bottleneck, pretrained=pretrained, **kwargs) 204 | return model -------------------------------------------------------------------------------- /images/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tristandb/EfficientDet-PyTorch/b86f3661c9167ed9394bdfd430ea4673ad5177c7/images/1.jpg -------------------------------------------------------------------------------- /images/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tristandb/EfficientDet-PyTorch/b86f3661c9167ed9394bdfd430ea4673ad5177c7/images/3.jpg -------------------------------------------------------------------------------- /images/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tristandb/EfficientDet-PyTorch/b86f3661c9167ed9394bdfd430ea4673ad5177c7/images/4.jpg -------------------------------------------------------------------------------- /images/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tristandb/EfficientDet-PyTorch/b86f3661c9167ed9394bdfd430ea4673ad5177c7/images/5.jpg -------------------------------------------------------------------------------- /images/6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tristandb/EfficientDet-PyTorch/b86f3661c9167ed9394bdfd430ea4673ad5177c7/images/6.jpg -------------------------------------------------------------------------------- /images/7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tristandb/EfficientDet-PyTorch/b86f3661c9167ed9394bdfd430ea4673ad5177c7/images/7.jpg -------------------------------------------------------------------------------- /images/8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tristandb/EfficientDet-PyTorch/b86f3661c9167ed9394bdfd430ea4673ad5177c7/images/8.jpg -------------------------------------------------------------------------------- /losses.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 |
import torch 4 | import torch.nn as nn 5 | from timeitdec import timeit 6 | 7 | def calc_iou(a, b): 8 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 9 | 10 | iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0]) 11 | ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1]) 12 | 13 | iw = torch.clamp(iw, min=0) 14 | ih = torch.clamp(ih, min=0) 15 | 16 | ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - iw * ih 17 | 18 | ua = torch.clamp(ua, min=1e-8) 19 | 20 | intersection = iw * ih 21 | 22 | IoU = intersection / ua 23 | 24 | return IoU 25 | 26 | class FocalLoss(nn.Module): 27 | #def __init__(self): 28 | 29 | def forward(self, classifications, regressions, anchors, annotations): 30 | #print("classifications", classifications.shape) 31 | #print("regressions", regressions.shape) 32 | #print("anchors", anchors.shape) 33 | #print("annotations", annotations.shape) 34 | #print(annotations) 35 | alpha = 0.25 36 | gamma = 1.5 37 | batch_size = classifications.shape[0] 38 | classification_losses = [] 39 | regression_losses = [] 40 | 41 | anchor = anchors[0, :, :] 42 | 43 | anchor_widths = anchor[:, 2] - anchor[:, 0] 44 | anchor_heights = anchor[:, 3] - anchor[:, 1] 45 | anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths 46 | anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights 47 | 48 | for j in range(batch_size): 49 | 50 | classification = classifications[j, :, :] 51 | regression = regressions[j, :, :] 52 | 53 | bbox_annotation = annotations[j, :, :] 54 | bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1] 55 | 56 | if bbox_annotation.shape[0] == 0: 57 | regression_losses.append(torch.tensor(0).float().cuda()) 58 | classification_losses.append(torch.tensor(0).float().cuda()) 59 | 60 | continue 61 | 62 | classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4) 63 | 64 | IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4]) # num_anchors x num_annotations 65 | 66 | IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1 67 | 68 | #import pdb 69 | #pdb.set_trace() 70 | 71 | # compute the loss for classification 72 | targets = torch.ones(classification.shape) * -1 73 | targets = targets.cuda() 74 | 75 | targets[torch.lt(IoU_max, 0.4), :] = 0 76 | 77 | positive_indices = torch.ge(IoU_max, 0.5) 78 | 79 | num_positive_anchors = positive_indices.sum() 80 | 81 | assigned_annotations = bbox_annotation[IoU_argmax, :] 82 | 83 | targets[positive_indices, :] = 0 84 | targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1 85 | 86 | alpha_factor = torch.ones(targets.shape).cuda() * alpha 87 | 88 | alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor) 89 | focal_weight = torch.where(torch.eq(targets, 1.), 1. 
- classification, classification) 90 | focal_weight = alpha_factor * torch.pow(focal_weight, gamma) 91 | 92 | bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification)) 93 | 94 | # cls_loss = focal_weight * torch.pow(bce, gamma) 95 | cls_loss = focal_weight * bce 96 | 97 | cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda()) 98 | 99 | classification_losses.append(cls_loss.sum()/torch.clamp(num_positive_anchors.float(), min=1.0)) 100 | 101 | # compute the loss for regression 102 | 103 | if positive_indices.sum() > 0: 104 | assigned_annotations = assigned_annotations[positive_indices, :] 105 | 106 | anchor_widths_pi = anchor_widths[positive_indices] 107 | anchor_heights_pi = anchor_heights[positive_indices] 108 | anchor_ctr_x_pi = anchor_ctr_x[positive_indices] 109 | anchor_ctr_y_pi = anchor_ctr_y[positive_indices] 110 | 111 | gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0] 112 | gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1] 113 | gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths 114 | gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights 115 | 116 | # clip widths to 1 117 | gt_widths = torch.clamp(gt_widths, min=1) 118 | gt_heights = torch.clamp(gt_heights, min=1) 119 | 120 | targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi 121 | targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi 122 | targets_dw = torch.log(gt_widths / anchor_widths_pi) 123 | targets_dh = torch.log(gt_heights / anchor_heights_pi) 124 | 125 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh)) 126 | targets = targets.t() 127 | 128 | targets = targets/torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda() 129 | 130 | 131 | negative_indices = ~ positive_indices 132 | 133 | regression_diff = torch.abs(targets - regression[positive_indices, :]) 134 | 135 | regression_loss = torch.where( 136 | torch.le(regression_diff, 1.0 / 9.0), 137 | 0.5 * 9.0 * torch.pow(regression_diff, 2), 138 | regression_diff - 0.5 / 9.0 139 | ) 140 | regression_losses.append(regression_loss.mean()) 141 | else: 142 | regression_losses.append(torch.tensor(0).float().cuda()) 143 | 144 | return torch.stack(classification_losses).mean(dim=0, keepdim=True), torch.stack(regression_losses).mean(dim=0, keepdim=True) 145 | 146 | 147 | 148 | import torch 149 | import torch.nn as nn 150 | import torch.nn.functional as F 151 | 152 | from torch.autograd import Variable 153 | 154 | import time 155 | 156 | 157 | class FocalLoss1(nn.Module): 158 | def __init__(self, num_classes, device): 159 | super(FocalLoss, self).__init__() 160 | self.num_classes = num_classes 161 | self.device = device 162 | 163 | def focal_loss(self, x, y): 164 | '''Focal loss. 165 | Args: 166 | x: (tensor) sized [N,D]. 167 | y: (tensor) sized [N,]. 168 | Return: 169 | (tensor) focal loss. 170 | ''' 171 | alpha = 0.25 172 | gamma = 2 173 | 174 | t = F.one_hot(y.data, 1+self.num_classes) # [N,21] 175 | t = t[:,1:] # exclude background 176 | t = Variable(t) 177 | 178 | p = x.sigmoid() 179 | pt = p*t + (1-p)*(1-t) # pt = p if t > 0 else 1-p 180 | w = alpha*t + (1-alpha)*(1-t) # w = alpha if t > 0 else 1-alpha 181 | w = w * (1-pt).pow(gamma) 182 | return F.binary_cross_entropy_with_logits(x, t, w, reduction='sum') 183 | 184 | def focal_loss_alt(self, x, y, alpha=0.25, gamma=1.5): 185 | '''Focal loss alternative. 186 | 187 | Args: 188 | x: (tensor) sized [N,D]. 189 | y: (tensor) sized [N,]. 190 | 191 | Return: 192 | (tensor) focal loss. 
193 | ''' 194 | t = F.one_hot(y, self.num_classes+1) 195 | t = t[:,1:] 196 | 197 | xt = x*(2*t-1) # xt = x if t > 0 else -x 198 | pt = (2*xt+1).sigmoid() 199 | pt = pt.clamp(1e-7, 1.0) 200 | w = (0+alpha)*(0+t) + (1-alpha)*(1-t) 201 | loss = -w*pt.log() / gamma 202 | return loss.sum() 203 | 204 | 205 | def forward(self, loc_preds, loc_targets, cls_preds, cls_targets): 206 | '''Compute loss between (loc_preds, loc_targets) and (cls_preds, cls_targets). 207 | Args: 208 | loc_preds: (tensor) predicted locations, sized [batch_size, #anchors, 4]. 209 | loc_targets: (tensor) encoded target locations, sized [batch_size, #anchors, 4]. 210 | cls_preds: (tensor) predicted class confidences, sized [batch_size, #anchors, #classes]. 211 | cls_targets: (tensor) encoded target labels, sized [batch_size, #anchors]. 212 | loss: 213 | (tensor) loss = SmoothL1Loss(loc_preds, loc_targets) + FocalLoss(cls_preds, cls_targets). 214 | ''' 215 | 216 | batch_size, num_boxes = cls_targets.size() 217 | pos = cls_targets > 0 # [N,#anchors] 218 | num_pos = pos.data.long().sum() 219 | 220 | ################################################################ 221 | # loc_loss = SmoothL1Loss(pos_loc_preds, pos_loc_targets) 222 | ################################################################ 223 | mask = pos.unsqueeze(2).expand_as(loc_preds) # [N,#anchors,4] 224 | masked_loc_preds = loc_preds[mask].view(-1,4) # [#pos,4] 225 | masked_loc_targets = loc_targets[mask].view(-1,4) # [#pos,4] 226 | loc_loss = F.smooth_l1_loss(masked_loc_preds, masked_loc_targets, reduction='sum') 227 | 228 | ################################################################ 229 | # cls_loss = FocalLoss(loc_preds, loc_targets) 230 | ################################################################ 231 | pos_neg = cls_targets > -1 # exclude ignored anchors 232 | num_peg = pos_neg.data.long().sum() 233 | mask = pos_neg.unsqueeze(2).expand_as(cls_preds) 234 | masked_cls_preds = cls_preds[mask].view(-1,self.num_classes) 235 | cls_loss = self.focal_loss_alt(masked_cls_preds, cls_targets[pos_neg]) 236 | 237 | #print('loc_loss: %.3f | cls_loss: %.3f' % (loc_loss.data[0]/num_pos, cls_loss.data[0]/num_peg), end=' | ') 238 | loss = loc_loss/num_pos + cls_loss/num_peg 239 | return loss -------------------------------------------------------------------------------- /oid_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | 3 | import csv 4 | import json 5 | import os 6 | import warnings 7 | 8 | import numpy as np 9 | import skimage 10 | import skimage.color 11 | import skimage.io 12 | import skimage.transform 13 | from PIL import Image 14 | from torch.utils.data import Dataset 15 | 16 | 17 | def get_labels(metadata_dir, version='v4'): 18 | if version == 'v4' or version == 'challenge2018': 19 | csv_file = 'class-descriptions-boxable.csv' if version == 'v4' else 'challenge-2018-class-descriptions-500.csv' 20 | 21 | boxable_classes_descriptions = os.path.join(metadata_dir, csv_file) 22 | id_to_labels = {} 23 | cls_index = {} 24 | 25 | i = 0 26 | with open(boxable_classes_descriptions) as f: 27 | for row in csv.reader(f): 28 | # make sure the csv row is not empty (usually the last one) 29 | if len(row): 30 | label = row[0] 31 | description = row[1].replace("\"", "").replace("'", "").replace('`', '') 32 | 33 | id_to_labels[i] = description 34 | cls_index[label] = i 35 | 36 | i += 1 37 | else: 38 | trainable_classes_path = os.path.join(metadata_dir, 
'classes-bbox-trainable.txt')
39 |         description_path = os.path.join(metadata_dir, 'class-descriptions.csv')
40 | 
41 |         description_table = {}
42 |         with open(description_path) as f:
43 |             for row in csv.reader(f):
44 |                 # make sure the csv row is not empty (usually the last one)
45 |                 if len(row):
46 |                     description_table[row[0]] = row[1].replace("\"", "").replace("'", "").replace('`', '')
47 | 
48 |         with open(trainable_classes_path, 'r') as f:
49 |             trainable_classes = f.read().split('\n')
50 | 
51 |         id_to_labels = dict([(i, description_table[c]) for i, c in enumerate(trainable_classes)])
52 |         cls_index = dict([(c, i) for i, c in enumerate(trainable_classes)])
53 | 
54 |     return id_to_labels, cls_index
55 | 
56 | 
57 | def generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index, version='v4'):
58 |     validation_image_ids = {}
59 | 
60 |     if version == 'v4':
61 |         annotations_path = os.path.join(metadata_dir, subset, '{}-annotations-bbox.csv'.format(subset))
62 |     elif version == 'challenge2018':
63 |         validation_image_ids_path = os.path.join(metadata_dir, 'challenge-2018-image-ids-valset-od.csv')
64 | 
65 |         with open(validation_image_ids_path, 'r') as csv_file:
66 |             reader = csv.DictReader(csv_file, fieldnames=['ImageID'])
67 |             next(reader)
68 |             for line, row in enumerate(reader):
69 |                 image_id = row['ImageID']
70 |                 validation_image_ids[image_id] = True
71 | 
72 |         annotations_path = os.path.join(metadata_dir, 'challenge-2018-train-annotations-bbox.csv')
73 |     else:
74 |         annotations_path = os.path.join(metadata_dir, subset, 'annotations-human-bbox.csv')
75 | 
76 |     fieldnames = ['ImageID', 'Source', 'LabelName', 'Confidence',
77 |                   'XMin', 'XMax', 'YMin', 'YMax',
78 |                   'IsOccluded', 'IsTruncated', 'IsGroupOf', 'IsDepiction', 'IsInside']
79 | 
80 |     id_annotations = dict()
81 |     with open(annotations_path, 'r') as csv_file:
82 |         reader = csv.DictReader(csv_file, fieldnames=fieldnames)
83 |         next(reader)
84 | 
85 |         images_sizes = {}
86 |         for line, row in enumerate(reader):
87 |             frame = row['ImageID']
88 | 
89 |             if version == 'challenge2018':
90 |                 if subset == 'train':
91 |                     if frame in validation_image_ids:
92 |                         continue
93 |                 elif subset == 'validation':
94 |                     if frame not in validation_image_ids:
95 |                         continue
96 |                 else:
97 |                     raise NotImplementedError('This generator handles only the train and validation subsets')
98 | 
99 |             class_name = row['LabelName']
100 | 
101 |             if class_name not in cls_index:
102 |                 continue
103 | 
104 |             cls_id = cls_index[class_name]
105 | 
106 |             if version == 'challenge2018':
107 |                 # We recommend participants to use the provided subset of the training set as a validation set.
108 |                 # This is preferable over using the V4 val/test sets, as the training set is more densely annotated.
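                # For challenge2018, train and validation images alike are read from
                # images/train, so with a hypothetical main_dir='/data/oid' and
                # frame='0001eeaf4aed83f9' the path built below resolves to
                # '/data/oid/images/train/0001eeaf4aed83f9.jpg'.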
109 | img_path = os.path.join(main_dir, 'images', 'train', frame + '.jpg') 110 | else: 111 | img_path = os.path.join(main_dir, 'images', subset, frame + '.jpg') 112 | 113 | if frame in images_sizes: 114 | width, height = images_sizes[frame] 115 | else: 116 | try: 117 | with Image.open(img_path) as img: 118 | width, height = img.width, img.height 119 | images_sizes[frame] = (width, height) 120 | except Exception as ex: 121 | if version == 'challenge2018': 122 | raise ex 123 | continue 124 | 125 | x1 = float(row['XMin']) 126 | x2 = float(row['XMax']) 127 | y1 = float(row['YMin']) 128 | y2 = float(row['YMax']) 129 | 130 | x1_int = int(round(x1 * width)) 131 | x2_int = int(round(x2 * width)) 132 | y1_int = int(round(y1 * height)) 133 | y2_int = int(round(y2 * height)) 134 | 135 | # Check that the bounding box is valid. 136 | if x2 <= x1: 137 | raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1)) 138 | if y2 <= y1: 139 | raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1)) 140 | 141 | if y2_int == y1_int: 142 | warnings.warn('filtering line {}: rounding y2 ({}) and y1 ({}) makes them equal'.format(line, y2, y1)) 143 | continue 144 | 145 | if x2_int == x1_int: 146 | warnings.warn('filtering line {}: rounding x2 ({}) and x1 ({}) makes them equal'.format(line, x2, x1)) 147 | continue 148 | 149 | img_id = row['ImageID'] 150 | annotation = {'cls_id': cls_id, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2} 151 | 152 | if img_id in id_annotations: 153 | annotations = id_annotations[img_id] 154 | annotations['boxes'].append(annotation) 155 | else: 156 | id_annotations[img_id] = {'w': width, 'h': height, 'boxes': [annotation]} 157 | return id_annotations 158 | 159 | 160 | class OidDataset(Dataset): 161 | """Oid dataset.""" 162 | 163 | def __init__(self, main_dir, subset, version='v4', annotation_cache_dir='.', transform=None): 164 | if version == 'v4': 165 | metadata = '2018_04' 166 | elif version == 'challenge2018': 167 | metadata = 'challenge2018' 168 | elif version == 'v3': 169 | metadata = '2017_11' 170 | else: 171 | raise NotImplementedError('There is currently no implementation for versions older than v3') 172 | 173 | self.transform = transform 174 | 175 | if version == 'challenge2018': 176 | self.base_dir = os.path.join(main_dir, 'images', 'train') 177 | else: 178 | self.base_dir = os.path.join(main_dir, 'images', subset) 179 | 180 | metadata_dir = os.path.join(main_dir, metadata) 181 | annotation_cache_json = os.path.join(annotation_cache_dir, subset + '.json') 182 | 183 | self.id_to_labels, cls_index = get_labels(metadata_dir, version=version) 184 | 185 | if os.path.exists(annotation_cache_json): 186 | with open(annotation_cache_json, 'r') as f: 187 | self.annotations = json.loads(f.read()) 188 | else: 189 | self.annotations = generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index, 190 | version=version) 191 | json.dump(self.annotations, open(annotation_cache_json, "w")) 192 | 193 | self.id_to_image_id = dict([(i, k) for i, k in enumerate(self.annotations)]) 194 | 195 | # (label -> name) 196 | self.labels = self.id_to_labels 197 | 198 | def __len__(self): 199 | return len(self.annotations) 200 | 201 | def __getitem__(self, idx): 202 | 203 | img = self.load_image(idx) 204 | annot = self.load_annotations(idx) 205 | sample = {'img': img, 'annot': annot} 206 | if self.transform: 207 | sample = self.transform(sample) 208 | 209 | return sample 210 | 211 | def image_path(self, image_index): 212 | path = 
os.path.join(self.base_dir, self.id_to_image_id[image_index] + '.jpg') 213 | return path 214 | 215 | def load_image(self, image_index): 216 | path = self.image_path(image_index) 217 | img = skimage.io.imread(path) 218 | 219 | if len(img.shape) == 1: 220 | img = img[0] 221 | 222 | if len(img.shape) == 2: 223 | img = skimage.color.gray2rgb(img) 224 | 225 | try: 226 | return img.astype(np.float32) / 255.0 227 | except Exception: 228 | print (path) 229 | exit(0) 230 | 231 | def load_annotations(self, image_index): 232 | # get ground truth annotations 233 | image_annotations = self.annotations[self.id_to_image_id[image_index]] 234 | 235 | labels = image_annotations['boxes'] 236 | height, width = image_annotations['h'], image_annotations['w'] 237 | 238 | boxes = np.zeros((len(labels), 5)) 239 | for idx, ann in enumerate(labels): 240 | cls_id = ann['cls_id'] 241 | x1 = ann['x1'] * width 242 | x2 = ann['x2'] * width 243 | y1 = ann['y1'] * height 244 | y2 = ann['y2'] * height 245 | 246 | boxes[idx, 0] = x1 247 | boxes[idx, 1] = y1 248 | boxes[idx, 2] = x2 249 | boxes[idx, 3] = y2 250 | boxes[idx, 4] = cls_id 251 | 252 | return boxes 253 | 254 | def image_aspect_ratio(self, image_index): 255 | img_annotations = self.annotations[self.id_to_image_id[image_index]] 256 | height, width = img_annotations['h'], img_annotations['w'] 257 | return float(width) / float(height) 258 | 259 | def num_classes(self): 260 | return len(self.id_to_labels) 261 | -------------------------------------------------------------------------------- /opt/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | # from . import nms_cpu, nms_cuda 5 | from .soft_nms_cpu import soft_nms_cpu 6 | 7 | def nms(dets, iou_thr, device_id=None): 8 | """Dispatch to either CPU or GPU NMS implementations. 9 | The input can be either a torch tensor or numpy array. GPU NMS will be used 10 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS 11 | will be used. The returned type will always be the same as inputs. 12 | Arguments: 13 | dets (torch.Tensor or np.ndarray): bboxes with scores. 14 | iou_thr (float): IoU threshold for NMS. 15 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 16 | is None, then cpu nms is used, otherwise gpu_nms will be used. 17 | Returns: 18 | tuple: kept bboxes and indice, which is always the same data type as 19 | the input. 
20 | Example: 21 | >>> dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9], 22 | >>> [49.3, 32.9, 51.0, 35.3, 0.9], 23 | >>> [49.2, 31.8, 51.0, 35.4, 0.5], 24 | >>> [35.1, 11.5, 39.1, 15.7, 0.5], 25 | >>> [35.6, 11.8, 39.3, 14.2, 0.5], 26 | >>> [35.3, 11.5, 39.9, 14.5, 0.4], 27 | >>> [35.2, 11.7, 39.7, 15.7, 0.3]], dtype=np.float32) 28 | >>> iou_thr = 0.7 29 | >>> supressed, inds = nms(dets, iou_thr) 30 | >>> assert len(inds) == len(supressed) == 3 31 | """ 32 | # convert dets (tensor or numpy array) to tensor 33 | if isinstance(dets, torch.Tensor): 34 | is_numpy = False 35 | dets_th = dets 36 | elif isinstance(dets, np.ndarray): 37 | is_numpy = True 38 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 39 | dets_th = torch.from_numpy(dets).to(device) 40 | else: 41 | raise TypeError( 42 | 'dets must be either a Tensor or numpy array, but got {}'.format( 43 | type(dets))) 44 | 45 | # execute cpu or cuda nms 46 | if dets_th.shape[0] == 0: 47 | inds = dets_th.new_zeros(0, dtype=torch.long) 48 | else: 49 | if dets_th.is_cuda: 50 | pass 51 | #inds = nms_cuda.nms(dets_th, iou_thr) 52 | else: 53 | pass 54 | #inds = nms_cpu.nms(dets_th, iou_thr) 55 | 56 | if is_numpy: 57 | inds = inds.cpu().numpy() 58 | return dets[inds, :], inds 59 | 60 | 61 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 62 | """ 63 | Example: 64 | >>> dets = np.array([[4., 3., 5., 3., 0.9], 65 | >>> [4., 3., 5., 4., 0.9], 66 | >>> [3., 1., 3., 1., 0.5], 67 | >>> [3., 1., 3., 1., 0.5], 68 | >>> [3., 1., 3., 1., 0.4], 69 | >>> [3., 1., 3., 1., 0.0]], dtype=np.float32) 70 | >>> iou_thr = 0.7 71 | >>> supressed, inds = soft_nms(dets, iou_thr, sigma=0.5) 72 | >>> assert len(inds) == len(supressed) == 3 73 | """ 74 | if isinstance(dets, torch.Tensor): 75 | is_tensor = True 76 | dets_np = dets.detach().cpu().numpy() 77 | elif isinstance(dets, np.ndarray): 78 | is_tensor = False 79 | dets_np = dets 80 | else: 81 | raise TypeError( 82 | 'dets must be either a Tensor or numpy array, but got {}'.format( 83 | type(dets))) 84 | 85 | method_codes = {'linear': 1, 'gaussian': 2} 86 | if method not in method_codes: 87 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 88 | new_dets, inds = soft_nms_cpu( 89 | dets_np, 90 | iou_thr, 91 | method=method_codes[method], 92 | sigma=sigma, 93 | min_score=min_score) 94 | 95 | if is_tensor: 96 | return dets.new_tensor(new_dets), dets.new_tensor( 97 | inds, dtype=torch.long) 98 | else: 99 | return new_dets.astype(np.float32), inds.astype(np.int64) -------------------------------------------------------------------------------- /opt/soft_nms_cpu.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def soft_nms_cpu( 4 | np.ndarray[float, ndim=2] boxes_in, 5 | float iou_thr, 6 | unsigned int method=1, 7 | float sigma=0.5, 8 | float min_score=0.001, 9 | ): 10 | boxes = boxes_in.copy() 11 | cdef unsigned int N = boxes.shape[0] 12 | cdef float iw, ih, box_area 13 | cdef float ua 14 | cdef int pos = 0 15 | cdef float maxscore = 0 16 | cdef int maxpos = 0 17 | cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov 18 | inds = np.arange(N) 19 | 20 | for i in range(N): 21 | maxscore = boxes[i, 4] 22 | maxpos = i 23 | 24 | tx1 = boxes[i, 0] 25 | ty1 = boxes[i, 1] 26 | tx2 = boxes[i, 2] 27 | ty2 = boxes[i, 3] 28 | ts = boxes[i, 4] 29 | ti = inds[i] 30 | 31 | pos = i + 1 32 | # get max box 33 | while pos < N: 34 | if maxscore < boxes[pos, 4]: 35 | maxscore = boxes[pos, 4] 36 | maxpos 
= pos 37 | pos = pos + 1 38 | 39 | # add max box as a detection 40 | boxes[i, 0] = boxes[maxpos, 0] 41 | boxes[i, 1] = boxes[maxpos, 1] 42 | boxes[i, 2] = boxes[maxpos, 2] 43 | boxes[i, 3] = boxes[maxpos, 3] 44 | boxes[i, 4] = boxes[maxpos, 4] 45 | inds[i] = inds[maxpos] 46 | 47 | # swap ith box with position of max box 48 | boxes[maxpos, 0] = tx1 49 | boxes[maxpos, 1] = ty1 50 | boxes[maxpos, 2] = tx2 51 | boxes[maxpos, 3] = ty2 52 | boxes[maxpos, 4] = ts 53 | inds[maxpos] = ti 54 | 55 | tx1 = boxes[i, 0] 56 | ty1 = boxes[i, 1] 57 | tx2 = boxes[i, 2] 58 | ty2 = boxes[i, 3] 59 | ts = boxes[i, 4] 60 | 61 | pos = i + 1 62 | # NMS iterations, note that N changes if detection boxes fall below 63 | # threshold 64 | while pos < N: 65 | x1 = boxes[pos, 0] 66 | y1 = boxes[pos, 1] 67 | x2 = boxes[pos, 2] 68 | y2 = boxes[pos, 3] 69 | s = boxes[pos, 4] 70 | 71 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 72 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 73 | if iw > 0: 74 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 75 | if ih > 0: 76 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 77 | ov = iw * ih / ua # iou between max box and detection box 78 | 79 | if method == 1: # linear 80 | if ov > iou_thr: 81 | weight = 1 - ov 82 | else: 83 | weight = 1 84 | elif method == 2: # gaussian 85 | weight = np.exp(-(ov * ov) / sigma) 86 | else: # original NMS 87 | if ov > iou_thr: 88 | weight = 0 89 | else: 90 | weight = 1 91 | 92 | boxes[pos, 4] = weight * boxes[pos, 4] 93 | 94 | # if box score falls below threshold, discard the box by 95 | # swapping with last box update N 96 | if boxes[pos, 4] < min_score: 97 | boxes[pos, 0] = boxes[N-1, 0] 98 | boxes[pos, 1] = boxes[N-1, 1] 99 | boxes[pos, 2] = boxes[N-1, 2] 100 | boxes[pos, 3] = boxes[N-1, 3] 101 | boxes[pos, 4] = boxes[N-1, 4] 102 | inds[pos] = inds[N - 1] 103 | N = N - 1 104 | pos = pos - 1 105 | 106 | pos = pos + 1 107 | 108 | return boxes[:N], inds[:N] -------------------------------------------------------------------------------- /opt/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
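// Greedy hard NMS on the CPU: detections are visited in descending score order,
// and a box is suppressed once its overlap with an already-kept box reaches
// `threshold`, where IoU = inter / (area_i + area_j - inter). The kernel below
// returns the indices of the boxes that survive.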
2 | #include <torch/extension.h>
3 | 
4 | template <typename scalar_t>
5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
6 |   AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
7 | 
8 |   if (dets.numel() == 0) {
9 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
10 |   }
11 | 
12 |   auto x1_t = dets.select(1, 0).contiguous();
13 |   auto y1_t = dets.select(1, 1).contiguous();
14 |   auto x2_t = dets.select(1, 2).contiguous();
15 |   auto y2_t = dets.select(1, 3).contiguous();
16 |   auto scores = dets.select(1, 4).contiguous();
17 | 
18 |   at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
19 | 
20 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
21 | 
22 |   auto ndets = dets.size(0);
23 |   at::Tensor suppressed_t =
24 |       at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
25 | 
26 |   auto suppressed = suppressed_t.data<uint8_t>();
27 |   auto order = order_t.data<int64_t>();
28 |   auto x1 = x1_t.data<scalar_t>();
29 |   auto y1 = y1_t.data<scalar_t>();
30 |   auto x2 = x2_t.data<scalar_t>();
31 |   auto y2 = y2_t.data<scalar_t>();
32 |   auto areas = areas_t.data<scalar_t>();
33 | 
34 |   for (int64_t _i = 0; _i < ndets; _i++) {
35 |     auto i = order[_i];
36 |     if (suppressed[i] == 1) continue;
37 |     auto ix1 = x1[i];
38 |     auto iy1 = y1[i];
39 |     auto ix2 = x2[i];
40 |     auto iy2 = y2[i];
41 |     auto iarea = areas[i];
42 | 
43 |     for (int64_t _j = _i + 1; _j < ndets; _j++) {
44 |       auto j = order[_j];
45 |       if (suppressed[j] == 1) continue;
46 |       auto xx1 = std::max(ix1, x1[j]);
47 |       auto yy1 = std::max(iy1, y1[j]);
48 |       auto xx2 = std::min(ix2, x2[j]);
49 |       auto yy2 = std::min(iy2, y2[j]);
50 | 
51 |       auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
52 |       auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
53 |       auto inter = w * h;
54 |       auto ovr = inter / (iarea + areas[j] - inter);
55 |       if (ovr >= threshold) suppressed[j] = 1;
56 |     }
57 |   }
58 |   return at::nonzero(suppressed_t == 0).squeeze(1);
59 | }
60 | 
61 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
62 |   at::Tensor result;
63 |   AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] {
64 |     result = nms_cpu_kernel<scalar_t>(dets, threshold);
65 |   });
66 |   return result;
67 | }
68 | 
69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
70 |   m.def("nms", &nms, "non-maximum suppression");
71 | }
-------------------------------------------------------------------------------- /opt/src/nms_cuda.cpp: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include <torch/extension.h>
3 | 
4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
5 | 
6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
7 | 
8 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
9 |   CHECK_CUDA(dets);
10 |   if (dets.numel() == 0)
11 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
12 |   return nms_cuda(dets, threshold);
13 | }
14 | 
15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
16 |   m.def("nms", &nms, "non-maximum suppression");
17 | }
-------------------------------------------------------------------------------- /opt/src/nms_kernel.cu: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
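// CUDA NMS: boxes are processed in tiles of threadsPerBlock (64) entries. Each
// thread compares its box against one column tile and records the boxes it
// suppresses in a 64-bit mask (dev_mask); the host then walks these masks in
// score order to assemble the list of kept indices.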
2 | #include <ATen/ATen.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <ATen/DeviceGuard.h>
5 | 
6 | #include <THC/THC.h>
7 | #include <THC/THCDeviceUtils.cuh>
8 | 
9 | #include <vector>
10 | #include <iostream>
11 | 
12 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
13 | 
14 | __device__ inline float devIoU(float const * const a, float const * const b) {
15 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
16 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
17 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
18 |   float interS = width * height;
19 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
20 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
21 |   return interS / (Sa + Sb - interS);
22 | }
23 | 
24 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
25 |                            const float *dev_boxes, unsigned long long *dev_mask) {
26 |   const int row_start = blockIdx.y;
27 |   const int col_start = blockIdx.x;
28 | 
29 |   // if (row_start > col_start) return;
30 | 
31 |   const int row_size =
32 |       min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
33 |   const int col_size =
34 |       min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
35 | 
36 |   __shared__ float block_boxes[threadsPerBlock * 5];
37 |   if (threadIdx.x < col_size) {
38 |     block_boxes[threadIdx.x * 5 + 0] =
39 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
40 |     block_boxes[threadIdx.x * 5 + 1] =
41 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
42 |     block_boxes[threadIdx.x * 5 + 2] =
43 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
44 |     block_boxes[threadIdx.x * 5 + 3] =
45 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
46 |     block_boxes[threadIdx.x * 5 + 4] =
47 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
48 |   }
49 |   __syncthreads();
50 | 
51 |   if (threadIdx.x < row_size) {
52 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
53 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
54 |     int i = 0;
55 |     unsigned long long t = 0;
56 |     int start = 0;
57 |     if (row_start == col_start) {
58 |       start = threadIdx.x + 1;
59 |     }
60 |     for (i = start; i < col_size; i++) {
61 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
62 |         t |= 1ULL << i;
63 |       }
64 |     }
65 |     const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
66 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
67 |   }
68 | }
69 | 
70 | // boxes is a N x 5 tensor
71 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
72 | 
73 |   // Ensure CUDA uses the input tensor device.
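  // (The guard below keeps the allocation, kernel launch and copies on the GPU
  // that owns `boxes`, which matters when several devices are visible.)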
74 |   at::DeviceGuard guard(boxes.device());
75 | 
76 |   using scalar_t = float;
77 |   AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
78 |   auto scores = boxes.select(1, 4);
79 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
80 |   auto boxes_sorted = boxes.index_select(0, order_t);
81 | 
82 |   int boxes_num = boxes.size(0);
83 | 
84 |   const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
85 | 
86 |   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
87 | 
88 |   THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
89 | 
90 |   unsigned long long* mask_dev = NULL;
91 |   //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
92 |   //                      boxes_num * col_blocks * sizeof(unsigned long long)));
93 | 
94 |   mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
95 | 
96 |   dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
97 |               THCCeilDiv(boxes_num, threadsPerBlock));
98 |   dim3 threads(threadsPerBlock);
99 |   nms_kernel<<<blocks, threads>>>(boxes_num,
100 |                                   nms_overlap_thresh,
101 |                                   boxes_dev,
102 |                                   mask_dev);
103 | 
104 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
105 |   THCudaCheck(cudaMemcpy(&mask_host[0],
106 |                          mask_dev,
107 |                          sizeof(unsigned long long) * boxes_num * col_blocks,
108 |                          cudaMemcpyDeviceToHost));
109 | 
110 |   std::vector<unsigned long long> remv(col_blocks);
111 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
112 | 
113 |   at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
114 |   int64_t* keep_out = keep.data<int64_t>();
115 | 
116 |   int num_to_keep = 0;
117 |   for (int i = 0; i < boxes_num; i++) {
118 |     int nblock = i / threadsPerBlock;
119 |     int inblock = i % threadsPerBlock;
120 | 
121 |     if (!(remv[nblock] & (1ULL << inblock))) {
122 |       keep_out[num_to_keep++] = i;
123 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
124 |       for (int j = nblock; j < col_blocks; j++) {
125 |         remv[j] |= p[j];
126 |       }
127 |     }
128 |   }
129 | 
130 |   THCudaFree(state, mask_dev);
131 |   // TODO improve this part
132 |   return std::get<0>(order_t.index({
133 |       keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
134 |           order_t.device(), keep.scalar_type())
135 |   }).sort(0, false));
136 | }
137 | 
-------------------------------------------------------------------------------- /opt/src/soft_nms_cpu.pyx: --------------------------------------------------------------------------------
1 | # ----------------------------------------------------------
2 | # Soft-NMS: Improving Object Detection With One Line of Code
3 | # Copyright (c) University of Maryland, College Park
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Navaneeth Bodla and Bharat Singh
6 | # Modified by Kai Chen
7 | # ----------------------------------------------------------
8 | 
9 | # cython: language_level=3, boundscheck=False
10 | 
11 | import numpy as np
12 | cimport numpy as np
13 | 
14 | 
15 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
16 |     return a if a >= b else b
17 | 
18 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
19 |     return a if a <= b else b
20 | 
21 | 
22 | def soft_nms_cpu(
23 |     np.ndarray[float, ndim=2] boxes_in,
24 |     float iou_thr,
25 |     unsigned int method=1,
26 |     float sigma=0.5,
27 |     float min_score=0.001,
28 | ):
29 |     boxes = boxes_in.copy()
30 |     cdef int N = boxes.shape[0]
31 |     cdef float iw, ih, box_area
32 |     cdef float ua
33 |     cdef int pos = 0
34 |     cdef float maxscore = 0
35 |     cdef int maxpos = 0
36 |     cdef float x1, x2, y1, y2, tx1, tx2,
ty1, ty2, ts, area, weight, ov 37 | inds = np.arange(N) 38 | 39 | for i in range(N): 40 | maxscore = boxes[i, 4] 41 | maxpos = i 42 | 43 | tx1 = boxes[i, 0] 44 | ty1 = boxes[i, 1] 45 | tx2 = boxes[i, 2] 46 | ty2 = boxes[i, 3] 47 | ts = boxes[i, 4] 48 | ti = inds[i] 49 | 50 | pos = i + 1 51 | # get max box 52 | while pos < N: 53 | if maxscore < boxes[pos, 4]: 54 | maxscore = boxes[pos, 4] 55 | maxpos = pos 56 | pos = pos + 1 57 | 58 | # add max box as a detection 59 | boxes[i, 0] = boxes[maxpos, 0] 60 | boxes[i, 1] = boxes[maxpos, 1] 61 | boxes[i, 2] = boxes[maxpos, 2] 62 | boxes[i, 3] = boxes[maxpos, 3] 63 | boxes[i, 4] = boxes[maxpos, 4] 64 | inds[i] = inds[maxpos] 65 | 66 | # swap ith box with position of max box 67 | boxes[maxpos, 0] = tx1 68 | boxes[maxpos, 1] = ty1 69 | boxes[maxpos, 2] = tx2 70 | boxes[maxpos, 3] = ty2 71 | boxes[maxpos, 4] = ts 72 | inds[maxpos] = ti 73 | 74 | tx1 = boxes[i, 0] 75 | ty1 = boxes[i, 1] 76 | tx2 = boxes[i, 2] 77 | ty2 = boxes[i, 3] 78 | ts = boxes[i, 4] 79 | 80 | pos = i + 1 81 | # NMS iterations, note that N changes if detection boxes fall below 82 | # threshold 83 | while pos < N: 84 | x1 = boxes[pos, 0] 85 | y1 = boxes[pos, 1] 86 | x2 = boxes[pos, 2] 87 | y2 = boxes[pos, 3] 88 | s = boxes[pos, 4] 89 | 90 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 91 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 92 | if iw > 0: 93 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 94 | if ih > 0: 95 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 96 | ov = iw * ih / ua # iou between max box and detection box 97 | 98 | if method == 1: # linear 99 | if ov > iou_thr: 100 | weight = 1 - ov 101 | else: 102 | weight = 1 103 | elif method == 2: # gaussian 104 | weight = np.exp(-(ov * ov) / sigma) 105 | else: # original NMS 106 | if ov > iou_thr: 107 | weight = 0 108 | else: 109 | weight = 1 110 | 111 | boxes[pos, 4] = weight * boxes[pos, 4] 112 | 113 | # if box score falls below threshold, discard the box by 114 | # swapping with last box update N 115 | if boxes[pos, 4] < min_score: 116 | boxes[pos, 0] = boxes[N-1, 0] 117 | boxes[pos, 1] = boxes[N-1, 1] 118 | boxes[pos, 2] = boxes[N-1, 2] 119 | boxes[pos, 3] = boxes[N-1, 3] 120 | boxes[pos, 4] = boxes[N-1, 4] 121 | inds[pos] = inds[N - 1] 122 | N = N - 1 123 | pos = pos - 1 124 | 125 | pos = pos + 1 126 | 127 | return boxes[:N], inds[:N] 128 | -------------------------------------------------------------------------------- /retinanet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import math 4 | import time 5 | import torch.utils.model_zoo as model_zoo 6 | from utils import BasicBlock, Bottleneck, BBoxTransform, ClipBoxes 7 | from anchors import Anchors 8 | import losses 9 | from torchvision.ops import nms 10 | 11 | model_urls = { 12 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 13 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 14 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 15 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 16 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 17 | } 18 | 19 | class PyramidFeatures(nn.Module): 20 | def __init__(self, C3_size, C4_size, C5_size, feature_size=256): 21 | super(PyramidFeatures, self).__init__() 22 | 23 | # upsample C5 to get P5 from the FPN paper 24 | self.P5_1 = nn.Conv2d(C5_size, feature_size, kernel_size=1, stride=1, padding=0) 25 | self.P5_upsampled = 
nn.Upsample(scale_factor=2, mode='nearest') 26 | self.P5_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1) 27 | 28 | # add P5 elementwise to C4 29 | self.P4_1 = nn.Conv2d(C4_size, feature_size, kernel_size=1, stride=1, padding=0) 30 | self.P4_upsampled = nn.Upsample(scale_factor=2, mode='nearest') 31 | self.P4_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1) 32 | 33 | # add P4 elementwise to C3 34 | self.P3_1 = nn.Conv2d(C3_size, feature_size, kernel_size=1, stride=1, padding=0) 35 | self.P3_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1) 36 | 37 | # "P6 is obtained via a 3x3 stride-2 conv on C5" 38 | self.P6 = nn.Conv2d(C5_size, feature_size, kernel_size=3, stride=2, padding=1) 39 | 40 | # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6" 41 | self.P7_1 = nn.ReLU() 42 | self.P7_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=2, padding=1) 43 | 44 | def forward(self, inputs): 45 | 46 | C3, C4, C5 = inputs 47 | 48 | P5_x = self.P5_1(C5) 49 | P5_upsampled_x = self.P5_upsampled(P5_x) 50 | P5_x = self.P5_2(P5_x) 51 | 52 | P4_x = self.P4_1(C4) 53 | P4_x = P5_upsampled_x + P4_x 54 | P4_upsampled_x = self.P4_upsampled(P4_x) 55 | P4_x = self.P4_2(P4_x) 56 | 57 | P3_x = self.P3_1(C3) 58 | P3_x = P3_x + P4_upsampled_x 59 | P3_x = self.P3_2(P3_x) 60 | 61 | P6_x = self.P6(C5) 62 | 63 | P7_x = self.P7_1(P6_x) 64 | P7_x = self.P7_2(P7_x) 65 | 66 | return [P3_x, P4_x, P5_x, P6_x, P7_x] 67 | 68 | 69 | class RegressionModel(nn.Module): 70 | def __init__(self, num_features_in, num_anchors=9, feature_size=256): 71 | super(RegressionModel, self).__init__() 72 | 73 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 74 | self.act1 = nn.ReLU() 75 | 76 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 77 | self.act2 = nn.ReLU() 78 | 79 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 80 | self.act3 = nn.ReLU() 81 | 82 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 83 | self.act4 = nn.ReLU() 84 | 85 | self.output = nn.Conv2d(feature_size, num_anchors*4, kernel_size=3, padding=1) 86 | 87 | def forward(self, x): 88 | 89 | out = self.conv1(x) 90 | out = self.act1(out) 91 | 92 | out = self.conv2(out) 93 | out = self.act2(out) 94 | 95 | out = self.conv3(out) 96 | out = self.act3(out) 97 | 98 | out = self.conv4(out) 99 | out = self.act4(out) 100 | 101 | out = self.output(out) 102 | 103 | # out is B x C x W x H, with C = 4*num_anchors 104 | out = out.permute(0, 2, 3, 1) 105 | 106 | return out.contiguous().view(out.shape[0], -1, 4) 107 | 108 | class ClassificationModel(nn.Module): 109 | def __init__(self, num_features_in, num_anchors=9, num_classes=80, prior=0.01, feature_size=256): 110 | super(ClassificationModel, self).__init__() 111 | 112 | self.num_classes = num_classes 113 | self.num_anchors = num_anchors 114 | 115 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 116 | self.act1 = nn.ReLU() 117 | 118 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 119 | self.act2 = nn.ReLU() 120 | 121 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 122 | self.act3 = nn.ReLU() 123 | 124 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 125 | self.act4 = nn.ReLU() 126 | 127 | self.output = nn.Conv2d(feature_size, num_anchors*num_classes, kernel_size=3, padding=1) 128 | 
self.output_act = nn.Sigmoid() 129 | 130 | def forward(self, x): 131 | 132 | out = self.conv1(x) 133 | out = self.act1(out) 134 | 135 | out = self.conv2(out) 136 | out = self.act2(out) 137 | 138 | out = self.conv3(out) 139 | out = self.act3(out) 140 | 141 | out = self.conv4(out) 142 | out = self.act4(out) 143 | 144 | out = self.output(out) 145 | out = self.output_act(out) 146 | 147 | # out is B x C x W x H, with C = n_classes + n_anchors 148 | out1 = out.permute(0, 2, 3, 1) 149 | 150 | batch_size, width, height, channels = out1.shape 151 | 152 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes) 153 | 154 | return out2.contiguous().view(x.shape[0], -1, self.num_classes) 155 | 156 | class ResNet(nn.Module): 157 | 158 | def __init__(self, num_classes, block, layers): 159 | self.inplanes = 64 160 | super(ResNet, self).__init__() 161 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 162 | self.bn1 = nn.BatchNorm2d(64) 163 | self.relu = nn.ReLU(inplace=True) 164 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 165 | self.layer1 = self._make_layer(block, 64, layers[0]) 166 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 167 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 168 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 169 | 170 | if block == BasicBlock: 171 | fpn_sizes = [self.layer2[layers[1]-1].conv2.out_channels, self.layer3[layers[2]-1].conv2.out_channels, self.layer4[layers[3]-1].conv2.out_channels] 172 | elif block == Bottleneck: 173 | fpn_sizes = [self.layer2[layers[1]-1].conv3.out_channels, self.layer3[layers[2]-1].conv3.out_channels, self.layer4[layers[3]-1].conv3.out_channels] 174 | 175 | self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2]) 176 | 177 | self.regressionModel = RegressionModel(256) 178 | self.classificationModel = ClassificationModel(256, num_classes=num_classes) 179 | 180 | self.anchors = Anchors() 181 | 182 | self.regressBoxes = BBoxTransform() 183 | 184 | self.clipBoxes = ClipBoxes() 185 | 186 | self.focalLoss = losses.FocalLoss() 187 | 188 | for m in self.modules(): 189 | if isinstance(m, nn.Conv2d): 190 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 191 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 192 | elif isinstance(m, nn.BatchNorm2d): 193 | m.weight.data.fill_(1) 194 | m.bias.data.zero_() 195 | 196 | prior = 0.01 197 | 198 | self.classificationModel.output.weight.data.fill_(0) 199 | self.classificationModel.output.bias.data.fill_(-math.log((1.0-prior)/prior)) 200 | 201 | self.regressionModel.output.weight.data.fill_(0) 202 | self.regressionModel.output.bias.data.fill_(0) 203 | 204 | self.freeze_bn() 205 | 206 | def _make_layer(self, block, planes, blocks, stride=1): 207 | downsample = None 208 | if stride != 1 or self.inplanes != planes * block.expansion: 209 | downsample = nn.Sequential( 210 | nn.Conv2d(self.inplanes, planes * block.expansion, 211 | kernel_size=1, stride=stride, bias=False), 212 | nn.BatchNorm2d(planes * block.expansion), 213 | ) 214 | 215 | layers = [] 216 | layers.append(block(self.inplanes, planes, stride, downsample)) 217 | self.inplanes = planes * block.expansion 218 | for i in range(1, blocks): 219 | layers.append(block(self.inplanes, planes)) 220 | 221 | return nn.Sequential(*layers) 222 | 223 | def freeze_bn(self): 224 | '''Freeze BatchNorm layers.''' 225 | for layer in self.modules(): 226 | if isinstance(layer, nn.BatchNorm2d): 227 | layer.eval() 228 | 229 | def forward(self, inputs): 230 | 231 | if self.training: 232 | img_batch, annotations = inputs 233 | else: 234 | img_batch = inputs 235 | 236 | x = self.conv1(img_batch) 237 | x = self.bn1(x) 238 | x = self.relu(x) 239 | x = self.maxpool(x) 240 | 241 | x1 = self.layer1(x) 242 | x2 = self.layer2(x1) 243 | x3 = self.layer3(x2) 244 | x4 = self.layer4(x3) 245 | 246 | features = self.fpn([x2, x3, x4]) 247 | 248 | regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1) 249 | 250 | classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1) 251 | 252 | anchors = self.anchors(img_batch) 253 | 254 | if self.training: 255 | return self.focalLoss(classification, regression, anchors, annotations) 256 | else: 257 | transformed_anchors = self.regressBoxes(anchors, regression) 258 | transformed_anchors = self.clipBoxes(transformed_anchors, img_batch) 259 | 260 | scores = torch.max(classification, dim=2, keepdim=True)[0] 261 | 262 | scores_over_thresh = (scores>0.05)[0, :, 0] 263 | 264 | if scores_over_thresh.sum() == 0: 265 | # no boxes to NMS, just return 266 | return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)] 267 | 268 | classification = classification[:, scores_over_thresh, :] 269 | transformed_anchors = transformed_anchors[:, scores_over_thresh, :] 270 | scores = scores[:, scores_over_thresh, :] 271 | 272 | anchors_nms_idx = nms(transformed_anchors, scores, 0.5) 273 | 274 | nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1) 275 | 276 | return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]] 277 | 278 | 279 | 280 | def resnet18(num_classes, pretrained=False, **kwargs): 281 | """Constructs a ResNet-18 model. 282 | Args: 283 | pretrained (bool): If True, returns a model pre-trained on ImageNet 284 | """ 285 | model = ResNet(num_classes, BasicBlock, [2, 2, 2, 2], **kwargs) 286 | if pretrained: 287 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'], model_dir='.'), strict=False) 288 | return model 289 | 290 | 291 | def resnet34(num_classes, pretrained=False, **kwargs): 292 | """Constructs a ResNet-34 model. 
293 | Args: 294 | pretrained (bool): If True, returns a model pre-trained on ImageNet 295 | """ 296 | model = ResNet(num_classes, BasicBlock, [3, 4, 6, 3], **kwargs) 297 | if pretrained: 298 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'], model_dir='.'), strict=False) 299 | return model 300 | 301 | 302 | def resnet50(num_classes, pretrained=False, **kwargs): 303 | """Constructs a ResNet-50 model. 304 | Args: 305 | pretrained (bool): If True, returns a model pre-trained on ImageNet 306 | """ 307 | model = ResNet(num_classes, Bottleneck, [3, 4, 6, 3], **kwargs) 308 | if pretrained: 309 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'], model_dir='.'), strict=False) 310 | return model 311 | 312 | def resnet101(num_classes, pretrained=False, **kwargs): 313 | """Constructs a ResNet-101 model. 314 | Args: 315 | pretrained (bool): If True, returns a model pre-trained on ImageNet 316 | """ 317 | model = ResNet(num_classes, Bottleneck, [3, 4, 23, 3], **kwargs) 318 | if pretrained: 319 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'], model_dir='.'), strict=False) 320 | return model 321 | 322 | 323 | def resnet152(num_classes, pretrained=False, **kwargs): 324 | """Constructs a ResNet-152 model. 325 | Args: 326 | pretrained (bool): If True, returns a model pre-trained on ImageNet 327 | """ 328 | model = ResNet(num_classes, Bottleneck, [3, 8, 36, 3], **kwargs) 329 | if pretrained: 330 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'], model_dir='.'), strict=False) 331 | return model -------------------------------------------------------------------------------- /timeitdec.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def timeit(method): 4 | def timed(*args, **kw): 5 | ts = time.time() 6 | result = method(*args, **kw) 7 | te = time.time() 8 | if 'log_time' in kw: 9 | name = kw.get('log_name', method.__name__.upper()) 10 | kw['log_time'][name] = int((te - ts) * 1000) 11 | else: 12 | print ('%r %2.2f ms' % \ 13 | (method.__name__, (te - ts) * 1000)) 14 | return result 15 | return timed -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | import copy 4 | import argparse 5 | import pdb 6 | import collections 7 | import sys 8 | 9 | import numpy as np 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.optim as optim 14 | from torch.optim import lr_scheduler 15 | from torch.autograd import Variable 16 | from torchvision import datasets, models, transforms 17 | import torchvision 18 | 19 | import retinanet 20 | import efficientdet 21 | from anchors import Anchors 22 | import losses 23 | from dataloader import CocoDataset, CSVDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, UnNormalizer, Normalizer 24 | from torch.utils.data import Dataset, DataLoader 25 | 26 | import coco_eval 27 | import csv_eval 28 | 29 | from tqdm import tqdm 30 | from ptflops import get_model_complexity_info 31 | 32 | #assert torch.__version__.split('.')[1] == '4' 33 | 34 | print('CUDA available: {}'.format(torch.cuda.is_available())) 35 | 36 | 37 | 38 | def freeze_layer(layer): 39 | for param in layer.parameters(): 40 | param.requires_grad = False 41 | 42 | def main(args=None): 43 | 44 | parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.') 45 | 46 | 
parser.add_argument('--efficientdet', help='Use EfficientDet.', action="store_true") 47 | parser.add_argument('--scaling-compound', help='EfficientDet scaling compound phi.', type=int, default=0) 48 | parser.add_argument('--batch-size', help='Batchsize.', type=int, default=6) 49 | parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.') 50 | parser.add_argument('--coco_path', help='Path to COCO directory') 51 | parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)') 52 | parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') 53 | parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)') 54 | 55 | parser.add_argument('--print-model-complexity', help='Print model complexity.', action="store_true") 56 | 57 | parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=None) 58 | parser.add_argument('--epochs', help='Number of epochs', type=int, default=100) 59 | 60 | parser = parser.parse_args(args) 61 | 62 | img_size = parser.scaling_compound * 128 + 512 63 | 64 | # Create the data loaders 65 | if parser.dataset == 'coco': 66 | 67 | if parser.coco_path is None: 68 | raise ValueError('Must provide --coco_path when training on COCO,') 69 | 70 | dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer(img_size=img_size)])) 71 | dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer(img_size=img_size)])) 72 | 73 | elif parser.dataset == 'csv': 74 | 75 | if parser.csv_train is None: 76 | raise ValueError('Must provide --csv_train when training on COCO,') 77 | 78 | if parser.csv_classes is None: 79 | raise ValueError('Must provide --csv_classes when training on COCO,') 80 | 81 | 82 | dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer(img_size=img_size)])) 83 | 84 | if parser.csv_val is None: 85 | dataset_val = None 86 | print('No validation annotations provided.') 87 | else: 88 | dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer(img_size=img_size)])) 89 | 90 | else: 91 | raise ValueError('Dataset type not understood (must be csv or coco), exiting.') 92 | 93 | sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False) 94 | dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler) 95 | 96 | if dataset_val is not None: 97 | sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) 98 | dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) 99 | 100 | # Create the model 101 | if parser.depth == 18: 102 | model = retinanet.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) 103 | elif parser.depth == 34: 104 | model = retinanet.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) 105 | elif parser.depth == 50: 106 | model = retinanet.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) 107 | elif parser.depth == 101: 108 | model = retinanet.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) 109 | elif parser.depth == 152: 110 | model = 
retinanet.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
111 |     elif parser.efficientdet:
112 |         model = efficientdet.efficientdet(num_classes=dataset_train.num_classes(), pretrained=True, phi=parser.scaling_compound)
113 |     else:
114 |         raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152, or specify --efficientdet.')
115 | 
116 |     use_gpu = True
117 | 
118 |     if use_gpu:
119 |         model = model.cuda()
120 | 
121 |     model = torch.nn.DataParallel(model).cuda()
122 | 
123 |     if parser.print_model_complexity:
124 |         flops, params = get_model_complexity_info(model, (3, img_size, img_size), as_strings=True, print_per_layer_stat=True)
125 |         print('{:<30} {:<8}'.format('Computational complexity: ', flops))
126 |         print('{:<30} {:<8}'.format('Number of parameters: ', params))
127 | 
128 |     model.training = True
129 | 
130 |     optimizer = optim.SGD(model.parameters(), lr=4e-5)
131 | 
132 |     scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
133 | 
134 |     loss_hist = collections.deque(maxlen=500)
135 | 
136 |     model.train()
137 |     model.module.freeze_bn()
138 | 
139 |     print('Num training images: {}'.format(len(dataset_train)))
140 | 
141 |     for epoch_num in range(parser.epochs):
142 | 
143 |         model.train()
144 |         model.module.freeze_bn()
145 | 
146 |         if parser.efficientdet: freeze_layer(model.module.efficientnet)  # ResNet variants have no .efficientnet backbone
147 | 
148 |         epoch_loss = []
149 |         pbar = tqdm(enumerate(dataloader_train), total=len(dataloader_train))
150 |         for iter_num, data in pbar:
151 |             optimizer.zero_grad()
152 | 
153 |             classification_loss, regression_loss = model([data['img'].cuda().float(), data['annot']])
154 | 
155 |             classification_loss = classification_loss.mean()
156 |             regression_loss = regression_loss.mean()
157 | 
158 |             loss = classification_loss + regression_loss
159 | 
160 |             if bool(loss == 0):
161 |                 continue
162 | 
163 |             loss.backward()
164 | 
165 |             torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
166 | 
167 |             optimizer.step()
168 | 
169 |             loss_hist.append(float(loss))
170 | 
171 |             epoch_loss.append(float(loss))
172 | 
173 |             mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
174 |             pbar.set_description(f'{mem:.3g}G | {float(classification_loss):1.5f} | {float(regression_loss):1.5f} | {np.mean(loss_hist):1.5f}')
175 |             #print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
176 | 
177 |             del classification_loss
178 |             del regression_loss
179 | 
180 |         if parser.dataset == 'coco':
181 | 
182 |             print('Evaluating dataset')
183 | 
184 |             coco_eval.evaluate_coco(dataset_val, model)
185 | 
186 |         elif parser.dataset == 'csv' and parser.csv_val is not None:
187 | 
188 |             print('Evaluating dataset')
189 | 
190 |             mAP = csv_eval.evaluate(dataset_val, model)
191 | 
192 | 
193 |         scheduler.step(np.mean(epoch_loss))
194 | 
195 |         torch.save(model.module, '{}_model_{}.pt'.format(parser.dataset, epoch_num))
196 | 
197 |     model.eval()
198 | 
199 |     torch.save(model, 'model_final.pt')
200 | 
201 | if __name__ == '__main__':
202 |     main()
203 | 
-------------------------------------------------------------------------------- /utils.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | 
5 | def conv3x3(in_planes, out_planes, stride=1):
6 |     """3x3 convolution with padding"""
7 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
8 |
padding=1, bias=False) 9 | 10 | class BasicBlock(nn.Module): 11 | expansion = 1 12 | 13 | def __init__(self, inplanes, planes, stride=1, downsample=None): 14 | super(BasicBlock, self).__init__() 15 | self.conv1 = conv3x3(inplanes, planes, stride) 16 | self.bn1 = nn.BatchNorm2d(planes) 17 | self.relu = nn.ReLU(inplace=True) 18 | self.conv2 = conv3x3(planes, planes) 19 | self.bn2 = nn.BatchNorm2d(planes) 20 | self.downsample = downsample 21 | self.stride = stride 22 | 23 | def forward(self, x): 24 | residual = x 25 | 26 | out = self.conv1(x) 27 | out = self.bn1(out) 28 | out = self.relu(out) 29 | 30 | out = self.conv2(out) 31 | out = self.bn2(out) 32 | 33 | if self.downsample is not None: 34 | residual = self.downsample(x) 35 | 36 | out += residual 37 | out = self.relu(out) 38 | 39 | return out 40 | 41 | 42 | class Bottleneck(nn.Module): 43 | expansion = 4 44 | 45 | def __init__(self, inplanes, planes, stride=1, downsample=None): 46 | super(Bottleneck, self).__init__() 47 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 48 | self.bn1 = nn.BatchNorm2d(planes) 49 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 50 | padding=1, bias=False) 51 | self.bn2 = nn.BatchNorm2d(planes) 52 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 53 | self.bn3 = nn.BatchNorm2d(planes * 4) 54 | self.relu = nn.ReLU(inplace=True) 55 | self.downsample = downsample 56 | self.stride = stride 57 | 58 | def forward(self, x): 59 | residual = x 60 | 61 | out = self.conv1(x) 62 | out = self.bn1(out) 63 | out = self.relu(out) 64 | 65 | out = self.conv2(out) 66 | out = self.bn2(out) 67 | out = self.relu(out) 68 | 69 | out = self.conv3(out) 70 | out = self.bn3(out) 71 | 72 | if self.downsample is not None: 73 | residual = self.downsample(x) 74 | 75 | out += residual 76 | out = self.relu(out) 77 | 78 | return out 79 | 80 | class BBoxTransform(nn.Module): 81 | 82 | def __init__(self, mean=None, std=None): 83 | super(BBoxTransform, self).__init__() 84 | if mean is None: 85 | self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32)).cuda() 86 | else: 87 | self.mean = mean 88 | if std is None: 89 | self.std = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32)).cuda() 90 | else: 91 | self.std = std 92 | 93 | def forward(self, boxes, deltas): 94 | 95 | widths = boxes[:, :, 2] - boxes[:, :, 0] 96 | heights = boxes[:, :, 3] - boxes[:, :, 1] 97 | ctr_x = boxes[:, :, 0] + 0.5 * widths 98 | ctr_y = boxes[:, :, 1] + 0.5 * heights 99 | 100 | dx = deltas[:, :, 0] * self.std[0] + self.mean[0] 101 | dy = deltas[:, :, 1] * self.std[1] + self.mean[1] 102 | dw = deltas[:, :, 2] * self.std[2] + self.mean[2] 103 | dh = deltas[:, :, 3] * self.std[3] + self.mean[3] 104 | 105 | pred_ctr_x = ctr_x + dx * widths 106 | pred_ctr_y = ctr_y + dy * heights 107 | pred_w = torch.exp(dw) * widths 108 | pred_h = torch.exp(dh) * heights 109 | 110 | pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w 111 | pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h 112 | pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w 113 | pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h 114 | 115 | pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2) 116 | 117 | return pred_boxes 118 | 119 | 120 | class ClipBoxes(nn.Module): 121 | 122 | def __init__(self, width=None, height=None): 123 | super(ClipBoxes, self).__init__() 124 | 125 | def forward(self, boxes, img): 126 | 127 | batch_size, num_channels, height, width = img.shape 128 | 129 | boxes[:, :, 0] = torch.clamp(boxes[:, :, 0], 
min=0) 130 | boxes[:, :, 1] = torch.clamp(boxes[:, :, 1], min=0) 131 | 132 | boxes[:, :, 2] = torch.clamp(boxes[:, :, 2], max=width) 133 | boxes[:, :, 3] = torch.clamp(boxes[:, :, 3], max=height) 134 | 135 | return boxes 136 | -------------------------------------------------------------------------------- /visualize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torchvision 3 | import time 4 | import os 5 | import copy 6 | import pdb 7 | import time 8 | import argparse 9 | 10 | import sys 11 | import cv2 12 | 13 | import torch 14 | from torch.utils.data import Dataset, DataLoader 15 | from torchvision import datasets, models, transforms 16 | 17 | from dataloader import CocoDataset, CSVDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, UnNormalizer, Normalizer 18 | 19 | 20 | assert torch.__version__.split('.')[1] == '4' 21 | 22 | print('CUDA available: {}'.format(torch.cuda.is_available())) 23 | 24 | 25 | def main(args=None): 26 | parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.') 27 | 28 | parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.') 29 | parser.add_argument('--coco_path', help='Path to COCO directory') 30 | parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') 31 | parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)') 32 | 33 | parser.add_argument('--model', help='Path to model (.pt) file.') 34 | 35 | parser = parser.parse_args(args) 36 | 37 | if parser.dataset == 'coco': 38 | dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()])) 39 | elif parser.dataset == 'csv': 40 | dataset_val = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()])) 41 | else: 42 | raise ValueError('Dataset type not understood (must be csv or coco), exiting.') 43 | 44 | sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) 45 | dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val) 46 | 47 | retinanet = torch.load(parser.model) 48 | 49 | use_gpu = True 50 | 51 | if use_gpu: 52 | retinanet = retinanet.cuda() 53 | 54 | retinanet.eval() 55 | 56 | unnormalize = UnNormalizer() 57 | 58 | def draw_caption(image, box, caption): 59 | 60 | b = np.array(box).astype(int) 61 | cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2) 62 | cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1) 63 | 64 | for idx, data in enumerate(dataloader_val): 65 | 66 | with torch.no_grad(): 67 | st = time.time() 68 | scores, classification, transformed_anchors = retinanet(data['img'].cuda().float()) 69 | print('Elapsed time: {}'.format(time.time()-st)) 70 | idxs = np.where(scores>0.5) 71 | img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy() 72 | 73 | img[img<0] = 0 74 | img[img>255] = 255 75 | 76 | img = np.transpose(img, (1, 2, 0)) 77 | 78 | img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB) 79 | 80 | for j in range(idxs[0].shape[0]): 81 | bbox = transformed_anchors[idxs[0][j], :] 82 | x1 = int(bbox[0]) 83 | y1 = int(bbox[1]) 84 | x2 = int(bbox[2]) 85 | y2 = int(bbox[3]) 86 | label_name = dataset_val.labels[int(classification[idxs[0][j]])] 87 | draw_caption(img, (x1, y1, 
x2, y2), label_name) 88 | 89 | cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2) 90 | print(label_name) 91 | 92 | cv2.imshow('img', img) 93 | cv2.waitKey(0) 94 | 95 | 96 | 97 | if __name__ == '__main__': 98 | main() --------------------------------------------------------------------------------
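An illustrative sketch (not part of the repository) of calling the Soft-NMS wrapper defined in opt/nms_wrapper.py; it assumes opt/ is importable as a Python package and that soft_nms_cpu.pyx has been compiled with Cython:

import numpy as np
from opt.nms_wrapper import soft_nms

# Five [x1, y1, x2, y2, score] detections; the first three overlap heavily.
dets = np.array([[10., 10., 60., 60., 0.95],
                 [12., 12., 62., 62., 0.90],
                 [11., 9., 59., 61., 0.60],
                 [100., 100., 150., 150., 0.80],
                 [101., 99., 149., 151., 0.20]], dtype=np.float32)

# Gaussian Soft-NMS decays the scores of overlapping boxes instead of dropping
# them outright; boxes whose decayed score falls below min_score are removed.
new_dets, inds = soft_nms(dets, iou_thr=0.5, method='gaussian', sigma=0.5, min_score=1e-3)
print(new_dets)  # surviving boxes with decayed scores
print(inds)      # indices into the original dets array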