├── mmdet
│   └── models
│       ├── utils
│       │   ├── __init__.py
│       │   └── effdet_utils.py
│       ├── necks
│       │   ├── __init__.py
│       │   └── bifpn.py
│       ├── backbones
│       │   ├── __init__.py
│       │   └── efficientnet.py
│       └── anchor_heads
│           ├── __init__.py
│           └── retina_sepconv_head.py
├── LICENSE
├── .gitignore
├── README.md
└── configs
    └── efficientdet
        ├── efficientdet_d1_8gpu.py
        ├── efficientdet_d2_8gpu.py
        ├── efficientdet_d3_8gpu.py
        ├── efficientdet_d4_8gpu.py
        ├── efficientdet_d0_4gpu.py
        └── efficientdet_d5_8gpu.py
--------------------------------------------------------------------------------
/mmdet/models/utils/__init__.py:
--------------------------------------------------------------------------------
from .weight_init import bias_init_with_prob
from .effdet_utils import MemoryEfficientSwish, Swish, SeparableConv2d

__all__ = ['bias_init_with_prob', 'MemoryEfficientSwish', 'SeparableConv2d', 'Swish']
--------------------------------------------------------------------------------
/mmdet/models/necks/__init__.py:
--------------------------------------------------------------------------------
from .bfp import BFP
from .fpn import FPN
from .fpn_carafe import FPN_CARAFE
from .hrfpn import HRFPN
from .nas_fpn import NASFPN
from .bifpn import BiFPN

__all__ = ['FPN', 'BFP', 'HRFPN', 'NASFPN', 'FPN_CARAFE', 'BiFPN']
--------------------------------------------------------------------------------
/mmdet/models/backbones/__init__.py:
--------------------------------------------------------------------------------
from .hrnet import HRNet
from .resnet import ResNet, make_res_layer
from .resnext import ResNeXt
from .ssd_vgg import SSDVGG
from .efficientnet import EfficientNet

__all__ = ['ResNet', 'make_res_layer', 'ResNeXt', 'SSDVGG', 'HRNet', 'EfficientNet']
--------------------------------------------------------------------------------
/mmdet/models/anchor_heads/__init__.py:
--------------------------------------------------------------------------------
from .anchor_head import AnchorHead
from .atss_head import ATSSHead
from .fcos_head import FCOSHead
from .fovea_head import FoveaHead
from .free_anchor_retina_head import FreeAnchorRetinaHead
from .ga_retina_head import GARetinaHead
from .ga_rpn_head import GARPNHead
from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead
from .reppoints_head import RepPointsHead
from .retina_head import RetinaHead
from .retina_sepbn_head import RetinaSepBNHead
from .rpn_head import RPNHead
from .ssd_head import SSDHead
from .retina_sepconv_head import RetinaSepConvHead

__all__ = [
    'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption', 'RPNHead',
    'GARPNHead', 'RetinaHead', 'RetinaSepBNHead', 'GARetinaHead', 'SSDHead',
    'FCOSHead', 'RepPointsHead', 'FoveaHead', 'FreeAnchorRetinaHead', 'RetinaSepConvHead',
    'ATSSHead'
]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 lucifer443

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following
conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /mmdet/models/utils/effdet_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from mmcv.cnn import xavier_init 4 | from mmdet.ops import ConvModule 5 | 6 | 7 | class SwishImplementation(torch.autograd.Function): 8 | @staticmethod 9 | def forward(ctx, i): 10 | result = i * torch.sigmoid(i) 11 | ctx.save_for_backward(i) 12 | return result 13 | 14 | @staticmethod 15 | def backward(ctx, grad_output): 16 | i = ctx.saved_variables[0] 17 | sigmoid_i = torch.sigmoid(i) 18 | return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) 19 | 20 | 21 | class MemoryEfficientSwish(nn.Module): 22 | def forward(self, x): 23 | return SwishImplementation.apply(x) 24 | 25 | 26 | class Swish(nn.Module): 27 | def forward(self, x): 28 | return x * torch.sigmoid(x) 29 | 30 | 31 | class SeparableConv2d(nn.Module): 32 | def __init__(self, in_channels, 33 | out_channels, 34 | kernel_size=1, 35 | stride=1, 36 | padding=0, 37 | dilation=1, 38 | norm_cfg=dict(type='BN', momentum=0.003, eps=1e-4, requires_grad=True), 39 | activation=None, 40 | bias=False): 41 | super(SeparableConv2d, self).__init__() 42 | self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size, 43 | stride, padding, dilation, groups=in_channels, bias=False) 44 | self.pointwise = ConvModule(in_channels, out_channels, 1, norm_cfg=norm_cfg, act_cfg=None, bias=bias, inplace=False) 45 | if activation == "ReLU": 46 | self.act = nn.ReLU() 47 | elif activation == "Swish": 48 | self.act = MemoryEfficientSwish() 49 | else: 50 | self.act = None 51 | 52 | def init_weights(self): 53 | xavier_init(self.depthwise, distribution='uniform') 54 | xavier_init(self.pointwise.conv, distribution='uniform') 55 | 56 | def forward(self, x): 57 | x = self.depthwise(x) 58 | x = self.pointwise(x) 59 | if self.act: 60 | x = self.act(x) 61 | return x 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# EfficientDet-Pytorch
This project is an implementation of EfficientDet built on mmdetection.

It is based on:

* the paper [EfficientDet: Scalable and Efficient Object Detection](https://arxiv.org/abs/1911.09070)
* the [official TensorFlow implementation](https://github.com/google/automl)
* the [PyTorch implementation of EfficientNet](https://github.com/lukemelas/EfficientNet-PyTorch)

## Models

| Variant | mAP (val2017) | Params | FLOPs   | mAP (val2017) in paper | Params in paper | FLOPs in paper |
| ------- | ------------- | ------ | ------- | ---------------------- | --------------- | -------------- |
| D0      | 32.02         | 3.87M  | 2.55B   | 33.5                   | 3.9M            | 2.5B           |
| D1      | 37.78         | 6.62M  | 6.12B   | 39.1                   | 6.6M            | 6.1B           |
| D2      | ——            | 8.09M  | 11B     | 42.5                   | 8.1M            | 11B            |
| D3      | ——            | 12.02M | 24.88B  | 45.9                   | 12M             | 25B            |
| D4      | ——            | 20.7M  | 55.13B  | 49.0                   | 21M             | 55B            |
| D5      | ——            | 33.63M | 135.31B | 50.5                   | 34M             | 135B           |
| D6      | ——            | ——     | ——      | 51.3                   | 52M             | 226B           |

## Usage

1. **Install mmdetection**

   This implementation is based on [mmdetection](https://github.com/open-mmlab/mmdetection) (v1.1.0+8732ed9). Please refer to [INSTALL.md](docs/INSTALL.md) for installation and dataset preparation.

2. **Copy the code into the mmdetection directory**

   ```shell
   cp -r mmdet/ ${MMDETECTION_PATH}/
   cp -r configs/ ${MMDETECTION_PATH}/
   ```

3. **Prepare data**

   The directories should be arranged like this:

   > mmdetection
   > ├── mmdet
   > ├── tools
   > ├── configs
   > ├── data
   > │   ├── coco
   > │   │   ├── annotations
   > │   │   ├── train2017
   > │   │   ├── val2017
   > │   │   ├── test2017

4. **Train D0 with 4 GPUs**

   ```shell
   CONFIG_FILE=configs/efficientdet/efficientdet_d0_4gpu.py
   ./tools/dist_train.sh ${CONFIG_FILE} 4
   ```

5. **Calculate parameters and FLOPs**

   ```shell
   python tools/get_flops.py ${CONFIG_FILE} --shape $SIZE $SIZE
   ```

6. **Test**

   ```shell
   python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} --out ${OUTPUT_FILE} --eval bbox
   ```

For more usage, refer to the [mmdetection documentation](https://mmdetection.readthedocs.io/en/latest/GETTING_STARTED.html#inference-with-pretrained-models).

## Update log

- [2020-04-27] Updated results and added SyncBN in the backbone.
- [2020-04-20] Fixed some bugs in BiFPN and switched to separate BN in the head.
- [2020-04-17] Added the efficientdet-d0 training config.
- [2020-04-16] Added efficientnet.py and retina_sepconv_head.py.
- [2020-04-06] Created this repository.

## Notice

1. For some reasons I can't release the trained models, but you can reproduce the results easily using the config files provided here.
2. I find the training procedure of EfficientDet unstable: there is a small chance that results come out up to 3% mAP lower.
3. The number of BiFPN layers in the latest version of the paper differs slightly from the first version, although the parameters and FLOPs are the same. This implementation follows the latest version of the paper.
4. Training from scratch is time-consuming. For example, it took me 4 days to train D0 from scratch on four TITAN V GPUs.
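## Inference example

Once a checkpoint has been trained, single-image inference can also be run from Python through mmdetection's high-level APIs. The snippet below is a minimal sketch against the mmdet v1.x API; the checkpoint path and test image are placeholders for whatever your own training run produced.

```python
from mmdet.apis import init_detector, inference_detector, show_result

config_file = 'configs/efficientdet/efficientdet_d0_4gpu.py'
# hypothetical path: work_dir is './efficientdet_d0' in the config, so the
# latest checkpoint normally lands there
checkpoint_file = 'efficientdet_d0/latest.pth'

# build the detector from the config and load the trained weights
model = init_detector(config_file, checkpoint_file, device='cuda:0')

# result is a list with one array of [x1, y1, x2, y2, score] rows per class
result = inference_detector(model, 'demo/demo.jpg')
show_result('demo/demo.jpg', result, model.CLASSES,
            score_thr=0.3, out_file='demo_out.jpg')
```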
86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /configs/efficientdet/efficientdet_d1_8gpu.py: -------------------------------------------------------------------------------- 1 | cudnn_benchmark = True 2 | # model settings 3 | norm_cfg = dict(type='BN', momentum=0.01, eps=1e-3, requires_grad=True) # using SyncBN during training 4 | model = dict( 5 | type='RetinaNet', 6 | pretrained='pretrained/efficientnet-b1-f1951068.pth', 7 | backbone=dict( 8 | type='EfficientNet', 9 | arch='efficientnet-b1', 10 | out_indices=[4, 6, 8], 11 | norm_cfg=norm_cfg, 12 | norm_eval=False), 13 | neck=dict( 14 | type='BiFPN', 15 | in_channels=[40, 112, 320], 16 | target_size_list=[80, 40, 20, 10, 5], 17 | out_channels=88, 18 | stack=4, 19 | start_level=0, 20 | norm_cfg=norm_cfg, 21 | num_outs=5), 22 | bbox_head=dict( 23 | type='RetinaSepConvHead', 24 | num_classes=81, 25 | num_ins=5, 26 | in_channels=88, 27 | stacked_convs=3, 28 | feat_channels=88, 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | anchor_ratios=[0.5, 1.0, 2.0], 32 | anchor_strides=[8, 16, 32, 64, 128], 33 | target_means=[.0, .0, .0, .0], 34 | target_stds=[1.0, 1.0, 1.0, 1.0], 35 | norm_cfg=norm_cfg, 36 | loss_cls=dict( 37 | type='FocalLoss', 38 | use_sigmoid=True, 39 | gamma=1.5, 40 | alpha=0.25, 41 | loss_weight=1.0), 42 | loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) 43 | # training and testing settings 44 | train_cfg = dict( 45 | assigner=dict( 46 | type='MaxIoUAssigner', 47 | pos_iou_thr=0.5, 48 | neg_iou_thr=0.5, 49 | min_pos_iou=0, 50 | ignore_iof_thr=-1), 51 | allowed_border=-1, 52 | pos_weight=-1, 53 | debug=False) 54 | test_cfg = dict( 55 | nms_pre=1000, 56 | min_bbox_size=0, 57 | score_thr=0.05, 58 | nms=dict(type='nms', iou_thr=0.5), 59 | max_per_img=100) 60 | # dataset settings 61 | dataset_type = 'CocoDataset' 62 | data_root = 'data/coco/' 63 | img_norm_cfg = dict( 64 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 65 | img_size = 640 66 | train_pipeline = [ 67 | dict(type='LoadImageFromFile'), 68 | dict(type='LoadAnnotations', with_bbox=True), 69 | dict( 70 | type='Resize', 71 | img_scale=(img_size, img_size), 72 | ratio_range=(0.1, 2.0), 73 | keep_ratio=True), 74 | dict(type='RandomCrop', crop_size=(img_size, img_size)), 75 | dict(type='RandomFlip', flip_ratio=0.5), 76 | dict(type='Normalize', **img_norm_cfg), 77 | dict(type='Pad', size=(img_size, img_size)), 78 | dict(type='DefaultFormatBundle'), 79 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 80 | ] 81 | test_pipeline = [ 82 | dict(type='LoadImageFromFile'), 83 | dict( 84 | type='MultiScaleFlipAug', 85 | img_scale=(img_size, img_size), 86 | flip=False, 87 | transforms=[ 88 | dict(type='Resize', keep_ratio=True), 89 | dict(type='RandomFlip'), 90 | dict(type='Normalize', **img_norm_cfg), 91 | dict(type='Pad', size=(img_size, img_size)), 92 | dict(type='ImageToTensor', keys=['img']), 93 | dict(type='Collect', keys=['img']), 94 | ]) 95 | ] 96 | data = dict( 97 | imgs_per_gpu=8, 98 | workers_per_gpu=4, 99 | train=dict( 100 | type=dataset_type, 101 | ann_file=data_root + 'annotations/instances_train2017.json', 102 | img_prefix=data_root + 'train2017/', 103 | pipeline=train_pipeline), 104 | val=dict( 105 | type=dataset_type, 106 | ann_file=data_root + 'annotations/instances_val2017.json', 107 | img_prefix=data_root + 'val2017/', 108 | pipeline=test_pipeline), 109 | test=dict( 110 | type=dataset_type, 111 | ann_file=data_root + 
'annotations/instances_val2017.json', 112 | img_prefix=data_root + 'val2017/', 113 | pipeline=test_pipeline)) 114 | evaluation = dict(interval=1, metric='bbox') 115 | # optimizer 116 | optimizer = dict( 117 | type='SGD', 118 | lr=0.08, 119 | momentum=0.9, 120 | weight_decay=4e-5, 121 | paramwise_options=dict(norm_decay_mult=0)) 122 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 123 | # learning policy 124 | lr_config = dict( 125 | policy='cosine', 126 | warmup='linear', 127 | warmup_iters=2000, 128 | warmup_ratio=0.008) 129 | checkpoint_config = dict(interval=10) 130 | # yapf:disable 131 | log_config = dict( 132 | interval=50, 133 | hooks=[ 134 | dict(type='TextLoggerHook'), 135 | # dict(type='TensorboardLoggerHook') 136 | ]) 137 | # yapf:enable 138 | # runtime settings 139 | total_epochs = 300 140 | dist_params = dict(backend='nccl') 141 | log_level = 'INFO' 142 | work_dir = './efficientdet_d1' 143 | load_from = None 144 | resume_from = None 145 | workflow = [('train', 1)] 146 | -------------------------------------------------------------------------------- /configs/efficientdet/efficientdet_d2_8gpu.py: -------------------------------------------------------------------------------- 1 | cudnn_benchmark = True 2 | # model settings 3 | norm_cfg = dict(type='BN', momentum=0.01, eps=1e-3, requires_grad=True) # using SyncBN during training 4 | model = dict( 5 | type='RetinaNet', 6 | pretrained='pretrained/efficientnet-b2-8bb5946d.pth', 7 | backbone=dict( 8 | type='EfficientNet', 9 | arch='efficientnet-b2', 10 | out_indices=[4, 6, 8], 11 | norm_cfg=norm_cfg, 12 | norm_eval=False), 13 | neck=dict( 14 | type='BiFPN', 15 | in_channels=[40, 112, 320], 16 | target_size_list=[96, 48, 24, 12, 6], 17 | out_channels=112, 18 | stack=5, 19 | start_level=0, 20 | norm_cfg=norm_cfg, 21 | num_outs=5), 22 | bbox_head=dict( 23 | type='RetinaSepConvHead', 24 | num_classes=81, 25 | num_ins=5, 26 | in_channels=112, 27 | stacked_convs=3, 28 | feat_channels=112, 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | anchor_ratios=[0.5, 1.0, 2.0], 32 | anchor_strides=[8, 16, 32, 64, 128], 33 | target_means=[.0, .0, .0, .0], 34 | target_stds=[1.0, 1.0, 1.0, 1.0], 35 | norm_cfg=norm_cfg, 36 | loss_cls=dict( 37 | type='FocalLoss', 38 | use_sigmoid=True, 39 | gamma=1.5, 40 | alpha=0.25, 41 | loss_weight=1.0), 42 | loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) 43 | # training and testing settings 44 | train_cfg = dict( 45 | assigner=dict( 46 | type='MaxIoUAssigner', 47 | pos_iou_thr=0.5, 48 | neg_iou_thr=0.5, 49 | min_pos_iou=0, 50 | ignore_iof_thr=-1), 51 | allowed_border=-1, 52 | pos_weight=-1, 53 | debug=False) 54 | test_cfg = dict( 55 | nms_pre=1000, 56 | min_bbox_size=0, 57 | score_thr=0.05, 58 | nms=dict(type='nms', iou_thr=0.5), 59 | max_per_img=100) 60 | # dataset settings 61 | dataset_type = 'CocoDataset' 62 | data_root = 'data/coco/' 63 | img_norm_cfg = dict( 64 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 65 | img_size = 768 66 | train_pipeline = [ 67 | dict(type='LoadImageFromFile'), 68 | dict(type='LoadAnnotations', with_bbox=True), 69 | dict( 70 | type='Resize', 71 | img_scale=(img_size, img_size), 72 | ratio_range=(0.1, 2.0), 73 | keep_ratio=True), 74 | dict(type='RandomCrop', crop_size=(img_size, img_size)), 75 | dict(type='RandomFlip', flip_ratio=0.5), 76 | dict(type='Normalize', **img_norm_cfg), 77 | dict(type='Pad', size=(img_size, img_size)), 78 | dict(type='DefaultFormatBundle'), 79 | dict(type='Collect', keys=['img', 
'gt_bboxes', 'gt_labels']), 80 | ] 81 | test_pipeline = [ 82 | dict(type='LoadImageFromFile'), 83 | dict( 84 | type='MultiScaleFlipAug', 85 | img_scale=(img_size, img_size), 86 | flip=False, 87 | transforms=[ 88 | dict(type='Resize', keep_ratio=True), 89 | dict(type='RandomFlip'), 90 | dict(type='Normalize', **img_norm_cfg), 91 | dict(type='Pad', size=(img_size, img_size)), 92 | dict(type='ImageToTensor', keys=['img']), 93 | dict(type='Collect', keys=['img']), 94 | ]) 95 | ] 96 | data = dict( 97 | imgs_per_gpu=8, 98 | workers_per_gpu=4, 99 | train=dict( 100 | type=dataset_type, 101 | ann_file=data_root + 'annotations/instances_train2017.json', 102 | img_prefix=data_root + 'train2017/', 103 | pipeline=train_pipeline), 104 | val=dict( 105 | type=dataset_type, 106 | ann_file=data_root + 'annotations/instances_val2017.json', 107 | img_prefix=data_root + 'val2017/', 108 | pipeline=test_pipeline), 109 | test=dict( 110 | type=dataset_type, 111 | ann_file=data_root + 'annotations/instances_val2017.json', 112 | img_prefix=data_root + 'val2017/', 113 | pipeline=test_pipeline)) 114 | evaluation = dict(interval=1, metric='bbox') 115 | # optimizer 116 | optimizer = dict( 117 | type='SGD', 118 | lr=0.08, 119 | momentum=0.9, 120 | weight_decay=4e-5, 121 | paramwise_options=dict(norm_decay_mult=0)) 122 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 123 | # learning policy 124 | lr_config = dict( 125 | policy='cosine', 126 | warmup='linear', 127 | warmup_iters=2000, 128 | warmup_ratio=0.008) 129 | checkpoint_config = dict(interval=10) 130 | # yapf:disable 131 | log_config = dict( 132 | interval=50, 133 | hooks=[ 134 | dict(type='TextLoggerHook'), 135 | # dict(type='TensorboardLoggerHook') 136 | ]) 137 | # yapf:enable 138 | # runtime settings 139 | total_epochs = 300 140 | dist_params = dict(backend='nccl') 141 | log_level = 'INFO' 142 | work_dir = './efficientdet_d2' 143 | load_from = None 144 | resume_from = None 145 | workflow = [('train', 1)] 146 | -------------------------------------------------------------------------------- /configs/efficientdet/efficientdet_d3_8gpu.py: -------------------------------------------------------------------------------- 1 | cudnn_benchmark = True 2 | # model settings 3 | norm_cfg = dict(type='BN', momentum=0.01, eps=1e-3, requires_grad=True) # using SyncBN during training 4 | model = dict( 5 | type='RetinaNet', 6 | pretrained='pretrained/efficientnet-b3-5fb5a3c3.pth', 7 | backbone=dict( 8 | type='EfficientNet', 9 | arch='efficientnet-b3', 10 | out_indices=[4, 6, 8], 11 | norm_cfg=norm_cfg, 12 | norm_eval=False), 13 | neck=dict( 14 | type='BiFPN', 15 | in_channels=[48, 136, 384], 16 | target_size_list=[112, 56, 28, 14, 7], 17 | out_channels=160, 18 | stack=6, 19 | start_level=0, 20 | norm_cfg=norm_cfg, 21 | num_outs=5), 22 | bbox_head=dict( 23 | type='RetinaSepConvHead', 24 | num_classes=81, 25 | num_ins=5, 26 | in_channels=160, 27 | stacked_convs=4, 28 | feat_channels=160, 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | anchor_ratios=[0.5, 1.0, 2.0], 32 | anchor_strides=[8, 16, 32, 64, 128], 33 | target_means=[.0, .0, .0, .0], 34 | target_stds=[1.0, 1.0, 1.0, 1.0], 35 | norm_cfg=norm_cfg, 36 | loss_cls=dict( 37 | type='FocalLoss', 38 | use_sigmoid=True, 39 | gamma=1.5, 40 | alpha=0.25, 41 | loss_weight=1.0), 42 | loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) 43 | # training and testing settings 44 | train_cfg = dict( 45 | assigner=dict( 46 | type='MaxIoUAssigner', 47 | pos_iou_thr=0.5, 48 | neg_iou_thr=0.5, 49 | 
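        # pos_iou_thr == neg_iou_thr == 0.5 leaves no "ignore" interval:
        # anchors with IoU >= 0.5 become positives, everything below is negative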
min_pos_iou=0, 50 | ignore_iof_thr=-1), 51 | allowed_border=-1, 52 | pos_weight=-1, 53 | debug=False) 54 | test_cfg = dict( 55 | nms_pre=1000, 56 | min_bbox_size=0, 57 | score_thr=0.05, 58 | nms=dict(type='nms', iou_thr=0.5), 59 | max_per_img=100) 60 | # dataset settings 61 | dataset_type = 'CocoDataset' 62 | data_root = 'data/coco/' 63 | img_norm_cfg = dict( 64 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 65 | img_size = 896 66 | train_pipeline = [ 67 | dict(type='LoadImageFromFile'), 68 | dict(type='LoadAnnotations', with_bbox=True), 69 | dict( 70 | type='Resize', 71 | img_scale=(img_size, img_size), 72 | ratio_range=(0.1, 2.0), 73 | keep_ratio=True), 74 | dict(type='RandomCrop', crop_size=(img_size, img_size)), 75 | dict(type='RandomFlip', flip_ratio=0.5), 76 | dict(type='Normalize', **img_norm_cfg), 77 | dict(type='Pad', size=(img_size, img_size)), 78 | dict(type='DefaultFormatBundle'), 79 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 80 | ] 81 | test_pipeline = [ 82 | dict(type='LoadImageFromFile'), 83 | dict( 84 | type='MultiScaleFlipAug', 85 | img_scale=(img_size, img_size), 86 | flip=False, 87 | transforms=[ 88 | dict(type='Resize', keep_ratio=True), 89 | dict(type='RandomFlip'), 90 | dict(type='Normalize', **img_norm_cfg), 91 | dict(type='Pad', size=(img_size, img_size)), 92 | dict(type='ImageToTensor', keys=['img']), 93 | dict(type='Collect', keys=['img']), 94 | ]) 95 | ] 96 | data = dict( 97 | imgs_per_gpu=8, 98 | workers_per_gpu=4, 99 | train=dict( 100 | type=dataset_type, 101 | ann_file=data_root + 'annotations/instances_train2017.json', 102 | img_prefix=data_root + 'train2017/', 103 | pipeline=train_pipeline), 104 | val=dict( 105 | type=dataset_type, 106 | ann_file=data_root + 'annotations/instances_val2017.json', 107 | img_prefix=data_root + 'val2017/', 108 | pipeline=test_pipeline), 109 | test=dict( 110 | type=dataset_type, 111 | ann_file=data_root + 'annotations/instances_val2017.json', 112 | img_prefix=data_root + 'val2017/', 113 | pipeline=test_pipeline)) 114 | evaluation = dict(interval=1, metric='bbox') 115 | # optimizer 116 | optimizer = dict( 117 | type='SGD', 118 | lr=0.08, 119 | momentum=0.9, 120 | weight_decay=4e-5, 121 | paramwise_options=dict(norm_decay_mult=0)) 122 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 123 | # learning policy 124 | lr_config = dict( 125 | policy='cosine', 126 | warmup='linear', 127 | warmup_iters=2000, 128 | warmup_ratio=0.008) 129 | checkpoint_config = dict(interval=10) 130 | # yapf:disable 131 | log_config = dict( 132 | interval=50, 133 | hooks=[ 134 | dict(type='TextLoggerHook'), 135 | # dict(type='TensorboardLoggerHook') 136 | ]) 137 | # yapf:enable 138 | # runtime settings 139 | total_epochs = 300 140 | dist_params = dict(backend='nccl') 141 | log_level = 'INFO' 142 | work_dir = './efficientdet_d3' 143 | load_from = None 144 | resume_from = None 145 | workflow = [('train', 1)] 146 | -------------------------------------------------------------------------------- /configs/efficientdet/efficientdet_d4_8gpu.py: -------------------------------------------------------------------------------- 1 | cudnn_benchmark = True 2 | # model settings 3 | norm_cfg = dict(type='BN', momentum=0.01, eps=1e-3, requires_grad=True) # using SyncBN during training 4 | model = dict( 5 | type='RetinaNet', 6 | pretrained='pretrained/efficientnet-b4-6ed6700e.pth', 7 | backbone=dict( 8 | type='EfficientNet', 9 | arch='efficientnet-b4', 10 | out_indices=[4, 6, 8], 11 | norm_cfg=norm_cfg, 12 | 
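        # norm_eval=False keeps the backbone BN layers in train mode, so their
        # running statistics keep updating while the detector trains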
norm_eval=False), 13 | neck=dict( 14 | type='BiFPN', 15 | in_channels=[56, 160, 448], 16 | target_size_list=[128, 64, 32, 16, 8], 17 | out_channels=224, 18 | stack=7, 19 | start_level=0, 20 | norm_cfg=norm_cfg, 21 | num_outs=5), 22 | bbox_head=dict( 23 | type='RetinaSepConvHead', 24 | num_classes=81, 25 | num_ins=5, 26 | in_channels=224, 27 | stacked_convs=4, 28 | feat_channels=224, 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | anchor_ratios=[0.5, 1.0, 2.0], 32 | anchor_strides=[8, 16, 32, 64, 128], 33 | target_means=[.0, .0, .0, .0], 34 | target_stds=[1.0, 1.0, 1.0, 1.0], 35 | norm_cfg=norm_cfg, 36 | loss_cls=dict( 37 | type='FocalLoss', 38 | use_sigmoid=True, 39 | gamma=1.5, 40 | alpha=0.25, 41 | loss_weight=1.0), 42 | loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) 43 | # training and testing settings 44 | train_cfg = dict( 45 | assigner=dict( 46 | type='MaxIoUAssigner', 47 | pos_iou_thr=0.5, 48 | neg_iou_thr=0.5, 49 | min_pos_iou=0, 50 | ignore_iof_thr=-1), 51 | allowed_border=-1, 52 | pos_weight=-1, 53 | debug=False) 54 | test_cfg = dict( 55 | nms_pre=1000, 56 | min_bbox_size=0, 57 | score_thr=0.05, 58 | nms=dict(type='nms', iou_thr=0.5), 59 | max_per_img=100) 60 | # dataset settings 61 | dataset_type = 'CocoDataset' 62 | data_root = 'data/coco/' 63 | img_norm_cfg = dict( 64 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 65 | img_size = 1024 66 | train_pipeline = [ 67 | dict(type='LoadImageFromFile'), 68 | dict(type='LoadAnnotations', with_bbox=True), 69 | dict( 70 | type='Resize', 71 | img_scale=(img_size, img_size), 72 | ratio_range=(0.1, 2.0), 73 | keep_ratio=True), 74 | dict(type='RandomCrop', crop_size=(img_size, img_size)), 75 | dict(type='RandomFlip', flip_ratio=0.5), 76 | dict(type='Normalize', **img_norm_cfg), 77 | dict(type='Pad', size=(img_size, img_size)), 78 | dict(type='DefaultFormatBundle'), 79 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 80 | ] 81 | test_pipeline = [ 82 | dict(type='LoadImageFromFile'), 83 | dict( 84 | type='MultiScaleFlipAug', 85 | img_scale=(img_size, img_size), 86 | flip=False, 87 | transforms=[ 88 | dict(type='Resize', keep_ratio=True), 89 | dict(type='RandomFlip'), 90 | dict(type='Normalize', **img_norm_cfg), 91 | dict(type='Pad', size=(img_size, img_size)), 92 | dict(type='ImageToTensor', keys=['img']), 93 | dict(type='Collect', keys=['img']), 94 | ]) 95 | ] 96 | data = dict( 97 | imgs_per_gpu=8, 98 | workers_per_gpu=4, 99 | train=dict( 100 | type=dataset_type, 101 | ann_file=data_root + 'annotations/instances_train2017.json', 102 | img_prefix=data_root + 'train2017/', 103 | pipeline=train_pipeline), 104 | val=dict( 105 | type=dataset_type, 106 | ann_file=data_root + 'annotations/instances_val2017.json', 107 | img_prefix=data_root + 'val2017/', 108 | pipeline=test_pipeline), 109 | test=dict( 110 | type=dataset_type, 111 | ann_file=data_root + 'annotations/instances_val2017.json', 112 | img_prefix=data_root + 'val2017/', 113 | pipeline=test_pipeline)) 114 | evaluation = dict(interval=1, metric='bbox') 115 | # optimizer 116 | optimizer = dict( 117 | type='SGD', 118 | lr=0.08, 119 | momentum=0.9, 120 | weight_decay=4e-5, 121 | paramwise_options=dict(norm_decay_mult=0)) 122 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 123 | # learning policy 124 | lr_config = dict( 125 | policy='cosine', 126 | warmup='linear', 127 | warmup_iters=2000, 128 | warmup_ratio=0.008) 129 | checkpoint_config = dict(interval=10) 130 | # yapf:disable 131 | log_config = dict( 132 
| interval=50, 133 | hooks=[ 134 | dict(type='TextLoggerHook'), 135 | # dict(type='TensorboardLoggerHook') 136 | ]) 137 | # yapf:enable 138 | # runtime settings 139 | total_epochs = 300 140 | dist_params = dict(backend='nccl') 141 | log_level = 'INFO' 142 | work_dir = './efficientdet_d4' 143 | load_from = None 144 | resume_from = None 145 | workflow = [('train', 1)] 146 | -------------------------------------------------------------------------------- /configs/efficientdet/efficientdet_d0_4gpu.py: -------------------------------------------------------------------------------- 1 | cudnn_benchmark = True 2 | # model settings 3 | norm_cfg = dict(type='SyncBN', momentum=0.01, eps=1e-3, requires_grad=True) # using SyncBN during training 4 | model = dict( 5 | type='RetinaNet', 6 | pretrained='pretrained/adv-efficientnet-b0-b64d5a18.pth', 7 | backbone=dict( 8 | type='EfficientNet', 9 | arch='efficientnet-b0', 10 | out_indices=[4, 6, 8], 11 | norm_cfg=norm_cfg, 12 | norm_eval=False), 13 | neck=dict( 14 | type='BiFPN', 15 | in_channels=[40, 112, 320], 16 | target_size_list=[64, 32, 16, 8, 4], 17 | out_channels=64, 18 | stack=3, 19 | start_level=0, 20 | norm_cfg=norm_cfg, 21 | num_outs=5), 22 | bbox_head=dict( 23 | type='RetinaSepConvHead', 24 | num_classes=81, 25 | num_ins=5, 26 | in_channels=64, 27 | stacked_convs=3, 28 | feat_channels=64, 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | anchor_ratios=[0.5, 1.0, 2.0], 32 | anchor_strides=[8, 16, 32, 64, 128], 33 | target_means=[.0, .0, .0, .0], 34 | target_stds=[1.0, 1.0, 1.0, 1.0], 35 | norm_cfg=norm_cfg, 36 | loss_cls=dict( 37 | type='FocalLoss', 38 | use_sigmoid=True, 39 | gamma=1.5, 40 | alpha=0.25, 41 | loss_weight=1.0), 42 | loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) 43 | # training and testing settings 44 | train_cfg = dict( 45 | assigner=dict( 46 | type='MaxIoUAssigner', 47 | pos_iou_thr=0.5, 48 | neg_iou_thr=0.5, 49 | min_pos_iou=0, 50 | ignore_iof_thr=-1), 51 | allowed_border=-1, 52 | pos_weight=-1, 53 | debug=False) 54 | test_cfg = dict( 55 | nms_pre=1000, 56 | min_bbox_size=0, 57 | score_thr=0.05, 58 | nms=dict(type='nms', iou_thr=0.5), 59 | max_per_img=100) 60 | # dataset settings 61 | dataset_type = 'CocoDataset' 62 | data_root = 'data/coco/' 63 | img_norm_cfg = dict( 64 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 65 | img_size = 512 66 | train_pipeline = [ 67 | dict(type='LoadImageFromFile'), 68 | dict(type='LoadAnnotations', with_bbox=True), 69 | dict( 70 | type='Resize', 71 | img_scale=(img_size, img_size), 72 | ratio_range=(0.1, 2.0), 73 | keep_ratio=True), 74 | dict(type='RandomCrop', crop_size=(img_size, img_size)), 75 | dict(type='RandomFlip', flip_ratio=0.5), 76 | dict(type='Normalize', **img_norm_cfg), 77 | dict(type='Pad', size=(img_size, img_size)), 78 | dict(type='DefaultFormatBundle'), 79 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 80 | ] 81 | test_pipeline = [ 82 | dict(type='LoadImageFromFile'), 83 | dict( 84 | type='MultiScaleFlipAug', 85 | img_scale=(img_size, img_size), 86 | flip=False, 87 | transforms=[ 88 | dict(type='Resize', keep_ratio=True), 89 | dict(type='RandomFlip'), 90 | dict(type='Normalize', **img_norm_cfg), 91 | dict(type='Pad', size=(img_size, img_size)), 92 | dict(type='ImageToTensor', keys=['img']), 93 | dict(type='Collect', keys=['img']), 94 | ]) 95 | ] 96 | data = dict( 97 | imgs_per_gpu=16, 98 | workers_per_gpu=4, 99 | train=dict( 100 | type=dataset_type, 101 | ann_file=data_root + 
'annotations/instances_train2017.json', 102 | img_prefix=data_root + 'train2017/', 103 | pipeline=train_pipeline), 104 | val=dict( 105 | type=dataset_type, 106 | ann_file=data_root + 'annotations/instances_val2017.json', 107 | img_prefix=data_root + 'val2017/', 108 | pipeline=test_pipeline), 109 | test=dict( 110 | type=dataset_type, 111 | ann_file=data_root + 'annotations/instances_val2017.json', 112 | img_prefix=data_root + 'val2017/', 113 | pipeline=test_pipeline)) 114 | evaluation = dict(interval=1, metric='bbox') 115 | # optimizer 116 | optimizer = dict( 117 | type='SGD', 118 | lr=0.08, 119 | momentum=0.9, 120 | weight_decay=4e-5, 121 | paramwise_options=dict(norm_decay_mult=0)) 122 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 123 | # learning policy 124 | lr_config = dict( 125 | policy='cosine', 126 | warmup='linear', 127 | warmup_iters=2000, 128 | warmup_ratio=0.008) 129 | checkpoint_config = dict(interval=10) 130 | # yapf:disable 131 | log_config = dict( 132 | interval=50, 133 | hooks=[ 134 | dict(type='TextLoggerHook'), 135 | # dict(type='TensorboardLoggerHook') 136 | ]) 137 | # yapf:enable 138 | # runtime settings 139 | total_epochs = 300 140 | dist_params = dict(backend='nccl') 141 | log_level = 'INFO' 142 | work_dir = './efficientdet_d0' 143 | load_from = None 144 | resume_from = None 145 | workflow = [('train', 1)] 146 | -------------------------------------------------------------------------------- /configs/efficientdet/efficientdet_d5_8gpu.py: -------------------------------------------------------------------------------- 1 | cudnn_benchmark = True 2 | # model settings 3 | norm_cfg = dict(type='BN', momentum=0.01, eps=1e-3, requires_grad=True) # using SyncBN during training 4 | model = dict( 5 | type='RetinaNet', 6 | pretrained='pretrained/efficientnet-b5-b6417697.pth', 7 | backbone=dict( 8 | type='EfficientNet', 9 | arch='efficientnet-b5', 10 | out_indices=[4, 6, 8], 11 | norm_cfg=norm_cfg, 12 | norm_eval=False), 13 | neck=dict( 14 | type='BiFPN', 15 | in_channels=[64, 176, 512], 16 | target_size_list=[160, 80, 40, 20, 10], 17 | out_channels=288, 18 | stack=7, 19 | start_level=0, 20 | norm_cfg=norm_cfg, 21 | num_outs=5), 22 | bbox_head=dict( 23 | type='RetinaSepConvHead', 24 | num_classes=81, 25 | num_ins=5, 26 | in_channels=288, 27 | stacked_convs=4, 28 | feat_channels=288, 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | anchor_ratios=[0.5, 1.0, 2.0], 32 | anchor_strides=[8, 16, 32, 64, 128], 33 | target_means=[.0, .0, .0, .0], 34 | target_stds=[1.0, 1.0, 1.0, 1.0], 35 | norm_cfg=norm_cfg, 36 | loss_cls=dict( 37 | type='FocalLoss', 38 | use_sigmoid=True, 39 | gamma=1.5, 40 | alpha=0.25, 41 | loss_weight=1.0), 42 | loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) 43 | # training and testing settings 44 | train_cfg = dict( 45 | assigner=dict( 46 | type='MaxIoUAssigner', 47 | pos_iou_thr=0.5, 48 | neg_iou_thr=0.5, 49 | min_pos_iou=0, 50 | ignore_iof_thr=-1), 51 | allowed_border=-1, 52 | pos_weight=-1, 53 | debug=False) 54 | test_cfg = dict( 55 | nms_pre=1000, 56 | min_bbox_size=0, 57 | score_thr=0.05, 58 | nms=dict(type='nms', iou_thr=0.5), 59 | max_per_img=100) 60 | # dataset settings 61 | dataset_type = 'CocoDataset' 62 | data_root = 'data/coco/' 63 | img_norm_cfg = dict( 64 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 65 | img_size = 1280 66 | train_pipeline = [ 67 | dict(type='LoadImageFromFile'), 68 | dict(type='LoadAnnotations', with_bbox=True), 69 | dict( 70 | type='Resize', 71 | 
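        # scale jittering: the image is resized toward img_scale and then
        # randomly rescaled by a factor in ratio_range before RandomCrop below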
img_scale=(img_size, img_size), 72 | ratio_range=(0.1, 2.0), 73 | keep_ratio=True), 74 | dict(type='RandomCrop', crop_size=(img_size, img_size)), 75 | dict(type='RandomFlip', flip_ratio=0.5), 76 | dict(type='Normalize', **img_norm_cfg), 77 | dict(type='Pad', size=(img_size, img_size)), 78 | dict(type='DefaultFormatBundle'), 79 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 80 | ] 81 | test_pipeline = [ 82 | dict(type='LoadImageFromFile'), 83 | dict( 84 | type='MultiScaleFlipAug', 85 | img_scale=(img_size, img_size), 86 | flip=False, 87 | transforms=[ 88 | dict(type='Resize', keep_ratio=True), 89 | dict(type='RandomFlip'), 90 | dict(type='Normalize', **img_norm_cfg), 91 | dict(type='Pad', size=(img_size, img_size)), 92 | dict(type='ImageToTensor', keys=['img']), 93 | dict(type='Collect', keys=['img']), 94 | ]) 95 | ] 96 | data = dict( 97 | imgs_per_gpu=8, 98 | workers_per_gpu=4, 99 | train=dict( 100 | type=dataset_type, 101 | ann_file=data_root + 'annotations/instances_train2017.json', 102 | img_prefix=data_root + 'train2017/', 103 | pipeline=train_pipeline), 104 | val=dict( 105 | type=dataset_type, 106 | ann_file=data_root + 'annotations/instances_val2017.json', 107 | img_prefix=data_root + 'val2017/', 108 | pipeline=test_pipeline), 109 | test=dict( 110 | type=dataset_type, 111 | ann_file=data_root + 'annotations/instances_val2017.json', 112 | img_prefix=data_root + 'val2017/', 113 | pipeline=test_pipeline)) 114 | evaluation = dict(interval=1, metric='bbox') 115 | # optimizer 116 | optimizer = dict( 117 | type='SGD', 118 | lr=0.08, 119 | momentum=0.9, 120 | weight_decay=4e-5, 121 | paramwise_options=dict(norm_decay_mult=0)) 122 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 123 | # learning policy 124 | lr_config = dict( 125 | policy='cosine', 126 | warmup='linear', 127 | warmup_iters=2000, 128 | warmup_ratio=0.008) 129 | checkpoint_config = dict(interval=10) 130 | # yapf:disable 131 | log_config = dict( 132 | interval=50, 133 | hooks=[ 134 | dict(type='TextLoggerHook'), 135 | # dict(type='TensorboardLoggerHook') 136 | ]) 137 | # yapf:enable 138 | # runtime settings 139 | total_epochs = 300 140 | dist_params = dict(backend='nccl') 141 | log_level = 'INFO' 142 | work_dir = './efficientdet_d5' 143 | load_from = None 144 | resume_from = None 145 | workflow = [('train', 1)] 146 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/retina_sepconv_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | from mmcv.cnn import kaiming_init 4 | 5 | from ..registry import HEADS 6 | from ..utils import bias_init_with_prob, SeparableConv2d 7 | from .anchor_head import AnchorHead 8 | 9 | 10 | @HEADS.register_module 11 | class RetinaSepConvHead(AnchorHead): 12 | """"RetinaHead with separate BN and separable conv. 13 | 14 | In RetinaHead, conv/norm layers are shared across different FPN levels, 15 | while in RetinaSepBNHead, conv layers are shared across different FPN 16 | levels, but BN layers are separated. 17 | 18 | In EfficientDet, using separable conv as conv module. 
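    Depthwise and pointwise conv weights are shared across FPN levels (they
    are tied together in _init_layers), while each level keeps its own BN
    statistics.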
19 | """ 20 | 21 | def __init__(self, 22 | num_classes, 23 | num_ins, 24 | in_channels, 25 | stacked_convs=4, 26 | octave_base_scale=4, 27 | scales_per_octave=3, 28 | conv_cfg=None, 29 | norm_cfg=None, 30 | **kwargs): 31 | self.stacked_convs = stacked_convs 32 | self.octave_base_scale = octave_base_scale 33 | self.scales_per_octave = scales_per_octave 34 | self.conv_cfg = conv_cfg 35 | self.norm_cfg = norm_cfg 36 | self.num_ins = num_ins 37 | octave_scales = np.array( 38 | [2 ** (i / scales_per_octave) for i in range(scales_per_octave)]) 39 | anchor_scales = octave_scales * octave_base_scale 40 | super(RetinaSepConvHead, self).__init__( 41 | num_classes, in_channels, anchor_scales=anchor_scales, **kwargs) 42 | 43 | def _init_layers(self): 44 | self.relu = nn.ReLU(inplace=True) 45 | self.cls_convs = nn.ModuleList() 46 | self.reg_convs = nn.ModuleList() 47 | for i in range(self.num_ins): 48 | cls_convs = nn.ModuleList() 49 | reg_convs = nn.ModuleList() 50 | for i in range(self.stacked_convs): 51 | chn = self.in_channels if i == 0 else self.feat_channels 52 | cls_convs.append( 53 | SeparableConv2d( 54 | chn, 55 | self.feat_channels, 56 | 3, 57 | stride=1, 58 | padding=1, 59 | activation="Swish", 60 | bias=True, 61 | norm_cfg=self.norm_cfg)) 62 | reg_convs.append( 63 | SeparableConv2d( 64 | chn, 65 | self.feat_channels, 66 | 3, 67 | stride=1, 68 | padding=1, 69 | activation="Swish", 70 | bias=True, 71 | norm_cfg=self.norm_cfg)) 72 | self.cls_convs.append(cls_convs) 73 | self.reg_convs.append(reg_convs) 74 | for i in range(self.stacked_convs): 75 | for j in range(1, self.num_ins): 76 | self.cls_convs[j][i].depthwise = self.cls_convs[0][i].depthwise 77 | self.cls_convs[j][i].pointwise.conv = self.cls_convs[0][i].pointwise.conv 78 | self.reg_convs[j][i].depthwise = self.reg_convs[0][i].depthwise 79 | self.reg_convs[j][i].pointwise.conv = self.reg_convs[0][i].pointwise.conv 80 | self.retina_cls = SeparableConv2d( 81 | self.feat_channels, 82 | self.num_anchors * self.cls_out_channels, 83 | 3, 84 | padding=1, 85 | bias=True, 86 | norm_cfg=None) 87 | self.retina_reg = SeparableConv2d( 88 | self.feat_channels, self.num_anchors * 4, 3, padding=1, bias=True, norm_cfg=None) 89 | 90 | def init_weights(self): 91 | for m in self.cls_convs[0]: 92 | kaiming_init(m.depthwise, mode='fan_in') 93 | kaiming_init(m.pointwise.conv, mode='fan_in') 94 | for m in self.reg_convs[0]: 95 | kaiming_init(m.depthwise, mode='fan_in') 96 | kaiming_init(m.pointwise.conv, mode='fan_in') 97 | bias_cls = bias_init_with_prob(0.01) 98 | kaiming_init(self.retina_cls.depthwise, mode='fan_in') 99 | kaiming_init(self.retina_cls.pointwise.conv, mode='fan_in', bias=bias_cls) 100 | kaiming_init(self.retina_reg.depthwise, mode='fan_in') 101 | kaiming_init(self.retina_reg.pointwise.conv, mode='fan_in') 102 | 103 | def forward(self, feats): 104 | cls_scores = [] 105 | bbox_preds = [] 106 | for i, x in enumerate(feats): 107 | cls_feat = feats[i] 108 | reg_feat = feats[i] 109 | for cls_conv in self.cls_convs[i]: 110 | cls_feat = cls_conv(cls_feat) 111 | for reg_conv in self.reg_convs[i]: 112 | reg_feat = reg_conv(reg_feat) 113 | cls_score = self.retina_cls(cls_feat) 114 | bbox_pred = self.retina_reg(reg_feat) 115 | cls_scores.append(cls_score) 116 | bbox_preds.append(bbox_pred) 117 | return cls_scores, bbox_preds 118 | -------------------------------------------------------------------------------- /mmdet/models/necks/bifpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn 
as nn 3 | import torch.nn.functional as F 4 | from mmcv.cnn import xavier_init 5 | 6 | from mmdet.core import auto_fp16 7 | from ..registry import NECKS 8 | from mmdet.ops import ConvModule 9 | from ..utils import SeparableConv2d, MemoryEfficientSwish 10 | 11 | 12 | class WeightedMerge(nn.Module): 13 | def __init__(self, in_channels, out_channels, target_size, norm_cfg, apply_bn=False, eps=0.0001): 14 | super(WeightedMerge, self).__init__() 15 | self.conv = SeparableConv2d(out_channels, out_channels, 3, padding=1, norm_cfg=norm_cfg, bias=True) 16 | self.eps = eps 17 | self.num_ins = len(in_channels) 18 | self.weight = nn.Parameter(torch.Tensor(self.num_ins).fill_(1)) 19 | self.relu = nn.ReLU(inplace=False) 20 | self.swish = MemoryEfficientSwish() 21 | self.resample_ops = nn.ModuleList() 22 | for in_c in in_channels: 23 | self.resample_ops.append(Resample(in_c, out_channels, target_size, norm_cfg, apply_bn)) 24 | 25 | def forward(self, inputs): 26 | assert isinstance(inputs, list) 27 | assert len(inputs) == self.num_ins 28 | w = self.relu(self.weight) 29 | w /= (w.sum() + self.eps) 30 | x = 0 31 | for i in range(self.num_ins): 32 | x += w[i] * self.resample_ops[i](inputs[i]) 33 | output = self.conv(self.swish(x)) 34 | return output 35 | 36 | 37 | class Resample(nn.Module): 38 | def __init__(self, in_channels, out_channels, target_size, norm_cfg, apply_bn=False): 39 | super(Resample, self).__init__() 40 | self.target_size = torch.Size([target_size, target_size]) 41 | self.is_conv = in_channels != out_channels 42 | if self.is_conv: 43 | self.conv = ConvModule(in_channels, 44 | out_channels, 45 | 1, 46 | norm_cfg=norm_cfg if apply_bn else None, 47 | bias=True, 48 | act_cfg=None, 49 | inplace=False) 50 | 51 | def _resize(self, x, size): 52 | if x.shape[-2:] == size: 53 | return x 54 | elif x.shape[-2:] < size: 55 | return F.interpolate(x, size=size, mode='nearest') 56 | else: 57 | assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0 58 | kernel_size = x.shape[-1] // size[-1] 59 | x = F.max_pool2d(x, kernel_size=kernel_size+1, stride=kernel_size, padding=1) 60 | return x 61 | 62 | def forward(self, inputs): 63 | if self.is_conv: 64 | inputs = self.conv(inputs) 65 | return self._resize(inputs, self.target_size) 66 | 67 | 68 | class bifpn_layer(nn.Module): 69 | def __init__(self, 70 | in_channels, 71 | out_channels, 72 | target_size_list, 73 | num_outs=5, 74 | conv_cfg=None, 75 | norm_cfg=None, 76 | activation=None): 77 | super(bifpn_layer, self).__init__() 78 | assert num_outs >= 2 79 | self.out_channels = out_channels 80 | self.conv_cfg = conv_cfg 81 | self.norm_cfg = norm_cfg 82 | self.activation = activation 83 | self.num_outs = num_outs 84 | 85 | self.top_down_merge = nn.ModuleList() 86 | for i in range(self.num_outs - 1, 0, -1): 87 | in_channels_list = [out_channels, in_channels[i-1]] if i < self.num_outs - 1 else [in_channels[i], in_channels[i-1]] 88 | merge_op = WeightedMerge(in_channels_list, out_channels, target_size_list[i-1], norm_cfg, apply_bn=True) 89 | self.top_down_merge.append(merge_op) 90 | 91 | self.bottom_up_merge = nn.ModuleList() 92 | for i in range(0, self.num_outs - 1): 93 | in_channels_list = [out_channels, in_channels[i+1], out_channels] if i < self.num_outs - 2 else [in_channels[-1], out_channels] 94 | merge_op = WeightedMerge(in_channels_list, out_channels, target_size_list[i+1], norm_cfg, apply_bn=True) 95 | self.bottom_up_merge.append(merge_op) 96 | 97 | def forward(self, inputs): 98 | assert len(inputs) == self.num_outs 99 | 100 | # top down merge 101 | 
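        # Walk from the coarsest level to the finest: each step resamples the
        # previously merged map to the next finer resolution and fuses it with
        # the corresponding input via a normalized weighted sum (WeightedMerge)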
md_x = [] 102 | for i in range(self.num_outs - 1, 0, -1): 103 | inputs_list = [md_x[-1], inputs[i-1]] if i < self.num_outs - 1 else [inputs[i], inputs[i-1]] 104 | x = self.top_down_merge[self.num_outs-i-1](inputs_list) 105 | md_x.append(x) 106 | 107 | # bottom up merge 108 | outputs = md_x[::-1] 109 | for i in range(1, self.num_outs - 1): 110 | outputs[i] = self.bottom_up_merge[i-1]([outputs[i], inputs[i], outputs[i-1]]) 111 | outputs.append(self.bottom_up_merge[-1]([inputs[-1], outputs[-1]])) 112 | return outputs 113 | 114 | 115 | @NECKS.register_module 116 | class BiFPN(nn.Module): 117 | def __init__(self, 118 | in_channels, 119 | out_channels, 120 | target_size_list, 121 | num_outs, 122 | start_level=0, 123 | end_level=-1, 124 | stack=1, 125 | conv_cfg=None, 126 | norm_cfg=dict(type='BN', momentum=0.003, eps=1e-4, requires_grad=True)): 127 | super(BiFPN, self).__init__() 128 | assert len(in_channels) >= 3 129 | self.in_channels = in_channels 130 | self.out_channels = out_channels 131 | self.num_ins = len(in_channels) 132 | self.stack = stack 133 | self.num_outs = num_outs 134 | self.fp16_enabled = False 135 | 136 | if end_level == -1: 137 | self.backbone_end_level = self.num_ins 138 | assert num_outs >= self.num_ins - start_level 139 | else: 140 | # if end_level < inputs, no extra level is allowed 141 | self.backbone_end_level = end_level 142 | assert end_level <= len(in_channels) 143 | assert num_outs == end_level - start_level 144 | self.start_level = start_level 145 | self.end_level = end_level 146 | 147 | # add extra feature layers using resampling 148 | self.extra_ops = nn.ModuleList() 149 | for i in range(self.backbone_end_level, self.num_outs): 150 | in_c = in_channels[-1] 151 | self.extra_ops.append( 152 | Resample(in_c, out_channels, target_size_list[i] , norm_cfg, apply_bn=True) 153 | ) 154 | in_channels.append(out_channels) 155 | 156 | self.stack_bifpns = nn.ModuleList() 157 | for _ in range(stack): 158 | self.stack_bifpns.append( 159 | bifpn_layer(in_channels, 160 | out_channels, 161 | target_size_list, 162 | num_outs=self.num_outs, 163 | conv_cfg=conv_cfg, 164 | norm_cfg=norm_cfg)) 165 | in_channels = [out_channels] * self.num_outs 166 | 167 | def init_weights(self): 168 | for m in self.modules(): 169 | if isinstance(m, SeparableConv2d): 170 | m.init_weights() 171 | 172 | @auto_fp16() 173 | def forward(self, inputs): 174 | outs = list(inputs) 175 | for _, extra_op in enumerate(self.extra_ops): 176 | outs.append(extra_op(outs[-1])) 177 | 178 | for _, stack_bifpn in enumerate(self.stack_bifpns): 179 | outs = stack_bifpn(outs) 180 | 181 | return tuple(outs[:self.num_outs]) 182 | 183 | -------------------------------------------------------------------------------- /mmdet/models/backbones/efficientnet.py: -------------------------------------------------------------------------------- 1 | # This EfficientNet code is copy from https://github.com/lukemelas/EfficientNet-PyTorch 2 | # Thanks to the authors of this repository! 
3 | # ===================================================================================== 4 | 5 | import logging 6 | import math 7 | import collections 8 | import re 9 | 10 | import torch 11 | import torch.nn as nn 12 | from torch.nn import functional as F 13 | from torch.utils import model_zoo 14 | 15 | from functools import partial 16 | 17 | from mmcv.runner import load_checkpoint 18 | from ..utils import MemoryEfficientSwish, Swish 19 | from mmdet.ops import build_norm_layer 20 | 21 | from ..registry import BACKBONES 22 | 23 | 24 | def round_filters(filters, global_params): 25 | """ Calculate and round number of filters based on depth multiplier. """ 26 | multiplier = global_params.width_coefficient 27 | if not multiplier: 28 | return filters 29 | divisor = global_params.depth_divisor 30 | min_depth = global_params.min_depth 31 | filters *= multiplier 32 | min_depth = min_depth or divisor 33 | new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) 34 | if new_filters < 0.9 * filters: # prevent rounding by more than 10% 35 | new_filters += divisor 36 | return int(new_filters) 37 | 38 | 39 | def round_repeats(repeats, global_params): 40 | """ Round number of filters based on depth multiplier. """ 41 | multiplier = global_params.depth_coefficient 42 | if not multiplier: 43 | return repeats 44 | return int(math.ceil(multiplier * repeats)) 45 | 46 | def drop_connect(inputs, p, training): 47 | """ Drop connect. """ 48 | if not training: return inputs 49 | batch_size = inputs.shape[0] 50 | keep_prob = 1 - p 51 | random_tensor = keep_prob 52 | random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device) 53 | binary_tensor = torch.floor(random_tensor) 54 | output = inputs / keep_prob * binary_tensor 55 | return output 56 | 57 | def get_same_padding_conv2d(image_size=None): 58 | """ Chooses static padding if you have specified an image size, and dynamic padding otherwise. 59 | Static padding is necessary for ONNX exporting of models. 
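        TensorFlow 'SAME' padding pads each spatial dim by a total of
        max((ceil(i / s) - 1) * s + (k - 1) * d + 1 - i, 0), so the output size
        is ceil(input / stride); the pad is split as evenly as possible.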
""" 60 | if image_size is None: 61 | return Conv2dDynamicSamePadding 62 | else: 63 | return partial(Conv2dStaticSamePadding, image_size=image_size) 64 | 65 | class Conv2dDynamicSamePadding(nn.Conv2d): 66 | """ 2D Convolutions like TensorFlow, for a dynamic image size """ 67 | 68 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True): 69 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) 70 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 71 | 72 | def forward(self, x): 73 | ih, iw = x.size()[-2:] 74 | kh, kw = self.weight.size()[-2:] 75 | sh, sw = self.stride 76 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 77 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 78 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 79 | if pad_h > 0 or pad_w > 0: 80 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) 81 | return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 82 | 83 | class Conv2dStaticSamePadding(nn.Conv2d): 84 | """ 2D Convolutions like TensorFlow, for a fixed image size""" 85 | 86 | def __init__(self, in_channels, out_channels, kernel_size, image_size=None, **kwargs): 87 | super().__init__(in_channels, out_channels, kernel_size, **kwargs) 88 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 89 | 90 | # Calculate padding based on image size and save it 91 | assert image_size is not None 92 | ih, iw = image_size if type(image_size) == list else [image_size, image_size] 93 | kh, kw = self.weight.size()[-2:] 94 | sh, sw = self.stride 95 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 96 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 97 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 98 | if pad_h > 0 or pad_w > 0: 99 | self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)) 100 | else: 101 | self.static_padding = Identity() 102 | 103 | def forward(self, x): 104 | x = self.static_padding(x) 105 | x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 106 | return x 107 | 108 | class Identity(nn.Module): 109 | def __init__(self, ): 110 | super(Identity, self).__init__() 111 | 112 | def forward(self, input): 113 | return input 114 | 115 | 116 | class MBConvBlock(nn.Module): 117 | """ 118 | Mobile Inverted Residual Bottleneck Block 119 | 120 | Args: 121 | block_args (namedtuple): BlockArgs, see above 122 | global_params (namedtuple): GlobalParam, see above 123 | 124 | Attributes: 125 | has_se (bool): Whether the block contains a Squeeze and Excitation layer. 
126 | """ 127 | 128 | def __init__(self, block_args, global_params, norm_cfg): 129 | super().__init__() 130 | self._block_args = block_args 131 | self._bn_mom = 1 - global_params.batch_norm_momentum 132 | self._bn_eps = global_params.batch_norm_epsilon 133 | self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1) 134 | self.id_skip = block_args.id_skip # skip connection and drop connect 135 | 136 | # Get static or dynamic convolution depending on image size 137 | Conv2d = get_same_padding_conv2d(image_size=global_params.image_size) 138 | 139 | # Expansion phase 140 | inp = self._block_args.input_filters # number of input channels 141 | oup = self._block_args.input_filters * self._block_args.expand_ratio # number of output channels 142 | if self._block_args.expand_ratio != 1: 143 | self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False) 144 | self._bn0 = build_norm_layer(norm_cfg, num_features=oup, postfix=0)[1] 145 | 146 | # Depthwise convolution phase 147 | k = self._block_args.kernel_size 148 | s = self._block_args.stride 149 | self._depthwise_conv = Conv2d( 150 | in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise 151 | kernel_size=k, stride=s, bias=False) 152 | self._bn1 = build_norm_layer(norm_cfg, num_features=oup, postfix=1)[1] 153 | 154 | # Squeeze and Excitation layer, if desired 155 | if self.has_se: 156 | num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio)) 157 | self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1) 158 | self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1) 159 | 160 | # Output phase 161 | final_oup = self._block_args.output_filters 162 | self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False) 163 | self._bn2 = build_norm_layer(norm_cfg, num_features=final_oup, postfix=2)[1] 164 | self._swish = MemoryEfficientSwish() 165 | 166 | def forward(self, inputs, drop_connect_rate=None): 167 | """ 168 | :param inputs: input tensor 169 | :param drop_connect_rate: drop connect rate (float, between 0 and 1) 170 | :return: output of block 171 | """ 172 | 173 | # Expansion and Depthwise Convolution 174 | x = inputs 175 | if self._block_args.expand_ratio != 1: 176 | x = self._swish(self._bn0(self._expand_conv(inputs))) 177 | x = self._swish(self._bn1(self._depthwise_conv(x))) 178 | 179 | # Squeeze and Excitation 180 | if self.has_se: 181 | x_squeezed = F.adaptive_avg_pool2d(x, 1) 182 | x_squeezed = self._se_expand(self._swish(self._se_reduce(x_squeezed))) 183 | x = torch.sigmoid(x_squeezed) * x 184 | 185 | x = self._bn2(self._project_conv(x)) 186 | 187 | # Skip connection and drop connect 188 | input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters 189 | if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters: 190 | if drop_connect_rate: 191 | x = drop_connect(x, p=drop_connect_rate, training=self.training) 192 | x = x + inputs # skip connection 193 | return x 194 | 195 | def set_swish(self, memory_efficient=True): 196 | """Sets swish function as memory efficient (for training) or standard (for export)""" 197 | self._swish = MemoryEfficientSwish() if memory_efficient else Swish() 198 | 199 | 200 | def efficientnet_params(model_name): 201 | """ Map EfficientNet model name to parameter coefficients. 
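    Returns a (width_coefficient, depth_coefficient, resolution, dropout_rate)
    tuple for the given variant.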
""" 202 | params_dict = { 203 | # Coefficients: width,depth,res,dropout 204 | 'efficientnet-b0': (1.0, 1.0, 224, 0.2), 205 | 'efficientnet-b1': (1.0, 1.1, 240, 0.2), 206 | 'efficientnet-b2': (1.1, 1.2, 260, 0.3), 207 | 'efficientnet-b3': (1.2, 1.4, 300, 0.3), 208 | 'efficientnet-b4': (1.4, 1.8, 380, 0.4), 209 | 'efficientnet-b5': (1.6, 2.2, 456, 0.4), 210 | 'efficientnet-b6': (1.8, 2.6, 528, 0.5), 211 | 'efficientnet-b7': (2.0, 3.1, 600, 0.5), 212 | } 213 | return params_dict[model_name] 214 | 215 | class BlockDecoder(object): 216 | """ Block Decoder for readability, straight from the official TensorFlow repository """ 217 | 218 | @staticmethod 219 | def _decode_block_string(block_string): 220 | """ Gets a block through a string notation of arguments. """ 221 | assert isinstance(block_string, str) 222 | 223 | ops = block_string.split('_') 224 | options = {} 225 | for op in ops: 226 | splits = re.split(r'(\d.*)', op) 227 | if len(splits) >= 2: 228 | key, value = splits[:2] 229 | options[key] = value 230 | 231 | # Check stride 232 | assert (('s' in options and len(options['s']) == 1) or 233 | (len(options['s']) == 2 and options['s'][0] == options['s'][1])) 234 | 235 | return BlockArgs( 236 | kernel_size=int(options['k']), 237 | num_repeat=int(options['r']), 238 | input_filters=int(options['i']), 239 | output_filters=int(options['o']), 240 | expand_ratio=int(options['e']), 241 | id_skip=('noskip' not in block_string), 242 | se_ratio=float(options['se']) if 'se' in options else None, 243 | stride=[int(options['s'][0])]) 244 | 245 | @staticmethod 246 | def _encode_block_string(block): 247 | """Encodes a block to a string.""" 248 | args = [ 249 | 'r%d' % block.num_repeat, 250 | 'k%d' % block.kernel_size, 251 | 's%d%d' % (block.strides[0], block.strides[1]), 252 | 'e%s' % block.expand_ratio, 253 | 'i%d' % block.input_filters, 254 | 'o%d' % block.output_filters 255 | ] 256 | if 0 < block.se_ratio <= 1: 257 | args.append('se%s' % block.se_ratio) 258 | if block.id_skip is False: 259 | args.append('noskip') 260 | return '_'.join(args) 261 | 262 | @staticmethod 263 | def decode(string_list): 264 | """ 265 | Decodes a list of string notations to specify blocks inside the network. 266 | 267 | :param string_list: a list of strings, each string is a notation of block 268 | :return: a list of BlockArgs namedtuples of block args 269 | """ 270 | assert isinstance(string_list, list) 271 | blocks_args = [] 272 | for block_string in string_list: 273 | blocks_args.append(BlockDecoder._decode_block_string(block_string)) 274 | return blocks_args 275 | 276 | @staticmethod 277 | def encode(blocks_args): 278 | """ 279 | Encodes a list of BlockArgs to a list of strings. 280 | 281 | :param blocks_args: a list of BlockArgs namedtuples of block args 282 | :return: a list of strings, each string is a notation of block 283 | """ 284 | block_strings = [] 285 | for block in blocks_args: 286 | block_strings.append(BlockDecoder._encode_block_string(block)) 287 | return block_strings 288 | 289 | def efficientnet(width_coefficient=None, depth_coefficient=None, dropout_rate=0.2, 290 | drop_connect_rate=0.2, image_size=None, num_classes=1000): 291 | """ Creates a efficientnet model. 
""" 292 | 293 | blocks_args = [ 294 | 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25', 295 | 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', 296 | 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25', 297 | 'r1_k3_s11_e6_i192_o320_se0.25', 298 | ] 299 | blocks_args = BlockDecoder.decode(blocks_args) 300 | 301 | global_params = GlobalParams( 302 | batch_norm_momentum=0.99, #0.99 -> 0.997 303 | batch_norm_epsilon=1e-3, #1e-3 -> 1e-4 304 | dropout_rate=dropout_rate, 305 | drop_connect_rate=drop_connect_rate, 306 | # data_format='channels_last', # removed, this is always true in PyTorch 307 | num_classes=num_classes, 308 | width_coefficient=width_coefficient, 309 | depth_coefficient=depth_coefficient, 310 | depth_divisor=8, 311 | min_depth=None, 312 | image_size=image_size, 313 | ) 314 | 315 | return blocks_args, global_params 316 | 317 | 318 | # Parameters for the entire model (stem, all blocks, and head) 319 | GlobalParams = collections.namedtuple('GlobalParams', [ 320 | 'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate', 321 | 'num_classes', 'width_coefficient', 'depth_coefficient', 322 | 'depth_divisor', 'min_depth', 'drop_connect_rate', 'image_size']) 323 | 324 | # Parameters for an individual model block 325 | BlockArgs = collections.namedtuple('BlockArgs', [ 326 | 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', 327 | 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) 328 | 329 | # Change namedtuple defaults 330 | GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) 331 | BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) 332 | 333 | def get_model_params(model_name, override_params): 334 | """ Get the block args and global params for a given model """ 335 | if model_name.startswith('efficientnet'): 336 | w, d, s, p = efficientnet_params(model_name) 337 | # note: all models have drop connect rate = 0.2 338 | blocks_args, global_params = efficientnet( 339 | width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s) 340 | else: 341 | raise NotImplementedError('model name is not pre-defined: %s' % model_name) 342 | if override_params: 343 | # ValueError will be raised here if override_params has fields not included in global_params. 344 | global_params = global_params._replace(**override_params) 345 | return blocks_args, global_params 346 | 347 | ################################################################################################################## 348 | @BACKBONES.register_module 349 | class EfficientNet(nn.Module): 350 | """ 351 | An EfficientNet model. 
352 | 
353 |     Args: 
354 |         arch (str): EfficientNet variant to build, e.g. 'efficientnet-b0' 
355 |         out_indices (tuple): network stages whose feature maps are returned (the stem is stage 1) 
356 | 
357 |     Example: 
358 |         model = EfficientNet.from_pretrained('efficientnet-b0') 
359 | 
360 |     """ 
361 | 
362 |     def __init__(self, 
363 |                  arch='efficientnet-b0', 
364 |                  out_indices=(4, 5, 6, 7, 8),  # tuple rather than a mutable default list 
365 |                  norm_cfg=dict(type="BN"), 
366 |                  norm_eval=True, 
367 |                  override_params=None): 
368 |         super(EfficientNet, self).__init__() 
369 |         self._check_model_name_is_valid(arch) 
370 |         blocks_args, global_params = get_model_params(arch, override_params) 
371 |         assert isinstance(blocks_args, list), 'blocks_args should be a list' 
372 |         assert len(blocks_args) > 0, 'blocks_args must not be empty' 
373 |         self._global_params = global_params 
374 |         self._blocks_args = blocks_args 
375 |         self.out_indices = out_indices 
376 |         self.norm_eval = norm_eval 
377 | 
378 |         # Get static or dynamic convolution depending on image size 
379 |         Conv2d = get_same_padding_conv2d(image_size=global_params.image_size) 
380 | 
381 |         # Batch norm parameters (kept for reference; the layers below are built from norm_cfg) 
382 |         bn_mom = 1 - self._global_params.batch_norm_momentum 
383 |         bn_eps = self._global_params.batch_norm_epsilon 
384 | 
385 |         # Stem 
386 |         in_channels = 3  # rgb 
387 |         out_channels = round_filters(32, self._global_params)  # number of output channels 
388 |         self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) 
389 |         self._bn0 = build_norm_layer(norm_cfg, num_features=out_channels, postfix=0)[1] 
390 | 
391 |         # Build blocks 
392 |         self._blocks = nn.ModuleList([]) 
393 |         self.per_last_stage_idx = [] 
394 |         cum_idx = 0 
395 |         for block_args in self._blocks_args: 
396 |             # Update block input and output filters based on depth multiplier. 
397 |             block_args = block_args._replace( 
398 |                 input_filters=round_filters(block_args.input_filters, self._global_params), 
399 |                 output_filters=round_filters(block_args.output_filters, self._global_params), 
400 |                 num_repeat=round_repeats(block_args.num_repeat, self._global_params) 
401 |             ) 
402 | 
403 |             # The first block needs to handle the stride and the filter size increase (see the sketch below). 
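            # A sketch for b0's stage 3 string 'r2_k3_s22_e6_i16_o24_se0.25': the 
            # first MBConvBlock runs with stride 2 and 16->24 filters, then the 
            # single remaining repeat runs with stride 1 and 24->24 filters. 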
404 |             self._blocks.append(MBConvBlock(block_args, self._global_params, norm_cfg)) 
405 |             if block_args.num_repeat > 1: 
406 |                 block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) 
407 |             for _ in range(block_args.num_repeat - 1): 
408 |                 self._blocks.append(MBConvBlock(block_args, self._global_params, norm_cfg)) 
409 |             # record the cumulative index of the last block in each stage 
410 |             cum_idx += block_args.num_repeat 
411 |             self.per_last_stage_idx.append(cum_idx) 
412 | 
413 |         # map each requested stage to the index of its last block (stage 1 is the stem, stages 2-8 are the block groups) 
414 |         self.out_stage_idx = [] 
415 |         for stage in self.out_indices: 
416 |             self.out_stage_idx.append(self.per_last_stage_idx[stage - 2] - 1) 
417 | 
418 | 
419 |         # Head (disabled: this module is used as a detection backbone) 
420 |         # in_channels = block_args.output_filters  # output of final block 
421 |         # out_channels = round_filters(1280, self._global_params) 
422 |         # self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False) 
423 |         # self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) 
424 | 
425 |         # Final linear layer (disabled for the same reason) 
426 |         # self._avg_pooling = nn.AdaptiveAvgPool2d(1) 
427 |         # self._dropout = nn.Dropout(self._global_params.dropout_rate) 
428 |         # self._fc = nn.Linear(out_channels, self._global_params.num_classes) 
429 |         self._swish = MemoryEfficientSwish() 
430 | 
431 |     def set_swish(self, memory_efficient=True): 
432 |         """ Sets swish function as memory efficient (for training) or standard (for export) """ 
433 |         self._swish = MemoryEfficientSwish() if memory_efficient else Swish() 
434 |         for block in self._blocks: 
435 |             block.set_swish(memory_efficient) 
436 | 
437 |     def get_last_stage_idx(self): 
438 |         return self.per_last_stage_idx  # cumulative index of the last block in each stage 
439 | 
440 |     def extract_features(self, inputs): 
441 |         """ Returns the feature maps of the stages listed in out_indices """ 
442 | 
443 |         # Stem 
444 |         x = self._swish(self._bn0(self._conv_stem(inputs))) 
445 | 
446 |         # Blocks 
447 |         outs = [] 
448 |         for idx, block in enumerate(self._blocks): 
449 |             drop_connect_rate = self._global_params.drop_connect_rate 
450 |             if drop_connect_rate: 
451 |                 drop_connect_rate *= float(idx) / len(self._blocks)  # scale drop connect linearly with depth 
452 |             x = block(x, drop_connect_rate=drop_connect_rate) 
453 |             # emit the feature map if this is the last block of a requested stage 
454 |             if idx in self.out_stage_idx: 
455 |                 outs.append(x) 
456 |         # Head 
457 |         # x = self._swish(self._bn1(self._conv_head(x))) 
458 | 
459 |         return tuple(outs) 
460 | 
461 |     def init_weights(self, pretrained=None): 
462 |         if isinstance(pretrained, str): 
463 |             logger = logging.getLogger() 
464 |             load_checkpoint(self, pretrained, strict=False, logger=logger) 
465 |         elif pretrained is None: 
466 |             # nothing to load; modules keep their default initialization 
467 |             return 
468 |         else: 
469 |             raise TypeError('pretrained must be a str or None') 
470 | 
471 | 
472 |     def forward(self, inputs): 
473 |         """ Extracts and returns the multi-scale backbone features; the classification head has been removed. """ 
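        # A sketch of the expected result (derived from the b0 blocks_args above): with 
        # the default out_indices=(4, 5, 6, 7, 8), the returned tuple holds five maps 
        # with strides 8, 16, 16, 32, 32 and 40, 80, 112, 192, 320 channels respectively. 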
""" 474 | bs = inputs.size(0) 475 | # Convolution layers 476 | x = self.extract_features(inputs) 477 | 478 | # # Pooling and final linear layer 479 | # x = self._avg_pooling(x) 480 | # x = x.view(bs, -1) 481 | # x = self._dropout(x) 482 | # x = self._fc(x) 483 | return x 484 | 485 | @classmethod 486 | def from_name(cls, model_name, override_params=None): 487 | cls._check_model_name_is_valid(model_name) 488 | blocks_args, global_params = get_model_params(model_name, override_params) 489 | return cls(blocks_args, global_params) 490 | 491 | @classmethod 492 | def from_pretrained(cls, model_name, num_classes=1000, in_channels = 3): 493 | model = cls.from_name(model_name, override_params={'num_classes': num_classes}) 494 | load_pretrained_weights(model, model_name, load_fc=(num_classes == 1000)) 495 | if in_channels != 3: 496 | Conv2d = get_same_padding_conv2d(image_size = model._global_params.image_size) 497 | out_channels = round_filters(32, model._global_params) 498 | model._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) 499 | return model 500 | 501 | @classmethod 502 | def from_pretrained(cls, model_name, num_classes=1000): 503 | model = cls.from_name(model_name, override_params={'num_classes': num_classes}) 504 | load_pretrained_weights(model, model_name, load_fc=(num_classes == 1000)) 505 | 506 | return model 507 | 508 | @classmethod 509 | def get_image_size(cls, model_name): 510 | cls._check_model_name_is_valid(model_name) 511 | _, _, res, _ = efficientnet_params(model_name) 512 | return res 513 | 514 | @classmethod 515 | def _check_model_name_is_valid(cls, model_name, also_need_pretrained_weights=False): 516 | """ Validates model name. None that pretrained weights are only available for 517 | the first four models (efficientnet-b{i} for i in 0,1,2,3) at the moment. """ 518 | num_models = 4 if also_need_pretrained_weights else 8 519 | valid_models = ['efficientnet-b'+str(i) for i in range(num_models)] 520 | if model_name not in valid_models: 521 | raise ValueError('model_name should be one of: ' + ', '.join(valid_models)) 522 | 523 | def train(self, mode=True): 524 | super(EfficientNet, self).train(mode) 525 | # self._freeze_stages() 526 | if mode and self.norm_eval: 527 | for m in self.modules(): 528 | # trick: eval have effect on BatchNorm only 529 | if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.SyncBatchNorm): 530 | m.eval() 531 | 532 | 533 | 534 | url_map = { 535 | 'efficientnet-b0': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b0-355c32eb.pth', 536 | 'efficientnet-b1': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b1-f1951068.pth', 537 | 'efficientnet-b2': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b2-8bb594d6.pth', 538 | 'efficientnet-b3': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b3-5fb5a3c3.pth', 539 | 'efficientnet-b4': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b4-6ed6700e.pth', 540 | 'efficientnet-b5': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b5-b6417697.pth', 541 | 'efficientnet-b6': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b6-c76e70fd.pth', 542 | 'efficientnet-b7': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b7-dcc49843.pth', 543 | } 544 | 545 | def load_pretrained_weights(model, model_name, load_fc=True): 546 | """ Loads pretrained weights, and downloads if loading for the first time. 
""" 547 | state_dict = model_zoo.load_url(url_map[model_name]) 548 | if load_fc: 549 | model.load_state_dict(state_dict) 550 | else: 551 | state_dict.pop('_fc.weight') 552 | state_dict.pop('_fc.bias') 553 | res = model.load_state_dict(state_dict, strict=False) 554 | assert set(res.missing_keys) == set(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights' 555 | print('Loaded pretrained weights for {}'.format(model_name)) 556 | --------------------------------------------------------------------------------