├── lib ├── __init__.py ├── nms │ ├── __init__.py │ ├── src │ │ ├── cuda │ │ │ ├── nms_kernel.cu.o │ │ │ ├── nms_kernel.h │ │ │ └── nms_kernel.cu │ │ ├── nms_cuda.h │ │ ├── nms.h │ │ ├── nms_cuda.c │ │ └── nms.c │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ └── pth_nms.cpython-36.pyc │ ├── build.py │ └── pth_nms.py ├── __pycache__ │ └── __init__.cpython-36.pyc └── build.sh ├── img ├── 1.png ├── 2.png ├── 3.jpg └── 4.png ├── imges ├── 1.jpg ├── 2.jpg ├── 3.jpg ├── 4.jpg ├── 5.jpg └── 6.jpg ├── __pycache__ └── coco_eval.cpython-36.pyc ├── model ├── __pycache__ │ ├── BiFPN.cpython-35.pyc │ ├── BiFPN.cpython-36.pyc │ ├── losses.cpython-35.pyc │ ├── losses.cpython-36.pyc │ ├── model.cpython-35.pyc │ ├── model.cpython-36.pyc │ ├── util.cpython-35.pyc │ ├── util.cpython-36.pyc │ ├── utils.cpython-35.pyc │ ├── utils.cpython-36.pyc │ ├── __init__.cpython-35.pyc │ ├── __init__.cpython-36.pyc │ ├── anchors.cpython-35.pyc │ ├── anchors.cpython-36.pyc │ ├── RetinaHead.cpython-35.pyc │ ├── RetinaHead.cpython-36.pyc │ ├── efficientdet.cpython-35.pyc │ └── efficientdet.cpython-36.pyc ├── __init__.py ├── efficientdet.py ├── anchors.py ├── RetinaHead.py ├── losses.py ├── BiFPN.py ├── util.py ├── model.py └── utils.py ├── dataset ├── __pycache__ │ └── dataloader.cpython-36.pyc └── dataloader.py ├── log ├── events.out.tfevents.1577539929.fineserver └── events.out.tfevents.1577540185.fineserver ├── .idea ├── misc.xml ├── inspectionProfiles │ └── profiles_settings.xml ├── modules.xml ├── bishe.iml └── workspace.xml ├── README.md ├── coco_eval.py ├── demo.py └── train.py /lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /img/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/img/1.png -------------------------------------------------------------------------------- /img/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/img/2.png -------------------------------------------------------------------------------- /img/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/img/3.jpg -------------------------------------------------------------------------------- /img/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/img/4.png -------------------------------------------------------------------------------- /imges/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/1.jpg -------------------------------------------------------------------------------- /imges/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/2.jpg -------------------------------------------------------------------------------- /imges/3.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/3.jpg -------------------------------------------------------------------------------- /imges/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/4.jpg -------------------------------------------------------------------------------- /imges/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/5.jpg -------------------------------------------------------------------------------- /imges/6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/6.jpg -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/lib/nms/src/cuda/nms_kernel.cu.o -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh); -------------------------------------------------------------------------------- /__pycache__/coco_eval.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/__pycache__/coco_eval.cpython-36.pyc -------------------------------------------------------------------------------- /lib/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/lib/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /model/__pycache__/BiFPN.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/BiFPN.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/BiFPN.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/BiFPN.cpython-36.pyc -------------------------------------------------------------------------------- /model/__pycache__/losses.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/losses.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/losses.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/losses.cpython-36.pyc -------------------------------------------------------------------------------- /model/__pycache__/model.cpython-35.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/model.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /model/__pycache__/util.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/util.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/util.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/util.cpython-36.pyc -------------------------------------------------------------------------------- /model/__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /model/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /model/__pycache__/anchors.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/anchors.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/anchors.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/anchors.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nms/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/lib/nms/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nms/__pycache__/pth_nms.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/lib/nms/__pycache__/pth_nms.cpython-36.pyc -------------------------------------------------------------------------------- /model/__pycache__/RetinaHead.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/RetinaHead.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/RetinaHead.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/RetinaHead.cpython-36.pyc -------------------------------------------------------------------------------- /dataset/__pycache__/dataloader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/dataset/__pycache__/dataloader.cpython-36.pyc -------------------------------------------------------------------------------- /log/events.out.tfevents.1577539929.fineserver: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/log/events.out.tfevents.1577539929.fineserver -------------------------------------------------------------------------------- /log/events.out.tfevents.1577540185.fineserver: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/log/events.out.tfevents.1577540185.fineserver -------------------------------------------------------------------------------- /model/__pycache__/efficientdet.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/efficientdet.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/efficientdet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/efficientdet.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nms/src/nms.h: -------------------------------------------------------------------------------- 1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh); -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.5.1" 2 | from .model import EfficientNet 3 | from .utils import ( 4 | GlobalParams, 5 | BlockArgs, 6 | BlockDecoder, 7 | efficientnet, 8 | get_model_params, 9 | ) 10 | 11 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 
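For reference, the `model/__init__.py` shown above re-exports the EfficientNet backbone and its parameter helpers. Below is a minimal usage sketch, assuming the modified `EfficientNet.from_pretrained(args)` signature that `model/efficientdet.py` and `train.py` rely on; the `SimpleNamespace` stands in for the argparse namespace and is illustrative only.

```python
# Minimal sketch of the exports in model/__init__.py; the from_pretrained(args)
# call and the list-of-feature-maps output follow model/efficientdet.py in this
# repository and may differ from the upstream EfficientNet-PyTorch API.
from types import SimpleNamespace

import torch
from model import EfficientNet

args = SimpleNamespace(backbone='efficientnet-b0', backbone_pretrained=False)
backbone = EfficientNet.from_pretrained(args)

# model/efficientdet.py keeps the last three pyramid levels of the backbone output
features = backbone(torch.randn(1, 3, 512, 512))[-3:]
print([f.shape for f in features])
```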
-------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _NMS_KERNEL 2 | #define _NMS_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 10 | 11 | void _nms(int boxes_num, float * boxes_dev, 12 | unsigned long long * mask_dev, float nms_overlap_thresh); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /lib/build.sh: -------------------------------------------------------------------------------- 1 | CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \ 2 | -gencode arch=compute_35,code=sm_35 \ 3 | -gencode arch=compute_50,code=sm_50 \ 4 | -gencode arch=compute_52,code=sm_52 \ 5 | -gencode arch=compute_60,code=sm_60 \ 6 | -gencode arch=compute_61,code=sm_61" 7 | 8 | 9 | # Build NMS 10 | cd nms/src/cuda 11 | echo "Compiling nms kernels by nvcc..." 12 | /usr/local/cuda/bin/nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH 13 | cd ../../ 14 | python build.py install 15 | cd ../ 16 | -------------------------------------------------------------------------------- /.idea/bishe.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 15 | -------------------------------------------------------------------------------- /lib/nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | #from torch.utils.ffi import create_extension 4 | from torch.utils.cpp_extension import BuildExtension 5 | 6 | 7 | sources = ['src/nms.c'] 8 | headers = ['src/nms.h'] 9 | defines = [] 10 | with_cuda = False 11 | 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/nms_cuda.c'] 15 | headers += ['src/nms_cuda.h'] 16 | defines += [('WITH_CUDA', None)] 17 | with_cuda = True 18 | 19 | this_file = os.path.dirname(os.path.realpath(__file__)) 20 | print(this_file) 21 | extra_objects = ['src/cuda/nms_kernel.cu.o'] 22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 23 | 24 | ffi = BuildExtension( 25 | '_ext.nms', 26 | headers=headers, 27 | sources=sources, 28 | define_macros=defines, 29 | relative_to=__file__, 30 | with_cuda=with_cuda, 31 | extra_objects=extra_objects, 32 | extra_compile_args=['-std=c99'] 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/nms/pth_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from ._ext import nms 3 | import numpy as np 4 | 5 | def pth_nms(dets, thresh): 6 | """ 7 | dets has to be a tensor 8 | """ 9 | if not dets.is_cuda: 10 | x1 = dets[:, 0] 11 | y1 = dets[:, 1] 12 | x2 = dets[:, 2] 13 | y2 = dets[:, 3] 14 | scores = dets[:, 4] 15 | 16 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 17 | order = scores.sort(0, descending=True)[1] 18 | # order = 
torch.from_numpy(np.ascontiguousarray(scores.numpy().argsort()[::-1])).long() 19 | 20 | keep = torch.LongTensor(dets.size(0)) 21 | num_out = torch.LongTensor(1) 22 | nms.cpu_nms(keep, num_out, dets, order, areas, thresh) 23 | 24 | return keep[:num_out[0]] 25 | else: 26 | x1 = dets[:, 0] 27 | y1 = dets[:, 1] 28 | x2 = dets[:, 2] 29 | y2 = dets[:, 3] 30 | scores = dets[:, 4] 31 | 32 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 33 | order = scores.sort(0, descending=True)[1] 34 | # order = torch.from_numpy(np.ascontiguousarray(scores.cpu().numpy().argsort()[::-1])).long().cuda() 35 | 36 | dets = dets[order].contiguous() 37 | 38 | keep = torch.LongTensor(dets.size(0)) 39 | num_out = torch.LongTensor(1) 40 | # keep = torch.cuda.LongTensor(dets.size(0)) 41 | # num_out = torch.cuda.LongTensor(1) 42 | nms.gpu_nms(keep, num_out, dets, thresh) 43 | 44 | return order[keep[:num_out[0]].cuda()].contiguous() 45 | # return order[keep[:num_out[0]]].contiguous() 46 | 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # efficientdet-pytorch 2 | ![image](https://github.com/coderhss/efficientdet-pytorch/blob/master/img/2.png) 3 | ![image](https://github.com/coderhss/efficientdet-pytorch/blob/master/img/1.png) 4 | ![image](https://github.com/coderhss/efficientdet-pytorch/blob/master/img/3.jpg) 5 | ![image](https://github.com/coderhss/efficientdet-pytorch/blob/master/img/4.png) 6 | 7 | PyTorch implementation of EfficientDet object detection, as described in [EfficientDet: Scalable and Efficient Object Detection](https://arxiv.org/pdf/1911.09070.pdf). 8 | 9 | This implementation is a very simple version without much data augmentation. 10 | 11 | The EfficientNet code is borrowed from [A PyTorch implementation of EfficientNet](https://github.com/lukemelas/EfficientNet-PyTorch). If you want to train EfficientDet from scratch, you should load the EfficientNet pretrained parameters. Use 12 | 13 | ``` 14 | python train.py --coco_path '/home/hoo/Dataset/COCO' --backbone 'efficientnet-b0' --backbone_pretrained True 15 | ``` 16 | 17 | and the EfficientNet pretrained parameters will be downloaded and loaded automatically, and training will start. 
18 | 19 | I've only trained efficientdet-d0 so far, without much data augmentation. If you want to load the EfficientDet pretrained parameters, use 20 | 21 | ``` 22 | python train.py --coco_path '/home/hoo/Dataset/COCO' --backbone 'efficientnet-b0' --backbone_pretrained False --EfficientDet_pretrained True --pretrained './weights/efficientdet_0.pth' 23 | ``` 24 | | Model | mAP | pretrained | 25 | | :-------------: | :---: | :----------------------------------------------------------: | 26 | | efficientdet-d0 | 25.9% | [download](https://drive.google.com/open?id=1UgQp9wqtc1O_EabU9O6NWNG6B8imYmv_) | 27 | 28 | **QQ group: 607724770 (Torch discussion group)** 29 | 30 | ## Acknowledgements 31 | - The EfficientNet code is borrowed from [A PyTorch implementation of EfficientNet](https://github.com/lukemelas/EfficientNet-PyTorch) 32 | - The RetinaNet code is borrowed from [Pytorch implementation of RetinaNet object detection](https://github.com/yhenon/pytorch-retinanet) 33 | -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #include <THC/THC.h> 8 | #include <TH/TH.h> 9 | #include <math.h> 10 | #include <stdio.h> 11 | 12 | #include "cuda/nms_kernel.h" 13 | 14 | 15 | extern THCState *state; 16 | 17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) { 18 | // boxes has to be sorted 19 | THArgCheck(THLongTensor_isContiguous(keep), 0, "boxes must be contiguous"); 20 | THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous"); 21 | // Number of ROIs 22 | int boxes_num = THCudaTensor_size(state, boxes, 0); 23 | int boxes_dim = THCudaTensor_size(state, boxes, 1); 24 | 25 | float* boxes_flat = THCudaTensor_data(state, boxes); 26 | 27 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 28 | THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks); 29 | unsigned long long* mask_flat = THCudaLongTensor_data(state, mask); 30 | 31 | _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh); 32 | 33 | THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks); 34 | THLongTensor_copyCuda(state, mask_cpu, mask); 35 | THCudaLongTensor_free(state, mask); 36 | 37 | unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu); 38 | 39 | THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks); 40 | unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu); 41 | THLongTensor_fill(remv_cpu, 0); 42 | 43 | long * keep_flat = THLongTensor_data(keep); 44 | long num_to_keep = 0; 45 | 46 | int i, j; 47 | for (i = 0; i < boxes_num; i++) { 48 | int nblock = i / threadsPerBlock; 49 | int inblock = i % threadsPerBlock; 50 | 51 | if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) { 52 | keep_flat[num_to_keep++] = i; 53 | unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks; 54 | for (j = nblock; j < col_blocks; j++) { 55 | remv_cpu_flat[j] |= p[j]; 56 | } 57 | } 58 | } 59 | 60 | long * num_out_flat = THLongTensor_data(num_out); 61 | * num_out_flat = num_to_keep; 62 | 63 | THLongTensor_free(mask_cpu); 64 | THLongTensor_free(remv_cpu); 65 | 66 | return 1; 67 | } 68 | 
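The NMS extension above is written against the legacy TH/THC C API and is compiled by `lib/build.sh` via the deprecated FFI-style `lib/nms/build.py`, so it may not build on recent PyTorch releases; note that `model/RetinaHead.py` in this repository already wraps `torchvision.ops.nms` instead. As a minimal sketch (assuming `torchvision` is installed; `pth_nms_fallback` is an illustrative name, not part of the repository), the `(N, 5)` detection layout used by `lib/nms/pth_nms.py` can be suppressed without the compiled extension. torchvision computes IoU without the legacy `+1` pixel offset used in `nms.c`, so results can differ marginally.

```python
# Minimal sketch: pure-PyTorch stand-in for lib.nms.pth_nms.pth_nms when the
# TH/THC extension cannot be compiled. Assumes torchvision is available;
# pth_nms_fallback is a hypothetical helper name, not defined in this repo.
import torch
from torchvision.ops import nms as tv_nms


def pth_nms_fallback(dets: torch.Tensor, thresh: float) -> torch.Tensor:
    """dets: (N, 5) tensor of [x1, y1, x2, y2, score]; returns the indices of
    the boxes kept by greedy NMS, ordered by descending score."""
    boxes = dets[:, :4]
    scores = dets[:, 4]
    return tv_nms(boxes, scores, iou_threshold=thresh)
```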
-------------------------------------------------------------------------------- /model/efficientdet.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: huashuoshuo 3 | # Data: 2019/12/17 10:53 4 | 5 | 6 | import torch 7 | import numpy as np 8 | import torch.nn as nn 9 | from .BiFPN import BiFPN 10 | # from .RetinaHead import RetinaHead 11 | 12 | # class ConvBlock(nn.Module): 13 | # def __init__(self): 14 | # super().__init__() 15 | class ConvBlock(nn.Module): 16 | """ 17 | 18 | """ 19 | def __init__(self, inp, oup, k_size, stride=1, padding=0): 20 | super().__init__() 21 | # Conv2d = get_same_padding_conv2d 22 | self.conv = nn.Conv2d(in_channels=inp, out_channels=oup, kernel_size=k_size, stride=stride, padding=padding, bias=False) 23 | self.norm = nn.BatchNorm2d(num_features=oup) 24 | self.act = nn.ReLU(inplace=True) 25 | def forward(self, x): 26 | x = self.norm(self.conv(x)) 27 | return self.act(x) 28 | 29 | from model import EfficientNet 30 | # from .RetinaHead import RetinaHead 31 | class EfficientDet(nn.Module): 32 | """ 33 | 34 | """ 35 | def __init__(self, args): 36 | super().__init__() 37 | 38 | self.inp = 64 39 | self.oup = 64 40 | self.bifpn_repeat = 2 41 | print(args.backbone) 42 | self.backbone = EfficientNet.from_pretrained(args) 43 | # self.backbone.get_list_features() 44 | self.tail = nn.ModuleList([ConvBlock(320, self.oup, 3, 2, 1), ConvBlock(self.oup, self.oup, 3, 2, 1)]) 45 | self.channel_same = self.change_channel(self.backbone.get_list_feature()[-3:]) 46 | self.BiFPN_first = BiFPN(oup=self.oup, first=True) 47 | self.BiFPN = nn.ModuleList() 48 | for i in range(self.bifpn_repeat-1): 49 | self.BiFPN.append(BiFPN(oup=self.oup, first=False)) 50 | 51 | def forward(self, inputs): 52 | features_in = self.extra(inputs) 53 | features_out = self.BiFPN_first(features_in) 54 | for i, bifpn in enumerate(self.BiFPN): 55 | features_out = bifpn(features_out) 56 | return features_out 57 | 58 | 59 | def extra(self, img): 60 | x = self.backbone(img)[-3:] 61 | # before_fpn = self.channel_same(x[-5:]) 62 | # print(x[-1].shape) 63 | # print(self.tail) 64 | # tail = [tail_conv(x[-1]) for i, tail_conv in enumerate(self.tail)] 65 | for i, tail_conv in enumerate(self.tail): 66 | x.append(tail_conv(x[-1])) 67 | 68 | 69 | before_fpn = [ 70 | conv(x[i]) 71 | for i, conv in enumerate(self.channel_same)] 72 | 73 | before_fpn.extend(x[-2:]) 74 | 75 | return before_fpn 76 | 77 | def change_channel(self, channel): 78 | convs = nn.ModuleList() 79 | for i in range(len(channel)): 80 | conv = ConvBlock(channel[i], self.oup, k_size=1, stride=1, padding=0) 81 | convs.append(conv) 82 | return convs -------------------------------------------------------------------------------- /lib/nms/src/nms.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) { 5 | // boxes has to be sorted 6 | THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous"); 7 | THArgCheck(THLongTensor_isContiguous(boxes), 2, "boxes must be contiguous"); 8 | THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous"); 9 | THArgCheck(THLongTensor_isContiguous(areas), 4, "areas must be contiguous"); 10 | // Number of ROIs 11 | long boxes_num = THFloatTensor_size(boxes, 0); 12 | long boxes_dim = THFloatTensor_size(boxes, 
1); 13 | 14 | long * keep_out_flat = THLongTensor_data(keep_out); 15 | float * boxes_flat = THFloatTensor_data(boxes); 16 | long * order_flat = THLongTensor_data(order); 17 | float * areas_flat = THFloatTensor_data(areas); 18 | 19 | THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num); 20 | THByteTensor_fill(suppressed, 0); 21 | unsigned char * suppressed_flat = THByteTensor_data(suppressed); 22 | 23 | // nominal indices 24 | int i, j; 25 | // sorted indices 26 | int _i, _j; 27 | // temp variables for box i's (the box currently under consideration) 28 | float ix1, iy1, ix2, iy2, iarea; 29 | // variables for computing overlap with box j (lower scoring box) 30 | float xx1, yy1, xx2, yy2; 31 | float w, h; 32 | float inter, ovr; 33 | 34 | long num_to_keep = 0; 35 | for (_i=0; _i < boxes_num; ++_i) { 36 | i = order_flat[_i]; 37 | if (suppressed_flat[i] == 1) { 38 | continue; 39 | } 40 | keep_out_flat[num_to_keep++] = i; 41 | ix1 = boxes_flat[i * boxes_dim]; 42 | iy1 = boxes_flat[i * boxes_dim + 1]; 43 | ix2 = boxes_flat[i * boxes_dim + 2]; 44 | iy2 = boxes_flat[i * boxes_dim + 3]; 45 | iarea = areas_flat[i]; 46 | for (_j = _i + 1; _j < boxes_num; ++_j) { 47 | j = order_flat[_j]; 48 | if (suppressed_flat[j] == 1) { 49 | continue; 50 | } 51 | xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]); 52 | yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]); 53 | xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]); 54 | yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]); 55 | w = fmaxf(0.0, xx2 - xx1 + 1); 56 | h = fmaxf(0.0, yy2 - yy1 + 1); 57 | inter = w * h; 58 | ovr = inter / (iarea + areas_flat[j] - inter); 59 | if (ovr >= nms_overlap_thresh) { 60 | suppressed_flat[j] = 1; 61 | } 62 | } 63 | } 64 | 65 | long *num_out_flat = THLongTensor_data(num_out); 66 | *num_out_flat = num_to_keep; 67 | THByteTensor_free(suppressed); 68 | return 1; 69 | } -------------------------------------------------------------------------------- /coco_eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from pycocotools.coco import COCO 4 | from pycocotools.cocoeval import COCOeval 5 | 6 | import numpy as np 7 | import json 8 | import os 9 | 10 | import torch 11 | 12 | def evaluate_coco(dataset, model, threshold=0.05): 13 | 14 | model.eval() 15 | 16 | with torch.no_grad(): 17 | 18 | # start collecting results 19 | results = [] 20 | image_ids = [] 21 | 22 | for index in range(len(dataset)-4500): 23 | data = dataset[index] 24 | # scale = data['scale'] 25 | scale1 = data['scale1'] 26 | scale2 = data['scale2'] 27 | 28 | # run network 29 | scores, labels, boxes = model(data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0)) 30 | scores = scores.cpu() 31 | labels = labels.cpu() 32 | boxes = boxes.cpu() 33 | 34 | # correct boxes for image scale 35 | # boxes /= scale 36 | boxes[:, 0] /= scale2 37 | boxes[:, 2] /= scale2 38 | boxes[:, 1] /= scale1 39 | boxes[:, 3] /= scale1 40 | 41 | if boxes.shape[0] > 0: 42 | # change to (x, y, w, h) (MS COCO standard) 43 | boxes[:, 2] -= boxes[:, 0] 44 | boxes[:, 3] -= boxes[:, 1] 45 | 46 | # compute predicted labels and scores 47 | #for box, score, label in zip(boxes[0], scores[0], labels[0]): 48 | for box_id in range(boxes.shape[0]): 49 | score = float(scores[box_id]) 50 | label = int(labels[box_id]) 51 | box = boxes[box_id, :] 52 | 53 | # scores are sorted, so we can break 54 | if score < threshold: 55 | break 56 | 57 | # append detection for each positively labeled class 58 | image_result = { 59 | 
'image_id' : dataset.image_ids[index], 60 | 'category_id' : dataset.label_to_coco_label(label), 61 | 'score' : float(score), 62 | 'bbox' : box.tolist(), 63 | } 64 | 65 | # append detection to results 66 | results.append(image_result) 67 | 68 | # append image to list of processed images 69 | image_ids.append(dataset.image_ids[index]) 70 | 71 | # print progress 72 | print('{}/{}'.format(index, len(dataset)), end='\r') 73 | 74 | if not len(results): 75 | return 76 | 77 | # write output 78 | json.dump(results, open('{}_bbox_results.json'.format(dataset.set_name), 'w'), indent=4) 79 | 80 | # load results in COCO evaluation tool 81 | coco_true = dataset.coco 82 | coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(dataset.set_name)) 83 | 84 | # run COCO evaluation 85 | coco_eval = COCOeval(coco_true, coco_pred, 'bbox') 86 | coco_eval.params.imgIds = image_ids 87 | coco_eval.evaluate() 88 | coco_eval.accumulate() 89 | mAP = coco_eval.summarize() 90 | 91 | model.train() 92 | 93 | return mAP 94 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include 12 | #include 13 | #include 14 | #include "nms_kernel.h" 15 | 16 | __device__ inline float devIoU(float const * const a, float const * const b) { 17 | float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]); 18 | float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]); 19 | float width = fmaxf(right - left + 1, 0.f), height = fmaxf(bottom - top + 1, 0.f); 20 | float interS = width * height; 21 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 22 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 23 | return interS / (Sa + Sb - interS); 24 | } 25 | 26 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 27 | const float *dev_boxes, unsigned long long *dev_mask) { 28 | const int row_start = blockIdx.y; 29 | const int col_start = blockIdx.x; 30 | 31 | // if (row_start > col_start) return; 32 | 33 | const int row_size = 34 | fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 35 | const int col_size = 36 | fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 37 | 38 | __shared__ float block_boxes[threadsPerBlock * 5]; 39 | if (threadIdx.x < col_size) { 40 | block_boxes[threadIdx.x * 5 + 0] = 41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 42 | block_boxes[threadIdx.x * 5 + 1] = 43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 44 | block_boxes[threadIdx.x * 5 + 2] = 45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 46 | block_boxes[threadIdx.x * 5 + 3] = 47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 48 | block_boxes[threadIdx.x * 5 + 4] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 50 | } 51 | __syncthreads(); 52 | 53 | if (threadIdx.x < row_size) { 54 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 55 | const float *cur_box = dev_boxes + cur_box_idx * 5; 56 | int i = 0; 57 | unsigned long long t = 0; 58 | int start = 0; 59 | if (row_start == col_start) { 60 | 
start = threadIdx.x + 1; 61 | } 62 | for (i = start; i < col_size; i++) { 63 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 64 | t |= 1ULL << i; 65 | } 66 | } 67 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 68 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 69 | } 70 | } 71 | 72 | 73 | void _nms(int boxes_num, float * boxes_dev, 74 | unsigned long long * mask_dev, float nms_overlap_thresh) { 75 | 76 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 77 | DIVUP(boxes_num, threadsPerBlock)); 78 | dim3 threads(threadsPerBlock); 79 | nms_kernel<<>>(boxes_num, 80 | nms_overlap_thresh, 81 | boxes_dev, 82 | mask_dev); 83 | } 84 | 85 | #ifdef __cplusplus 86 | } 87 | #endif 88 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: huashuoshuo 3 | # Data: 12/26/19 2:12 PM 4 | import torch 5 | import torch.nn as nn 6 | from model.util import Filter_boxes 7 | import os 8 | import argparse 9 | from RetinaHead import RetinaHead 10 | import skimage.io 11 | import skimage 12 | import skimage.transform 13 | import numpy as np 14 | import cv2 as cv2 15 | import matplotlib.pyplot as plt 16 | import time 17 | from model.util import num2name 18 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 19 | 20 | def main(): 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('--img_path', type=str, default='/home/huashuoshuo/bishe/imges/6.jpg') 23 | parser.add_argument('--weight_path', type=str, default='./weights/retinanet_15.pth') 24 | parser.add_argument('--backbone', type=str, default='efficientnet-b0') 25 | parser.add_argument('--backbone_pretrained', type=bool, default=False) 26 | parser.add_argument('--threshold', type=float, default=0.35) 27 | 28 | parser = parser.parse_args() 29 | with torch.no_grad(): 30 | efficientdet = RetinaHead(parser, is_demo=True) 31 | # efficientdet = torch.nn.DataParallel(efficientdet).cuda() 32 | efficientdet = efficientdet.cuda() 33 | state_dict = torch.load(parser.weight_path) 34 | efficientdet.load_state_dict(state_dict) 35 | 36 | # img read 37 | img = skimage.io.imread(parser.img_path) 38 | img_input, scale1, scale2= preprocessing(img) 39 | efficientdet.eval() 40 | img_input = img_input.cuda() 41 | time_start = time.time() 42 | # for i in range(1000): 43 | boxes, classification, scores = efficientdet(img_input) 44 | boxes, scores, labels= Filter_boxes(parser)([boxes, classification, scores]) 45 | 46 | time_stop = time.time() 47 | print('time:', time_stop-time_start) 48 | # scores = scores.cpu().numpy() 49 | # labels = labels.cpu().numpy() 50 | # boxes = boxes.cpu().numpy() 51 | 52 | # print(boxes) 53 | # print(np.shape(img)) 54 | text_thickness = 1 55 | thickness = 2 56 | scale = 0.4 57 | line_type = 8 58 | for i in range(np.shape(boxes)[0]): 59 | box = boxes[i].cpu().numpy() 60 | score = scores[i].cpu().numpy() 61 | for j in range(np.shape(box)[0]): 62 | p1 = (int(box[j][0]/scale2), int(box[j][1]/scale1)) 63 | p2 = (int(box[j][2]/scale2), int(box[j][3]/scale1)) 64 | cv2.rectangle(img, p1, p2, (0, 0, 255), 2) 65 | s = '%s/%.1f%%' % (num2name[labels[i]+1], score[j] * 100) 66 | text_size, baseline = cv2.getTextSize(s, cv2.FONT_HERSHEY_SIMPLEX, scale, text_thickness) 67 | 68 | if (p2[0] - p1[0] < 1) or (p2[1] - p1[1] < 1): 69 | continue 70 | # p1 = (p1[0] - text_size[1], p1[1]) 71 | 72 | cv2.rectangle(img, (p1[0], p1[1]), 73 | (p1[0] + text_size[0], p1[1] + text_size[1]), (0, 0, 
255), -1) 74 | 75 | cv2.putText(img, s, (p1[0], p1[1] + 2*baseline), cv2.FONT_HERSHEY_SIMPLEX, scale, (255, 255, 255), 76 | text_thickness, line_type) 77 | plt.imshow(img) 78 | plt.show() 79 | # print(scores, labels) 80 | 81 | 82 | 83 | return 84 | 85 | def preprocessing(img): 86 | 87 | img = img.astype(np.float32) / 255.0 88 | # normalize 89 | mean = np.array([[[0.485, 0.456, 0.406]]]) 90 | std = np.array([[[0.229, 0.224, 0.225]]]) 91 | img = (img - mean) / std 92 | # resize 93 | rows, cols, cns = np.shape(img) 94 | scale1 = 512 / rows 95 | scale2 = 512 / cols 96 | img_input = skimage.transform.resize(img, (512, 512)) 97 | img_input = torch.from_numpy(img_input) 98 | img_input = img_input.unsqueeze(0) 99 | img_input = img_input.permute(0, 3, 1, 2).float() 100 | return img_input, scale1, scale2 101 | 102 | 103 | def box_filter(scores, labels, boxes): 104 | scores = scores.cpu() 105 | labels = labels.cpu() 106 | boxes = boxes.cpu() 107 | 108 | return 109 | 110 | 111 | if __name__=='__main__': 112 | main() 113 | -------------------------------------------------------------------------------- /model/anchors.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: huashuoshuo 3 | # Data: 2019/12/19 18:58 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | class Anchors(nn.Module): 11 | def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None): 12 | super(Anchors, self).__init__() 13 | 14 | if pyramid_levels is None: 15 | self.pyramid_levels = [3, 4, 5, 6, 7] 16 | if strides is None: 17 | self.strides = [2 ** x for x in self.pyramid_levels] 18 | if sizes is None: 19 | self.sizes = [2 ** (x + 2) for x in self.pyramid_levels] 20 | if ratios is None: 21 | self.ratios = np.array([0.5, 1, 2]) 22 | if scales is None: 23 | self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) 24 | 25 | def forward(self, image): 26 | 27 | image_shape = image.shape[2:] 28 | image_shape = np.array(image_shape) 29 | image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels] 30 | 31 | # compute anchors over all pyramid levels 32 | all_anchors = np.zeros((0, 4)).astype(np.float32) 33 | 34 | for idx, p in enumerate(self.pyramid_levels): 35 | anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales) 36 | shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors) 37 | all_anchors = np.append(all_anchors, shifted_anchors, axis=0) 38 | 39 | all_anchors = np.expand_dims(all_anchors, axis=0) 40 | 41 | return torch.from_numpy(all_anchors.astype(np.float32)).cuda() 42 | 43 | 44 | def generate_anchors(base_size=16, ratios=None, scales=None): 45 | """ 46 | Generate anchor (reference) windows by enumerating aspect ratios X 47 | scales w.r.t. a reference window. 
48 | """ 49 | 50 | if ratios is None: 51 | ratios = np.array([0.5, 1, 2]) 52 | 53 | if scales is None: 54 | scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) 55 | 56 | num_anchors = len(ratios) * len(scales) 57 | 58 | # initialize output anchors 59 | anchors = np.zeros((num_anchors, 4)) 60 | 61 | # scale base_size 62 | anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T 63 | 64 | # compute areas of anchors 65 | areas = anchors[:, 2] * anchors[:, 3] 66 | 67 | # correct for ratios 68 | anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales))) 69 | anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales)) 70 | 71 | # transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2) 72 | anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T 73 | anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T 74 | 75 | return anchors 76 | 77 | 78 | def compute_shape(image_shape, pyramid_levels): 79 | """Compute shapes based on pyramid levels. 80 | 81 | :param image_shape: 82 | :param pyramid_levels: 83 | :return: 84 | """ 85 | image_shape = np.array(image_shape[:2]) 86 | image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels] 87 | return image_shapes 88 | 89 | 90 | def anchors_for_shape( 91 | image_shape, 92 | pyramid_levels=None, 93 | ratios=None, 94 | scales=None, 95 | strides=None, 96 | sizes=None, 97 | shapes_callback=None, 98 | ): 99 | image_shapes = compute_shape(image_shape, pyramid_levels) 100 | 101 | # compute anchors over all pyramid levels 102 | all_anchors = np.zeros((0, 4)) 103 | for idx, p in enumerate(pyramid_levels): 104 | anchors = generate_anchors(base_size=sizes[idx], ratios=ratios, scales=scales) 105 | shifted_anchors = shift(image_shapes[idx], strides[idx], anchors) 106 | all_anchors = np.append(all_anchors, shifted_anchors, axis=0) 107 | 108 | return all_anchors 109 | 110 | 111 | def shift(shape, stride, anchors): 112 | shift_x = (np.arange(0, shape[1]) + 0.5) * stride 113 | shift_y = (np.arange(0, shape[0]) + 0.5) * stride 114 | 115 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 116 | 117 | shifts = np.vstack(( 118 | shift_x.ravel(), shift_y.ravel(), 119 | shift_x.ravel(), shift_y.ravel() 120 | )).transpose() 121 | 122 | # add A anchors (1, A, 4) to 123 | # cell K shifts (K, 1, 4) to get 124 | # shift anchors (K, A, 4) 125 | # reshape to (K*A, 4) shifted anchors 126 | A = anchors.shape[0] 127 | K = shifts.shape[0] 128 | all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) 129 | all_anchors = all_anchors.reshape((K * A, 4)) 130 | 131 | return all_anchors -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: huashuoshuo 3 | # Data: 2019/12/19 14:57 4 | 5 | import os 6 | import torch 7 | import numpy as np 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | from dataset.dataloader import CocoDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, UnNormalizer, Normalizer 11 | from torch.utils.data import Dataset, DataLoader 12 | from torchvision import transforms 13 | # from model.efficientdet import EfficientDet 14 | from model.RetinaHead import RetinaHead 15 | import coco_eval 16 | import argparse 17 | from tensorboardX import SummaryWriter 18 | import cv2 as cv2 19 | import matplotlib.pyplot as plt 20 | 21 | # writer = SummaryWriter('log') 22 | 23 | os.environ['CUDA_VISIBLE_DEVICES']='0, 1, 2, 3' 24 | 
def main(arg=None): 25 | parser = argparse.ArgumentParser() 26 | 27 | parser.add_argument('--coco_path', type=str, default='/home/hoo/Dataset/COCO') 28 | parser.add_argument('--depth', type=int, default=3) 29 | parser.add_argument('--epoches', type=int, default=50) 30 | parser.add_argument('--phi', type=int, default=0) 31 | parser.add_argument('--backbone', type=str, default='efficientnet-b0') 32 | parser.add_argument('--backbone_pretrained', type=bool, default=True) 33 | parser.add_argument('--EfficientDet_pretrained', type=bool, default=False) 34 | parser.add_argument('--pretrained', type=str, default='./weights/retinanet_1.pth') 35 | parser.add_argument('--batch_size', type=int, default=24) 36 | 37 | parser = parser.parse_args(arg) 38 | dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()])) 39 | # print(dataset_train.num_classes()) 40 | dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()])) 41 | 42 | sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False) 43 | dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_sampler=sampler) 44 | 45 | 46 | sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) 47 | dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) 48 | 49 | # Create the Model 50 | 51 | efficientdet = RetinaHead(parser) 52 | 53 | 54 | 55 | efficientdet = torch.nn.DataParallel(efficientdet).cuda() 56 | if parser.EfficientDet_pretrained: 57 | state_dict = torch.load(parser.pretrained) 58 | # print(state_dict) 59 | efficientdet.module.load_state_dict(state_dict) 60 | 61 | efficientdet.training = True 62 | 63 | optimizer = optim.Adam(efficientdet.parameters(), lr=1e-3) 64 | # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) 65 | scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[3, 5, 7, 9, 11, 13, 15, 17, 19], gamma=0.5) 66 | 67 | for epoch_num in range(parser.epoches): 68 | efficientdet.train() 69 | 70 | epoch_loss = [] 71 | 72 | for iter_num, data in enumerate(dataloader_train): 73 | break 74 | # try: 75 | # print(data) 76 | optimizer.zero_grad() 77 | # print(np.shape(data['annot'])) 78 | classification_loss, regression_loss = efficientdet([data['img'].cuda().float(), data['annot']]) 79 | classification_loss = classification_loss.mean() 80 | regression_loss = regression_loss.mean() 81 | loss = classification_loss + regression_loss 82 | if bool(loss==0): 83 | continue 84 | loss.backward() 85 | 86 | torch.nn.utils.clip_grad_norm_(efficientdet.parameters(), 0.1) 87 | optimizer.step() 88 | epoch_loss.append(float(loss)) 89 | print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss))) 90 | 91 | if iter_num % 200 == 199: 92 | niter = epoch_num * len(dataloader_train) + iter_num 93 | # print(loss) 94 | writer.add_scalar('Train/Loss', loss, niter) 95 | writer.add_scalar('Train/Reg_Loss', regression_loss, niter) 96 | writer.add_scalar('Train/Cls_Loss', classification_loss, niter) 97 | 98 | 99 | del classification_loss 100 | del regression_loss 101 | # except Exception as e: 102 | # print(e) 103 | # continue 104 | # if iter_num == 20: 105 | # break 106 | 107 | # print('Evaluating dataset') 108 | mAP = 
coco_eval.evaluate_coco(dataset_val, efficientdet) 109 | # writer.add_scalar('Test/mAP', mAP, epoch_num) 110 | print('Save Model') 111 | # torch.save(efficientdet.module.state_dict(), './weights/retinanet_{}.pth'.format(epoch_num)) 112 | # scheduler.step(np.mean(epoch_loss)) 113 | scheduler.step(epoch=epoch_num) 114 | # writer.close() 115 | 116 | 117 | if __name__ == '__main__': 118 | main() -------------------------------------------------------------------------------- /model/RetinaHead.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: huashuoshuo 3 | # Data: 2019/12/18 16:37 4 | 5 | import torch 6 | import torch.nn as nn 7 | from model.BiFPN import ConvBlock 8 | import model.losses as losses 9 | from model.efficientdet import EfficientDet 10 | from pycocotools.coco import COCO as COCO 11 | from model.anchors import Anchors 12 | # from lib.nms.pth_nms import pth_nms 13 | import torchvision.ops as ops 14 | from model.util import BasicBlock, Bottleneck, BBoxTransform, ClipBoxes, Filter_boxes 15 | def nms(bbox, score, thresh): 16 | # bbox, score = dets 17 | return ops.nms(boxes=bbox, scores=score, iou_threshold=thresh) 18 | # return pth_nms(dets, thresh) 19 | 20 | 21 | class Reg(nn.Module): 22 | """ 23 | 24 | """ 25 | def __init__(self, inp, oup, depth, num_anchor): 26 | super().__init__() 27 | self.inp = inp 28 | self.oup = oup 29 | self.D = depth 30 | self.reg = nn.ModuleList() 31 | self.num_anchors = num_anchor 32 | 33 | for i in range(self.D): 34 | self.reg.append(ConvBlock(inp=self.inp, oup=self.oup, k_size=3, stride=1, padding=1)) 35 | # self.retina_cls = nn.Conv2d(self.oup, self.num_anchors * self.num_class, 3, padding=1) 36 | self.retina_reg = nn.Conv2d(self.oup, self.num_anchors * 4, 3, padding=1) 37 | def forward(self, x): 38 | reg = x 39 | for conv in self.reg: 40 | reg = conv(reg) 41 | 42 | reg = self.retina_reg(reg) 43 | 44 | reg = reg.permute(0, 2, 3, 1) 45 | return reg.contiguous().view(reg.shape[0], -1, 4) 46 | 47 | class Cls(nn.Module): 48 | """ 49 | 50 | """ 51 | def __init__(self, inp, oup, depth, num_anchor, num_class): 52 | super().__init__() 53 | self.inp = inp 54 | self.oup = oup 55 | self.D = depth 56 | self.cls = nn.ModuleList() 57 | self.num_anchors = num_anchor 58 | self.num_class = num_class 59 | for i in range(self.D): 60 | self.cls.append(ConvBlock(inp=self.inp, oup=self.oup, k_size=3, stride=1, padding=1)) 61 | self.retina_cls = nn.Conv2d(self.oup, self.num_anchors * self.num_class, 3, padding=1) 62 | self.act = nn.Sigmoid() 63 | def forward(self, x): 64 | cls = x 65 | for conv in self.cls: 66 | cls = conv(cls) 67 | cls = self.retina_cls(cls) 68 | cls = self.act(cls) 69 | 70 | cls = cls.permute(0, 2, 3, 1) 71 | 72 | batch_size, width, height, channel = cls.shape 73 | 74 | out = cls.view(batch_size, width, height, self.num_anchors, self.num_class) 75 | return out.contiguous().view(cls.shape[0], -1, self.num_class) 76 | 77 | 78 | class RetinaHead(nn.Module): 79 | """ 80 | 81 | """ 82 | def __init__(self, parser, num_classes=80, num_anchor=9, is_demo=False): 83 | super().__init__() 84 | depth = 3 85 | inp = oup = 64 86 | 87 | self.regression = Reg(inp, oup, depth-1, num_anchor) 88 | self.classification = Cls(inp, oup, depth-1, num_anchor, num_classes) 89 | self.FocalLoss = losses.FocalLoss() 90 | self.anchors = Anchors() 91 | self.EfficientDet = EfficientDet(parser) 92 | self.regressBoxes = BBoxTransform() 93 | self.is_demo = is_demo 94 | self.clipBoxes = ClipBoxes() 95 | def 
forward(self, inputs): 96 | if self.training: 97 | img_batch, annotations = inputs 98 | else: 99 | img_batch = inputs 100 | 101 | features = self.EfficientDet(img_batch) 102 | regression = torch.cat([self.regression(feature) for feature in features], dim=1) 103 | classification = torch.cat([self.classification(feature) for feature in features], dim=1) 104 | anchors = self.anchors(img_batch) 105 | 106 | # self.FocalLoss(classification, regression, anchors, annotations) 107 | if self.training: 108 | return self.FocalLoss(classification, regression, anchors, annotations) 109 | else: 110 | transformed_anchors = self.regressBoxes(anchors, regression) 111 | transformed_anchors = self.clipBoxes(transformed_anchors, img_batch) 112 | 113 | scores = torch.max(classification, dim=2, keepdim=True)[0] 114 | 115 | if self.is_demo: 116 | return transformed_anchors, classification, scores 117 | 118 | scores_over_thresh = (scores>0.01)[0, :, 0] 119 | 120 | if scores_over_thresh.sum() == 0: 121 | # no boxes to NMS, just return 122 | return [torch.zeros(0).cuda(), torch.zeros(0).cuda(), torch.zeros(0, 4).cuda()] 123 | 124 | classification = classification[:, scores_over_thresh, :] 125 | transformed_anchors = transformed_anchors[:, scores_over_thresh, :] 126 | scores = scores[:, scores_over_thresh, :] 127 | # print(transformed_anchors.shape, scores.shape) 128 | 129 | # anchors_nms_idx = nms(torch.cat([transformed_anchors, scores], dim=2)[0, :, :], 0.5) 130 | # print(transformed_anchors[0, :, :]) 131 | anchors_nms_idx = nms(transformed_anchors[0, :, :], scores[0, :, 0], 0.45) 132 | nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1) 133 | 134 | return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]] 135 | 136 | 137 | 138 | 139 | 140 | 141 | -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 11 | 12 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 34 | 35 | 36 | 37 | 38 | 57 | 58 | 59 | 78 | 79 | 80 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 1576477752047 114 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | file://$PROJECT_DIR$/model/model.py 125 | 198 126 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /model/losses.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: huashuoshuo 3 | # Data: 2019/12/19 15:05 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | 9 | def calc_iou(a, b): 10 | 11 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 12 | iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0]) 13 | ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1]) 14 | 15 | iw = torch.clamp(iw, min=0) 16 | ih = torch.clamp(ih, min=0) 17 | 18 | ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - iw * ih 19 | 20 | ua = torch.clamp(ua, min=1e-8) 21 | 22 | intersection = iw * ih 23 | 24 | IoU = intersection / ua 25 | 26 | return IoU 27 | 28 | class FocalLoss(nn.Module): 29 | #def __init__(self): 30 | 31 | def forward(self, classifications, regressions, anchors, annotations): 32 | alpha = 0.25 33 | gamma = 2.0 34 | batch_size = classifications.shape[0] 35 | classification_losses = [] 36 | regression_losses = [] 37 | 
38 | anchor = anchors[0, :, :] 39 | 40 | anchor_widths = anchor[:, 2] - anchor[:, 0] 41 | anchor_heights = anchor[:, 3] - anchor[:, 1] 42 | anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths 43 | anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights 44 | 45 | for j in range(batch_size): 46 | 47 | classification = classifications[j, :, :] 48 | regression = regressions[j, :, :] 49 | 50 | bbox_annotation = annotations[j, :, :] 51 | bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1] 52 | 53 | if bbox_annotation.shape[0] == 0: 54 | regression_losses.append(torch.tensor(0).float().to(anchors.device)) 55 | classification_losses.append(torch.tensor(0).float().to(anchors.device)) 56 | 57 | continue 58 | 59 | classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4) 60 | 61 | IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4]) # num_anchors x num_annotations 62 | 63 | IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1 64 | 65 | #import pdb 66 | #pdb.set_trace() 67 | 68 | # compute the loss for classification 69 | targets = torch.ones(classification.shape) * -1 70 | targets = targets.to(anchors.device) 71 | 72 | targets[torch.lt(IoU_max, 0.4), :] = 0 73 | 74 | positive_indices = torch.ge(IoU_max, 0.5) 75 | 76 | num_positive_anchors = positive_indices.sum() 77 | 78 | assigned_annotations = bbox_annotation[IoU_argmax, :] 79 | 80 | targets[positive_indices, :] = 0 81 | targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1 82 | 83 | alpha_factor = torch.ones(targets.shape) * alpha 84 | alpha_factor = alpha_factor.to(anchors.device) 85 | alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor) 86 | focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification) 87 | focal_weight = alpha_factor * torch.pow(focal_weight, gamma) 88 | 89 | bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification)) 90 | 91 | # cls_loss = focal_weight * torch.pow(bce, gamma) 92 | cls_loss = focal_weight * bce 93 | 94 | cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).to(anchors.device)) 95 | 96 | classification_losses.append(cls_loss.sum()/torch.clamp(num_positive_anchors.float(), min=1.0)) 97 | 98 | # compute the loss for regression 99 | 100 | if positive_indices.sum() > 0: 101 | assigned_annotations = assigned_annotations[positive_indices, :] 102 | 103 | anchor_widths_pi = anchor_widths[positive_indices] 104 | anchor_heights_pi = anchor_heights[positive_indices] 105 | anchor_ctr_x_pi = anchor_ctr_x[positive_indices] 106 | anchor_ctr_y_pi = anchor_ctr_y[positive_indices] 107 | 108 | gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0] 109 | gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1] 110 | gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths 111 | gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights 112 | 113 | # clip widths to 1 114 | gt_widths = torch.clamp(gt_widths, min=1) 115 | gt_heights = torch.clamp(gt_heights, min=1) 116 | 117 | targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi 118 | targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi 119 | targets_dw = torch.log(gt_widths / anchor_widths_pi) 120 | targets_dh = torch.log(gt_heights / anchor_heights_pi) 121 | 122 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh)) 123 | targets = targets.t() 124 | 125 | targets = targets/torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).to(anchors.device) 126 | 127 | 128 | 
negative_indices = ~positive_indices 129 | 130 | regression_diff = torch.abs(targets - regression[positive_indices, :]) 131 | 132 | regression_loss = torch.where( 133 | torch.le(regression_diff, 1.0 / 9.0), 134 | 0.5 * 9.0 * torch.pow(regression_diff, 2), 135 | regression_diff - 0.5 / 9.0 136 | ) 137 | regression_losses.append(regression_loss.mean()) 138 | else: 139 | regression_losses.append(torch.tensor(0).float().to(anchors.device)) 140 | 141 | return torch.stack(classification_losses).mean(dim=0, keepdim=True), torch.stack(regression_losses).mean(dim=0, keepdim=True) -------------------------------------------------------------------------------- /model/BiFPN.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: huashuoshuo 3 | # Data: 2019/12/17 14:36 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.functional as F 8 | from .utils import ( 9 | round_filters, 10 | round_repeats, 11 | drop_connect, 12 | get_same_padding_conv2d, 13 | get_model_params, 14 | efficientnet_params, 15 | load_pretrained_weights, 16 | Swish, 17 | MemoryEfficientSwish, 18 | ) 19 | 20 | class ConvBlock(nn.Module): 21 | """ 22 | 23 | """ 24 | def __init__(self, inp, oup, k_size, stride=1, padding=0, group=1): 25 | super().__init__() 26 | # Conv2d = get_same_padding_conv2d 27 | self.conv = nn.Conv2d(in_channels=inp, out_channels=oup, kernel_size=k_size, stride=stride, padding=padding, bias=False, groups=group).cuda() 28 | self.norm = nn.BatchNorm2d(num_features=oup).cuda() 29 | self.act = nn.ReLU(inplace=True) 30 | 31 | def forward(self, x): 32 | x = self.norm(self.conv(x)) 33 | # print(self.conv) 34 | x = self.conv(x) 35 | return self.act(x) 36 | 37 | 38 | class BiFPN(nn.Module): 39 | """ 40 | 41 | """ 42 | def __init__(self,oup, first=True): 43 | super().__init__() 44 | # self.features_in = features_in 45 | self.oup = oup 46 | # self.dw_conv = ConvBlock(oup, oup, k_size=3, stride=1, padding=1, group=oup) 47 | # self.pw_conv = ConvBlock(oup, oup, k_size=1, stride=1, padding=0) 48 | 49 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2) 50 | self.first = first 51 | self.conv_gen() 52 | self.w_gen() 53 | def forward(self, features_in): 54 | # self.tail(x) 55 | # P3_in, P4_in, P5_in, P6_in, P7_in = features_in 56 | 57 | features_out = self.top_down(features_in) 58 | return features_out 59 | 60 | def conv_gen(self): 61 | # P3_in, P4_in, P5_in, P6_in, P7_in = features_in 62 | if not self.first: 63 | self.P3_in_conv = ConvBlock(self.oup, self.oup, k_size=1, stride=1, padding=0) 64 | self.P4_in_conv = ConvBlock(self.oup, self.oup, k_size=1, stride=1, padding=0) 65 | self.P5_in_conv = ConvBlock(self.oup, self.oup, k_size=1, stride=1, padding=0) 66 | self.P6_in_conv = ConvBlock(self.oup, self.oup, k_size=1, stride=1, padding=0) 67 | self.P7_in_conv = ConvBlock(self.oup, self.oup, k_size=1, stride=1, padding=0) 68 | 69 | # upsample 70 | self.P6_td_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup) 71 | self.P5_td_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup) 72 | self.P4_td_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup) 73 | self.P3_out_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup) 74 | 75 | # downsample 76 | self.P4_out_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup) 77 | self.P5_out_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, 
group=self.oup) 78 | self.P6_out_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup) 79 | self.P7_out_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup) 80 | 81 | def w_gen(self): 82 | self.P6_td_add = wAdd(2) 83 | self.P5_td_add = wAdd(2) 84 | self.P4_td_add = wAdd(2) 85 | self.P3_out_add = wAdd(2) 86 | self.P4_out_add = wAdd(3) 87 | self.P5_out_add = wAdd(3) 88 | self.P6_out_add = wAdd(3) 89 | self.P7_out_add = wAdd(2) 90 | 91 | def top_down_no_w(self, features_in): 92 | P3_in, P4_in, P5_in, P6_in, P7_in = features_in 93 | if not self.first: 94 | P3_in = self.P3_in_conv(P3_in) 95 | P4_in = self.P4_in_conv(P4_in) 96 | P5_in = self.P5_in_conv(P5_in) 97 | P6_in = self.P6_in_conv(P6_in) 98 | P7_in = self.P7_in_conv(P7_in) 99 | 100 | # upsample 101 | P7_U = self.Resize()(P7_in) 102 | P6_td = P7_U + P6_in 103 | P6_td = self.P6_td_conv(P6_td) 104 | P6_U = self.Resize()(P6_td) 105 | P5_td = P6_U + P5_in 106 | P5_td = self.P5_td_conv(P5_td) 107 | P5_U = self.Resize()(P5_td) 108 | P4_td = P5_U + P4_in 109 | P4_td = self.P4_td_conv(P4_td) 110 | P4_U = self.Resize()(P4_td) 111 | P3_out = P4_U + P3_in 112 | P3_out = self.P3_out_conv(P3_out) 113 | 114 | # downsample 115 | P3_D = self.pool(P3_out) 116 | P4_out = P3_D + P4_td + P4_in 117 | P4_out = self.P4_out_conv(P4_out) 118 | P4_D = self.pool(P4_out) 119 | P5_out = P4_D + P5_td + P5_in 120 | P5_out = self.P5_out_conv(P5_out) 121 | P5_D = self.pool(P5_out) 122 | P6_out = P5_D + P6_td + P6_in 123 | P6_out = self.P6_out_conv(P6_out) 124 | P6_D = self.pool(P6_out) 125 | P7_out = P6_D + P7_in 126 | P7_out = self.P7_out_conv(P7_out) 127 | return [P3_out, P4_out, P5_out, P6_out, P7_out] 128 | 129 | def top_down(self, features_in): 130 | P3_in, P4_in, P5_in, P6_in, P7_in = features_in 131 | if not self.first: 132 | P3_in = self.P3_in_conv(P3_in) 133 | P4_in = self.P4_in_conv(P4_in) 134 | P5_in = self.P5_in_conv(P5_in) 135 | P6_in = self.P6_in_conv(P6_in) 136 | P7_in = self.P7_in_conv(P7_in) 137 | 138 | # upsample 139 | P7_U = self.Resize()(P7_in) 140 | P6_td = self.P6_td_add([P6_in, P7_U]) 141 | P6_td = self.P6_td_conv(P6_td) 142 | P6_U = self.Resize()(P6_td) 143 | P5_td = self.P5_td_add([P5_in, P6_U]) 144 | P5_td = self.P5_td_conv(P5_td) 145 | P5_U = self.Resize()(P5_td) 146 | P4_td = self.P4_td_add([P4_in, P5_U]) 147 | P4_td = self.P4_td_conv(P4_td) 148 | P4_U = self.Resize()(P4_td) 149 | P3_out = self.P3_out_add([P3_in, P4_U]) 150 | P3_out = self.P3_out_conv(P3_out) 151 | 152 | # downsample 153 | P3_D = self.pool(P3_out) 154 | P4_out = self.P4_out_add([P3_D, P4_td, P4_in]) 155 | P4_out = self.P4_out_conv(P4_out) 156 | P4_D = self.pool(P4_out) 157 | P5_out = self.P5_out_add([P4_D, P5_td, P5_in]) 158 | P5_out = self.P5_out_conv(P5_out) 159 | P5_D = self.pool(P5_out) 160 | P6_out = self.P6_out_add([P5_D, P6_td, P6_in]) 161 | P6_out = self.P6_out_conv(P6_out) 162 | P6_D = self.pool(P6_out) 163 | P7_out = self.P7_out_add([P6_D, P7_in]) 164 | P7_out = self.P7_out_conv(P7_out) 165 | 166 | return [P3_out, P4_out, P5_out, P6_out, P7_out] 167 | 168 | 169 | 170 | def Resize(self, scale=2, mode='nearest'): 171 | upsample = nn.Upsample(scale_factor=scale, mode=mode) 172 | return upsample 173 | 174 | # def get_weight(self): 175 | 176 | 177 | class wAdd(nn.Module): 178 | """ 179 | 180 | """ 181 | def __init__(self, num_in): 182 | super().__init__() 183 | self.epsilon = 1e-4 184 | self.w = nn.Parameter(torch.Tensor(num_in).fill_(1 / num_in)) 185 | 186 | def forward(self, inputs): 187 | # len(inputs) 188 | 
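# Fast normalised fusion: each input feature map is scaled by a learnable scalar weight
# and the weighted sum is divided by the sum of the weights plus a small epsilon. Note
# that the weights are not passed through a ReLU here, so they are not constrained to
# stay non-negative.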
num_in = len(inputs) 189 | # w = nn.Parameter(torch.Tensor(num_in).fill_(1 / num_in)) 190 | w = self.w.cuda() 191 | # x = [w[i] * inputs[i] for i in range(num_in)] 192 | x = 0 193 | # print(w[0]) 194 | for i in range(num_in): 195 | x += w[i] * inputs[i] 196 | x /= (torch.sum(w) + self.epsilon) 197 | # x = x.cuda() 198 | return x 199 | # x = torch.sum(x) 200 | 201 | 202 | 203 | -------------------------------------------------------------------------------- /model/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import torchvision.ops as ops 5 | 6 | 7 | def conv3x3(in_planes, out_planes, stride=1): 8 | """3x3 convolution with padding""" 9 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 10 | padding=1, bias=False) 11 | 12 | class BasicBlock(nn.Module): 13 | expansion = 1 14 | 15 | def __init__(self, inplanes, planes, stride=1, downsample=None): 16 | super(BasicBlock, self).__init__() 17 | self.conv1 = conv3x3(inplanes, planes, stride) 18 | self.bn1 = nn.BatchNorm2d(planes) 19 | self.relu = nn.ReLU(inplace=True) 20 | self.conv2 = conv3x3(planes, planes) 21 | self.bn2 = nn.BatchNorm2d(planes) 22 | self.downsample = downsample 23 | self.stride = stride 24 | 25 | def forward(self, x): 26 | residual = x 27 | 28 | out = self.conv1(x) 29 | out = self.bn1(out) 30 | out = self.relu(out) 31 | 32 | out = self.conv2(out) 33 | out = self.bn2(out) 34 | 35 | if self.downsample is not None: 36 | residual = self.downsample(x) 37 | 38 | out += residual 39 | out = self.relu(out) 40 | 41 | return out 42 | 43 | 44 | class Bottleneck(nn.Module): 45 | expansion = 4 46 | 47 | def __init__(self, inplanes, planes, stride=1, downsample=None): 48 | super(Bottleneck, self).__init__() 49 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 50 | self.bn1 = nn.BatchNorm2d(planes) 51 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 52 | padding=1, bias=False) 53 | self.bn2 = nn.BatchNorm2d(planes) 54 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 55 | self.bn3 = nn.BatchNorm2d(planes * 4) 56 | self.relu = nn.ReLU(inplace=True) 57 | self.downsample = downsample 58 | self.stride = stride 59 | 60 | def forward(self, x): 61 | residual = x 62 | 63 | out = self.conv1(x) 64 | out = self.bn1(out) 65 | out = self.relu(out) 66 | 67 | out = self.conv2(out) 68 | out = self.bn2(out) 69 | out = self.relu(out) 70 | 71 | out = self.conv3(out) 72 | out = self.bn3(out) 73 | 74 | if self.downsample is not None: 75 | residual = self.downsample(x) 76 | 77 | out += residual 78 | out = self.relu(out) 79 | 80 | return out 81 | 82 | class BBoxTransform(nn.Module): 83 | 84 | def __init__(self, mean=None, std=None): 85 | super(BBoxTransform, self).__init__() 86 | if mean is None: 87 | self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32)).cuda() 88 | else: 89 | self.mean = mean 90 | if std is None: 91 | self.std = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32)).cuda() 92 | else: 93 | self.std = std 94 | 95 | def forward(self, boxes, deltas): 96 | 97 | widths = boxes[:, :, 2] - boxes[:, :, 0] 98 | heights = boxes[:, :, 3] - boxes[:, :, 1] 99 | ctr_x = boxes[:, :, 0] + 0.5 * widths 100 | ctr_y = boxes[:, :, 1] + 0.5 * heights 101 | 102 | dx = deltas[:, :, 0] * self.std[0] + self.mean[0] 103 | dy = deltas[:, :, 1] * self.std[1] + self.mean[1] 104 | dw = deltas[:, :, 2] * self.std[2] + self.mean[2] 105 | dh = deltas[:, :, 
3] * self.std[3] + self.mean[3] 106 | 107 | pred_ctr_x = ctr_x + dx * widths 108 | pred_ctr_y = ctr_y + dy * heights 109 | pred_w = torch.exp(dw) * widths 110 | pred_h = torch.exp(dh) * heights 111 | 112 | pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w 113 | pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h 114 | pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w 115 | pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h 116 | 117 | pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2) 118 | 119 | return pred_boxes 120 | 121 | 122 | class ClipBoxes(nn.Module): 123 | 124 | def __init__(self, width=None, height=None): 125 | super(ClipBoxes, self).__init__() 126 | 127 | def forward(self, boxes, img): 128 | 129 | batch_size, num_channels, height, width = img.shape 130 | 131 | boxes[:, :, 0] = torch.clamp(boxes[:, :, 0], min=0) 132 | boxes[:, :, 1] = torch.clamp(boxes[:, :, 1], min=0) 133 | 134 | boxes[:, :, 2] = torch.clamp(boxes[:, :, 2], max=width) 135 | boxes[:, :, 3] = torch.clamp(boxes[:, :, 3], max=height) 136 | 137 | return boxes 138 | 139 | class Filter_boxes(nn.Module): 140 | """ 141 | 142 | """ 143 | def __init__(self, args): 144 | super().__init__() 145 | self.threshold = args.threshold 146 | 147 | def forward(self, inputs): 148 | transformed_anchors, classification, scores = inputs 149 | 150 | boxes_dict, scores_dict = self.select(transformed_anchors, classification) 151 | box = [] 152 | score =[] 153 | cls = [] 154 | for i in range(80): 155 | anchors_nms_idx = ops.nms(boxes=boxes_dict[i], scores=scores_dict[i], iou_threshold=0.5) 156 | if len(scores_dict[i])>0: 157 | box.append(boxes_dict[i][anchors_nms_idx, :]) 158 | score.append(scores_dict[i][anchors_nms_idx]) 159 | cls.append(i) 160 | 161 | 162 | return box, score, cls 163 | 164 | def select(self, transformed_anchors, classification): 165 | boxes = {} 166 | scores = {} 167 | for cls in range(80): 168 | cls_score = classification[0, :, cls] 169 | select_mask = cls_score > self.threshold 170 | boxes[cls] = transformed_anchors[0, select_mask, :] 171 | scores[cls] = cls_score[select_mask] 172 | return boxes, scores 173 | 174 | num2name = {0: u'__background__', 175 | 1: u'person', 176 | 2: u'bicycle', 177 | 3: u'car', 178 | 4: u'motorcycle', 179 | 5: u'airplane', 180 | 6: u'bus', 181 | 7: u'train', 182 | 8: u'truck', 183 | 9: u'boat', 184 | 10: u'traffic light', 185 | 11: u'fire hydrant', 186 | 12: u'stop sign', 187 | 13: u'parking meter', 188 | 14: u'bench', 189 | 15: u'bird', 190 | 16: u'cat', 191 | 17: u'dog', 192 | 18: u'horse', 193 | 19: u'sheep', 194 | 20: u'cow', 195 | 21: u'elephant', 196 | 22: u'bear', 197 | 23: u'zebra', 198 | 24: u'giraffe', 199 | 25: u'backpack', 200 | 26: u'umbrella', 201 | 27: u'handbag', 202 | 28: u'tie', 203 | 29: u'suitcase', 204 | 30: u'frisbee', 205 | 31: u'skis', 206 | 32: u'snowboard', 207 | 33: u'sports ball', 208 | 34: u'kite', 209 | 35: u'baseball bat', 210 | 36: u'baseball glove', 211 | 37: u'skateboard', 212 | 38: u'surfboard', 213 | 39: u'tennis racket', 214 | 40: u'bottle', 215 | 41: u'wine glass', 216 | 42: u'cup', 217 | 43: u'fork', 218 | 44: u'knife', 219 | 45: u'spoon', 220 | 46: u'bowl', 221 | 47: u'banana', 222 | 48: u'apple', 223 | 49: u'sandwich', 224 | 50: u'orange', 225 | 51: u'broccoli', 226 | 52: u'carrot', 227 | 53: u'hot dog', 228 | 54: u'pizza', 229 | 55: u'donut', 230 | 56: u'cake', 231 | 57: u'chair', 232 | 58: u'couch', 233 | 59: u'potted plant', 234 | 60: u'bed', 235 | 61: u'dining table', 236 | 62: u'toilet', 237 | 63: u'tv', 238 | 64: u'laptop', 239 | 65: 
u'mouse', 240 | 66: u'remote', 241 | 67: u'keyboard', 242 | 68: u'cell phone', 243 | 69: u'microwave', 244 | 70: u'oven', 245 | 71: u'toaster', 246 | 72: u'sink', 247 | 73: u'refrigerator', 248 | 74: u'book', 249 | 75: u'clock', 250 | 76: u'vase', 251 | 77: u'scissors', 252 | 78: u'teddy bear', 253 | 79: u'hair drier', 254 | 80: u'toothbrush'} -------------------------------------------------------------------------------- /model/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from .utils import ( 6 | round_filters, 7 | round_repeats, 8 | drop_connect, 9 | get_same_padding_conv2d, 10 | get_model_params, 11 | efficientnet_params, 12 | load_pretrained_weights, 13 | Swish, 14 | MemoryEfficientSwish, 15 | ) 16 | 17 | class MBConvBlock(nn.Module): 18 | """ 19 | Mobile Inverted Residual Bottleneck Block 20 | 21 | Args: 22 | block_args (namedtuple): BlockArgs, see above 23 | global_params (namedtuple): GlobalParam, see above 24 | 25 | Attributes: 26 | has_se (bool): Whether the block contains a Squeeze and Excitation layer. 27 | """ 28 | 29 | def __init__(self, block_args, global_params): 30 | super().__init__() 31 | self._block_args = block_args 32 | self._bn_mom = 1 - global_params.batch_norm_momentum 33 | self._bn_eps = global_params.batch_norm_epsilon 34 | self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1) 35 | self.id_skip = block_args.id_skip # skip connection and drop connect 36 | 37 | # Get static or dynamic convolution depending on image size 38 | Conv2d = get_same_padding_conv2d(image_size=global_params.image_size) 39 | 40 | # Expansion phase 41 | inp = self._block_args.input_filters # number of input channels 42 | oup = self._block_args.input_filters * self._block_args.expand_ratio # number of output channels 43 | if self._block_args.expand_ratio != 1: 44 | self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False) 45 | self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) 46 | 47 | # Depthwise convolution phase 48 | k = self._block_args.kernel_size 49 | s = self._block_args.stride 50 | self._depthwise_conv = Conv2d( 51 | in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise 52 | kernel_size=k, stride=s, bias=False) 53 | self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) 54 | 55 | # Squeeze and Excitation layer, if desired 56 | if self.has_se: 57 | num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio)) 58 | self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1) 59 | self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1) 60 | 61 | # Output phase 62 | final_oup = self._block_args.output_filters 63 | self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False) 64 | self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps) 65 | self._swish = MemoryEfficientSwish() 66 | 67 | def forward(self, inputs, drop_connect_rate=None): 68 | """ 69 | :param inputs: input tensor 70 | :param drop_connect_rate: drop connect rate (float, between 0 and 1) 71 | :return: output of block 72 | """ 73 | 74 | # Expansion and Depthwise Convolution 75 | x = inputs 76 | if self._block_args.expand_ratio != 1: 77 | x = 
self._swish(self._bn0(self._expand_conv(inputs))) 78 | x = self._swish(self._bn1(self._depthwise_conv(x))) 79 | 80 | # Squeeze and Excitation 81 | if self.has_se: 82 | x_squeezed = F.adaptive_avg_pool2d(x, 1) 83 | x_squeezed = self._se_expand(self._swish(self._se_reduce(x_squeezed))) 84 | x = torch.sigmoid(x_squeezed) * x 85 | 86 | x = self._bn2(self._project_conv(x)) 87 | 88 | # Skip connection and drop connect 89 | input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters 90 | if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters: 91 | if drop_connect_rate: 92 | x = drop_connect(x, p=drop_connect_rate, training=self.training) 93 | x = x + inputs # skip connection 94 | return x 95 | 96 | def set_swish(self, memory_efficient=True): 97 | """Sets swish function as memory efficient (for training) or standard (for export)""" 98 | self._swish = MemoryEfficientSwish() if memory_efficient else Swish() 99 | 100 | 101 | class EfficientNet(nn.Module): 102 | """ 103 | An EfficientNet model. Most easily loaded with the .from_name or .from_pretrained methods 104 | 105 | Args: 106 | blocks_args (list): A list of BlockArgs to construct blocks 107 | global_params (namedtuple): A set of GlobalParams shared between blocks 108 | 109 | Example: 110 | model = EfficientNet.from_pretrained('efficientnet-b0') 111 | 112 | """ 113 | 114 | def __init__(self, blocks_args=None, global_params=None): 115 | super().__init__() 116 | assert isinstance(blocks_args, list), 'blocks_args should be a list' 117 | assert len(blocks_args) > 0, 'block args must be greater than 0' 118 | self._global_params = global_params 119 | self._blocks_args = blocks_args 120 | 121 | # Get static or dynamic convolution depending on image size 122 | Conv2d = get_same_padding_conv2d(image_size=global_params.image_size) 123 | 124 | # Batch norm parameters 125 | bn_mom = 1 - self._global_params.batch_norm_momentum 126 | bn_eps = self._global_params.batch_norm_epsilon 127 | 128 | # Stem 129 | in_channels = 3 # rgb 130 | out_channels = round_filters(32, self._global_params) # number of output channels 131 | self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) 132 | self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) 133 | 134 | # Build blocks 135 | self._blocks = nn.ModuleList([]) 136 | for block_args in self._blocks_args: 137 | 138 | # Update block input and output filters based on depth multiplier. 139 | block_args = block_args._replace( 140 | input_filters=round_filters(block_args.input_filters, self._global_params), 141 | output_filters=round_filters(block_args.output_filters, self._global_params), 142 | num_repeat=round_repeats(block_args.num_repeat, self._global_params) 143 | ) 144 | 145 | # The first block needs to take care of stride and filter size increase. 
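# Later repeats of the same stage reuse the block's output filters as input and run with
# stride 1 (see the _replace call just below), so only the first repeat of a stage changes
# resolution or channel count.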
146 | self._blocks.append(MBConvBlock(block_args, self._global_params)) 147 | if block_args.num_repeat > 1: 148 | block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) 149 | for _ in range(block_args.num_repeat - 1): 150 | self._blocks.append(MBConvBlock(block_args, self._global_params)) 151 | 152 | # Head 153 | in_channels = block_args.output_filters # output of final block 154 | out_channels = round_filters(1280, self._global_params) 155 | self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False) 156 | self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) 157 | 158 | # Final linear layer 159 | self._avg_pooling = nn.AdaptiveAvgPool2d(1) 160 | self._dropout = nn.Dropout(self._global_params.dropout_rate) 161 | self._fc = nn.Linear(out_channels, self._global_params.num_classes) 162 | self._swish = MemoryEfficientSwish() 163 | 164 | def set_swish(self, memory_efficient=True): 165 | """Sets swish function as memory efficient (for training) or standard (for export)""" 166 | self._swish = MemoryEfficientSwish() if memory_efficient else Swish() 167 | for block in self._blocks: 168 | block.set_swish(memory_efficient) 169 | 170 | 171 | def extract_features(self, inputs): 172 | """ Returns output of the final convolution layer """ 173 | 174 | # Stem 175 | x = self._swish(self._bn0(self._conv_stem(inputs))) 176 | x_before = x 177 | features = [] 178 | block_index = 0 179 | repeat = 0 180 | # Blocks 181 | for idx, block in enumerate(self._blocks): 182 | drop_connect_rate = self._global_params.drop_connect_rate 183 | if drop_connect_rate: 184 | drop_connect_rate *= float(idx) / len(self._blocks) 185 | x = block(x, drop_connect_rate=drop_connect_rate) 186 | if x_before.shape[2] != x.shape[2]: 187 | features.append(x_before) 188 | x_before = x 189 | features.append(x) 190 | # repeat += 1 191 | # if(repeat == self._blocks_args[block_index].num_repeat): 192 | # repeat = 0 193 | # block_index += 1 194 | # features.append(x) 195 | 196 | # Head 197 | # x = self._swish(self._bn1(self._conv_head(x))) 198 | 199 | return features 200 | 201 | def forward(self, inputs): 202 | """ Calls extract_features to extract features, applies final linear layer, and returns logits. 
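In this detection backbone the pooling, dropout and fully connected head are commented out below, so the call actually returns the list of multi-scale feature maps produced by extract_features.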
""" 203 | # bs = inputs.size(0) 204 | # Convolution layers 205 | x = self.extract_features(inputs) 206 | 207 | # Pooling and final linear layer 208 | # x = self._avg_pooling(x) 209 | # x = x.view(bs, -1) 210 | # x = self._dropout(x) 211 | # x = self._fc(x) 212 | return x 213 | 214 | @classmethod 215 | def from_name(cls, model_name, override_params=None): 216 | cls._check_model_name_is_valid(model_name) 217 | blocks_args, global_params = get_model_params(model_name, override_params) 218 | return cls(blocks_args, global_params) 219 | 220 | @classmethod 221 | def from_pretrained(cls, args, num_classes=1000, in_channels = 3): 222 | print(args) 223 | model_name = args.backbone 224 | print('backbone', model_name) 225 | model = cls.from_name(model_name, override_params={'num_classes': num_classes}) 226 | if args.backbone_pretrained: 227 | load_pretrained_weights(model, model_name, load_fc=(num_classes == 1000)) 228 | 229 | if in_channels != 3: 230 | Conv2d = get_same_padding_conv2d(image_size = model._global_params.image_size) 231 | out_channels = round_filters(32, model._global_params) 232 | model._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) 233 | return model 234 | 235 | @classmethod 236 | def from_pretrained(cls, args, num_classes=1000): 237 | # print(model_name) 238 | model_name = args.backbone 239 | model = cls.from_name(model_name, override_params={'num_classes': num_classes}) 240 | if args.backbone_pretrained: 241 | load_pretrained_weights(model, model_name, load_fc=False) 242 | 243 | return model 244 | 245 | @classmethod 246 | def get_image_size(cls, model_name): 247 | cls._check_model_name_is_valid(model_name) 248 | _, _, res, _ = efficientnet_params(model_name) 249 | return res 250 | 251 | @classmethod 252 | def _check_model_name_is_valid(cls, model_name, also_need_pretrained_weights=False): 253 | """ Validates model name. None that pretrained weights are only available for 254 | the first four models (efficientnet-b{i} for i in 0,1,2,3) at the moment. """ 255 | num_models = 4 if also_need_pretrained_weights else 8 256 | valid_models = ['efficientnet-b'+str(i) for i in range(num_models)] 257 | if model_name not in valid_models: 258 | raise ValueError('model_name should be one of: ' + ', '.join(valid_models)) 259 | 260 | def get_list_feature(self): 261 | list_feature = [80, 192, 320] 262 | # s_before = self._blocks_args[0] 263 | # for idx in range(len(self._blocks_args)-1): 264 | # print(self._blocks_args[idx].stride) 265 | # if self._blocks_args[idx].stride == self._blocks_args[idx+1].stride: 266 | # list_feature.append(self._blocks_args[idx].output_filters) 267 | 268 | return list_feature 269 | -------------------------------------------------------------------------------- /model/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains helper functions for building the model and for loading model parameters. 3 | These helper functions are built to mirror those in the official TensorFlow implementation. 
4 | """ 5 | 6 | import re 7 | import math 8 | import collections 9 | from functools import partial 10 | import torch 11 | from torch import nn 12 | from torch.nn import functional as F 13 | from torch.utils import model_zoo 14 | 15 | ######################################################################## 16 | ############### HELPERS FUNCTIONS FOR MODEL ARCHITECTURE ############### 17 | ######################################################################## 18 | 19 | 20 | # Parameters for the entire model (stem, all blocks, and head) 21 | GlobalParams = collections.namedtuple('GlobalParams', [ 22 | 'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate', 23 | 'num_classes', 'width_coefficient', 'depth_coefficient', 24 | 'depth_divisor', 'min_depth', 'drop_connect_rate', 'image_size']) 25 | 26 | # Parameters for an individual model block 27 | BlockArgs = collections.namedtuple('BlockArgs', [ 28 | 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', 29 | 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) 30 | 31 | # Change namedtuple defaults 32 | GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) 33 | BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) 34 | 35 | 36 | class SwishImplementation(torch.autograd.Function): 37 | @staticmethod 38 | def forward(ctx, i): 39 | result = i * torch.sigmoid(i) 40 | ctx.save_for_backward(i) 41 | return result 42 | 43 | @staticmethod 44 | def backward(ctx, grad_output): 45 | i = ctx.saved_variables[0] 46 | sigmoid_i = torch.sigmoid(i) 47 | return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) 48 | 49 | 50 | class MemoryEfficientSwish(nn.Module): 51 | def forward(self, x): 52 | return SwishImplementation.apply(x) 53 | 54 | class Swish(nn.Module): 55 | def forward(self, x): 56 | return x * torch.sigmoid(x) 57 | 58 | 59 | def round_filters(filters, global_params): 60 | """ Calculate and round number of filters based on depth multiplier. """ 61 | multiplier = global_params.width_coefficient 62 | if not multiplier: 63 | return filters 64 | divisor = global_params.depth_divisor 65 | min_depth = global_params.min_depth 66 | filters *= multiplier 67 | min_depth = min_depth or divisor 68 | new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) 69 | if new_filters < 0.9 * filters: # prevent rounding by more than 10% 70 | new_filters += divisor 71 | return int(new_filters) 72 | 73 | 74 | def round_repeats(repeats, global_params): 75 | """ Round number of filters based on depth multiplier. """ 76 | multiplier = global_params.depth_coefficient 77 | if not multiplier: 78 | return repeats 79 | return int(math.ceil(multiplier * repeats)) 80 | 81 | 82 | def drop_connect(inputs, p, training): 83 | """ Drop connect. """ 84 | if not training: return inputs 85 | batch_size = inputs.shape[0] 86 | keep_prob = 1 - p 87 | random_tensor = keep_prob 88 | random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device) 89 | binary_tensor = torch.floor(random_tensor) 90 | output = inputs / keep_prob * binary_tensor 91 | return output 92 | 93 | 94 | def get_same_padding_conv2d(image_size=None): 95 | """ Chooses static padding if you have specified an image size, and dynamic padding otherwise. 96 | Static padding is necessary for ONNX exporting of models. 
""" 97 | if image_size is None: 98 | return Conv2dDynamicSamePadding 99 | else: 100 | return partial(Conv2dStaticSamePadding, image_size=image_size) 101 | 102 | 103 | class Conv2dDynamicSamePadding(nn.Conv2d): 104 | """ 2D Convolutions like TensorFlow, for a dynamic image size """ 105 | 106 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True): 107 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) 108 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 109 | 110 | def forward(self, x): 111 | ih, iw = x.size()[-2:] 112 | kh, kw = self.weight.size()[-2:] 113 | sh, sw = self.stride 114 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 115 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 116 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 117 | if pad_h > 0 or pad_w > 0: 118 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) 119 | return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 120 | 121 | 122 | class Conv2dStaticSamePadding(nn.Conv2d): 123 | """ 2D Convolutions like TensorFlow, for a fixed image size""" 124 | 125 | def __init__(self, in_channels, out_channels, kernel_size, image_size=None, **kwargs): 126 | super().__init__(in_channels, out_channels, kernel_size, **kwargs) 127 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 128 | 129 | # Calculate padding based on image size and save it 130 | assert image_size is not None 131 | ih, iw = image_size if type(image_size) == list else [image_size, image_size] 132 | kh, kw = self.weight.size()[-2:] 133 | sh, sw = self.stride 134 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 135 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 136 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 137 | if pad_h > 0 or pad_w > 0: 138 | self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)) 139 | else: 140 | self.static_padding = Identity() 141 | 142 | def forward(self, x): 143 | x = self.static_padding(x) 144 | x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 145 | return x 146 | 147 | 148 | class Identity(nn.Module): 149 | def __init__(self, ): 150 | super(Identity, self).__init__() 151 | 152 | def forward(self, input): 153 | return input 154 | 155 | 156 | ######################################################################## 157 | ############## HELPERS FUNCTIONS FOR LOADING MODEL PARAMS ############## 158 | ######################################################################## 159 | 160 | 161 | def efficientnet_params(model_name): 162 | """ Map EfficientNet model name to parameter coefficients. 
""" 163 | params_dict = { 164 | # Coefficients: width,depth,res,dropout 165 | 'efficientnet-b0': (1.0, 1.0, 224, 0.2), 166 | 'efficientnet-b1': (1.0, 1.1, 240, 0.2), 167 | 'efficientnet-b2': (1.1, 1.2, 260, 0.3), 168 | 'efficientnet-b3': (1.2, 1.4, 300, 0.3), 169 | 'efficientnet-b4': (1.4, 1.8, 380, 0.4), 170 | 'efficientnet-b5': (1.6, 2.2, 456, 0.4), 171 | 'efficientnet-b6': (1.8, 2.6, 528, 0.5), 172 | 'efficientnet-b7': (2.0, 3.1, 600, 0.5), 173 | } 174 | return params_dict[model_name] 175 | 176 | 177 | class BlockDecoder(object): 178 | """ Block Decoder for readability, straight from the official TensorFlow repository """ 179 | 180 | @staticmethod 181 | def _decode_block_string(block_string): 182 | """ Gets a block through a string notation of arguments. """ 183 | assert isinstance(block_string, str) 184 | 185 | ops = block_string.split('_') 186 | options = {} 187 | for op in ops: 188 | splits = re.split(r'(\d.*)', op) 189 | if len(splits) >= 2: 190 | key, value = splits[:2] 191 | options[key] = value 192 | 193 | # Check stride 194 | assert (('s' in options and len(options['s']) == 1) or 195 | (len(options['s']) == 2 and options['s'][0] == options['s'][1])) 196 | 197 | return BlockArgs( 198 | kernel_size=int(options['k']), 199 | num_repeat=int(options['r']), 200 | input_filters=int(options['i']), 201 | output_filters=int(options['o']), 202 | expand_ratio=int(options['e']), 203 | id_skip=('noskip' not in block_string), 204 | se_ratio=float(options['se']) if 'se' in options else None, 205 | stride=[int(options['s'][0])]) 206 | 207 | @staticmethod 208 | def _encode_block_string(block): 209 | """Encodes a block to a string.""" 210 | args = [ 211 | 'r%d' % block.num_repeat, 212 | 'k%d' % block.kernel_size, 213 | 's%d%d' % (block.strides[0], block.strides[1]), 214 | 'e%s' % block.expand_ratio, 215 | 'i%d' % block.input_filters, 216 | 'o%d' % block.output_filters 217 | ] 218 | if 0 < block.se_ratio <= 1: 219 | args.append('se%s' % block.se_ratio) 220 | if block.id_skip is False: 221 | args.append('noskip') 222 | return '_'.join(args) 223 | 224 | @staticmethod 225 | def decode(string_list): 226 | """ 227 | Decodes a list of string notations to specify blocks inside the network. 228 | 229 | :param string_list: a list of strings, each string is a notation of block 230 | :return: a list of BlockArgs namedtuples of block args 231 | """ 232 | assert isinstance(string_list, list) 233 | blocks_args = [] 234 | for block_string in string_list: 235 | blocks_args.append(BlockDecoder._decode_block_string(block_string)) 236 | return blocks_args 237 | 238 | @staticmethod 239 | def encode(blocks_args): 240 | """ 241 | Encodes a list of BlockArgs to a list of strings. 242 | 243 | :param blocks_args: a list of BlockArgs namedtuples of block args 244 | :return: a list of strings, each string is a notation of block 245 | """ 246 | block_strings = [] 247 | for block in blocks_args: 248 | block_strings.append(BlockDecoder._encode_block_string(block)) 249 | return block_strings 250 | 251 | 252 | def efficientnet(width_coefficient=None, depth_coefficient=None, dropout_rate=0.2, 253 | drop_connect_rate=0.2, image_size=None, num_classes=1000): 254 | """ Creates a efficientnet model. 
""" 255 | 256 | blocks_args = [ 257 | 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s11_e6_i16_o24_se0.25', 258 | 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', 259 | 'r3_k5_s22_e6_i80_o112_se0.25', 'r4_k5_s11_e6_i112_o192_se0.25', 260 | 'r1_k3_s22_e6_i192_o320_se0.25', 261 | ] 262 | blocks_args = BlockDecoder.decode(blocks_args) 263 | 264 | global_params = GlobalParams( 265 | batch_norm_momentum=0.99, 266 | batch_norm_epsilon=1e-3, 267 | dropout_rate=dropout_rate, 268 | drop_connect_rate=drop_connect_rate, 269 | # data_format='channels_last', # removed, this is always true in PyTorch 270 | num_classes=num_classes, 271 | width_coefficient=width_coefficient, 272 | depth_coefficient=depth_coefficient, 273 | depth_divisor=8, 274 | min_depth=None, 275 | image_size=image_size, 276 | ) 277 | 278 | return blocks_args, global_params 279 | 280 | 281 | def get_model_params(model_name, override_params): 282 | """ Get the block args and global params for a given model """ 283 | if model_name.startswith('efficientnet'): 284 | w, d, s, p = efficientnet_params(model_name) 285 | # note: all models have drop connect rate = 0.2 286 | blocks_args, global_params = efficientnet( 287 | width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s) 288 | else: 289 | raise NotImplementedError('model name is not pre-defined: %s' % model_name) 290 | if override_params: 291 | # ValueError will be raised here if override_params has fields not included in global_params. 292 | global_params = global_params._replace(**override_params) 293 | return blocks_args, global_params 294 | 295 | 296 | url_map = { 297 | 'efficientnet-b0': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b0-355c32eb.pth', 298 | 'efficientnet-b1': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b1-f1951068.pth', 299 | 'efficientnet-b2': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b2-8bb594d6.pth', 300 | 'efficientnet-b3': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b3-5fb5a3c3.pth', 301 | 'efficientnet-b4': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b4-6ed6700e.pth', 302 | 'efficientnet-b5': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b5-b6417697.pth', 303 | 'efficientnet-b6': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b6-c76e70fd.pth', 304 | 'efficientnet-b7': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b7-dcc49843.pth', 305 | } 306 | 307 | 308 | def load_pretrained_weights(model, model_name, load_fc=False): 309 | """ Loads pretrained weights, and downloads if loading for the first time. 
""" 310 | state_dict = model_zoo.load_url(url_map[model_name]) 311 | # state_dict = torch.load('/home/pre_trained/efficientnet-b0-355c32eb.pth') 312 | if load_fc: 313 | model.load_state_dict(state_dict) 314 | else: 315 | state_dict.pop('_fc.weight') 316 | state_dict.pop('_fc.bias') 317 | res = model.load_state_dict(state_dict, strict=False) 318 | assert set(res.missing_keys) == set(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights' 319 | print('Loaded pretrained weights for {}'.format(model_name)) 320 | -------------------------------------------------------------------------------- /dataset/dataloader.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import sys 3 | import os 4 | import torch 5 | import numpy as np 6 | import random 7 | import csv 8 | import cv2 9 | from torchvision import transforms 10 | from torch.utils.data import Dataset, DataLoader 11 | from torchvision import transforms, utils 12 | from torch.utils.data.sampler import Sampler 13 | 14 | from pycocotools.coco import COCO 15 | 16 | import skimage.io 17 | import skimage.transform 18 | import skimage.color 19 | import skimage 20 | 21 | from PIL import Image 22 | 23 | 24 | class CocoDataset(Dataset): 25 | """Coco dataset.""" 26 | 27 | def __init__(self, root_dir, set_name='train2017', transform=None): 28 | """ 29 | Args: 30 | root_dir (string): COCO directory. 31 | transform (callable, optional): Optional transform to be applied 32 | on a sample. 33 | """ 34 | self.root_dir = root_dir 35 | self.set_name = set_name 36 | self.transform = transform 37 | 38 | self.coco = COCO(os.path.join(self.root_dir, 'annotations_trainval2017', 'annotations', 'instances_' + self.set_name + '.json')) 39 | self.image_ids = self.coco.getImgIds() 40 | 41 | self.load_classes() 42 | 43 | def load_classes(self): 44 | # load class names (name -> label) 45 | categories = self.coco.loadCats(self.coco.getCatIds()) 46 | categories.sort(key=lambda x: x['id']) 47 | 48 | self.classes = {} 49 | self.coco_labels = {} 50 | self.coco_labels_inverse = {} 51 | for c in categories: 52 | self.coco_labels[len(self.classes)] = c['id'] 53 | self.coco_labels_inverse[c['id']] = len(self.classes) 54 | self.classes[c['name']] = len(self.classes) 55 | 56 | # also load the reverse (label -> name) 57 | self.labels = {} 58 | for key, value in self.classes.items(): 59 | self.labels[value] = key 60 | 61 | def __len__(self): 62 | return len(self.image_ids) 63 | 64 | def __getitem__(self, idx): 65 | 66 | img = self.load_image(idx) 67 | annot = self.load_annotations(idx) 68 | sample = {'img': img, 'annot': annot} 69 | if self.transform: 70 | sample = self.transform(sample) 71 | 72 | return sample 73 | 74 | def load_image(self, image_index): 75 | image_info = self.coco.loadImgs(self.image_ids[image_index])[0] 76 | path = os.path.join(self.root_dir, self.set_name, image_info['file_name']) 77 | img = skimage.io.imread(path) 78 | # img = cv2.imread(path) 79 | # img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) 80 | if len(img.shape) == 2: 81 | img = skimage.color.gray2rgb(img) 82 | # img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) 83 | 84 | return img.astype(np.float32) / 255.0 85 | 86 | def load_annotations(self, image_index): 87 | # get ground truth annotations 88 | annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False) 89 | annotations = np.zeros((0, 5)) 90 | 91 | # some images appear to miss annotations (like image with id 257034) 92 | if 
len(annotations_ids) == 0: 93 | return annotations 94 | 95 | # parse annotations 96 | coco_annotations = self.coco.loadAnns(annotations_ids) 97 | for idx, a in enumerate(coco_annotations): 98 | 99 | # some annotations have basically no width / height, skip them 100 | if a['bbox'][2] < 1 or a['bbox'][3] < 1: 101 | continue 102 | 103 | annotation = np.zeros((1, 5)) 104 | annotation[0, :4] = a['bbox'] 105 | annotation[0, 4] = self.coco_label_to_label(a['category_id']) 106 | annotations = np.append(annotations, annotation, axis=0) 107 | 108 | # transform from [x, y, w, h] to [x1, y1, x2, y2] 109 | annotations[:, 2] = annotations[:, 0] + annotations[:, 2] 110 | annotations[:, 3] = annotations[:, 1] + annotations[:, 3] 111 | 112 | return annotations 113 | 114 | def coco_label_to_label(self, coco_label): 115 | return self.coco_labels_inverse[coco_label] 116 | 117 | 118 | def label_to_coco_label(self, label): 119 | return self.coco_labels[label] 120 | 121 | def image_aspect_ratio(self, image_index): 122 | image = self.coco.loadImgs(self.image_ids[image_index])[0] 123 | return float(image['width']) / float(image['height']) 124 | 125 | def num_classes(self): 126 | return 80 127 | 128 | 129 | class CSVDataset(Dataset): 130 | """CSV dataset.""" 131 | 132 | def __init__(self, train_file, class_list, transform=None): 133 | """ 134 | Args: 135 | train_file (string): CSV file with training annotations 136 | annotations (string): CSV file with class list 137 | test_file (string, optional): CSV file with testing annotations 138 | """ 139 | self.train_file = train_file 140 | self.class_list = class_list 141 | self.transform = transform 142 | 143 | # parse the provided class file 144 | try: 145 | with self._open_for_csv(self.class_list) as file: 146 | self.classes = self.load_classes(csv.reader(file, delimiter=',')) 147 | except ValueError as e: 148 | raise_from(ValueError('invalid CSV class file: {}: {}'.format(self.class_list, e)), None) 149 | 150 | self.labels = {} 151 | for key, value in self.classes.items(): 152 | self.labels[value] = key 153 | 154 | # csv with img_path, x1, y1, x2, y2, class_name 155 | try: 156 | with self._open_for_csv(self.train_file) as file: 157 | self.image_data = self._read_annotations(csv.reader(file, delimiter=','), self.classes) 158 | except ValueError as e: 159 | raise_from(ValueError('invalid CSV annotations file: {}: {}'.format(self.train_file, e)), None) 160 | self.image_names = list(self.image_data.keys()) 161 | 162 | def _parse(self, value, function, fmt): 163 | """ 164 | Parse a string into a value, and format a nice ValueError if it fails. 165 | Returns `function(value)`. 166 | Any `ValueError` raised is catched and a new `ValueError` is raised 167 | with message `fmt.format(e)`, where `e` is the caught `ValueError`. 168 | """ 169 | try: 170 | return function(value) 171 | except ValueError as e: 172 | raise_from(ValueError(fmt.format(e)), None) 173 | 174 | def _open_for_csv(self, path): 175 | """ 176 | Open a file with flags suitable for csv.reader. 177 | This is different for python2 it means with mode 'rb', 178 | for python3 this means 'r' with "universal newlines". 
179 | """ 180 | if sys.version_info[0] < 3: 181 | return open(path, 'rb') 182 | else: 183 | return open(path, 'r', newline='') 184 | 185 | 186 | def load_classes(self, csv_reader): 187 | result = {} 188 | 189 | for line, row in enumerate(csv_reader): 190 | line += 1 191 | 192 | try: 193 | class_name, class_id = row 194 | except ValueError: 195 | raise_from(ValueError('line {}: format should be \'class_name,class_id\''.format(line)), None) 196 | class_id = self._parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line)) 197 | 198 | if class_name in result: 199 | raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name)) 200 | result[class_name] = class_id 201 | return result 202 | 203 | 204 | def __len__(self): 205 | return len(self.image_names) 206 | 207 | def __getitem__(self, idx): 208 | 209 | img = self.load_image(idx) 210 | annot = self.load_annotations(idx) 211 | sample = {'img': img, 'annot': annot} 212 | if self.transform: 213 | sample = self.transform(sample) 214 | 215 | return sample 216 | 217 | def load_image(self, image_index): 218 | img = skimage.io.imread(self.image_names[image_index]) 219 | 220 | if len(img.shape) == 2: 221 | img = skimage.color.gray2rgb(img) 222 | 223 | return img.astype(np.float32)/255.0 224 | 225 | def load_annotations(self, image_index): 226 | # get ground truth annotations 227 | annotation_list = self.image_data[self.image_names[image_index]] 228 | annotations = np.zeros((0, 5)) 229 | 230 | # some images appear to miss annotations (like image with id 257034) 231 | if len(annotation_list) == 0: 232 | return annotations 233 | 234 | # parse annotations 235 | for idx, a in enumerate(annotation_list): 236 | # some annotations have basically no width / height, skip them 237 | x1 = a['x1'] 238 | x2 = a['x2'] 239 | y1 = a['y1'] 240 | y2 = a['y2'] 241 | 242 | if (x2-x1) < 1 or (y2-y1) < 1: 243 | continue 244 | 245 | annotation = np.zeros((1, 5)) 246 | 247 | annotation[0, 0] = x1 248 | annotation[0, 1] = y1 249 | annotation[0, 2] = x2 250 | annotation[0, 3] = y2 251 | 252 | annotation[0, 4] = self.name_to_label(a['class']) 253 | annotations = np.append(annotations, annotation, axis=0) 254 | 255 | return annotations 256 | 257 | def _read_annotations(self, csv_reader, classes): 258 | result = {} 259 | for line, row in enumerate(csv_reader): 260 | line += 1 261 | 262 | try: 263 | img_file, x1, y1, x2, y2, class_name = row[:6] 264 | except ValueError: 265 | raise_from(ValueError('line {}: format should be \'img_file,x1,y1,x2,y2,class_name\' or \'img_file,,,,,\''.format(line)), None) 266 | 267 | if img_file not in result: 268 | result[img_file] = [] 269 | 270 | # If a row contains only an image path, it's an image without annotations. 271 | if (x1, y1, x2, y2, class_name) == ('', '', '', '', ''): 272 | continue 273 | 274 | x1 = self._parse(x1, int, 'line {}: malformed x1: {{}}'.format(line)) 275 | y1 = self._parse(y1, int, 'line {}: malformed y1: {{}}'.format(line)) 276 | x2 = self._parse(x2, int, 'line {}: malformed x2: {{}}'.format(line)) 277 | y2 = self._parse(y2, int, 'line {}: malformed y2: {{}}'.format(line)) 278 | 279 | # Check that the bounding box is valid. 
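# (x1, y1) is the top-left corner and (x2, y2) the bottom-right, so x2 must exceed x1 and y2 must exceed y1.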
280 | if x2 <= x1: 281 | raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1)) 282 | if y2 <= y1: 283 | raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1)) 284 | 285 | # check if the current class name is correctly present 286 | if class_name not in classes: 287 | raise ValueError('line {}: unknown class name: \'{}\' (classes: {})'.format(line, class_name, classes)) 288 | 289 | result[img_file].append({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name}) 290 | return result 291 | 292 | def name_to_label(self, name): 293 | return self.classes[name] 294 | 295 | def label_to_name(self, label): 296 | return self.labels[label] 297 | 298 | def num_classes(self): 299 | return max(self.classes.values()) + 1 300 | 301 | def image_aspect_ratio(self, image_index): 302 | image = Image.open(self.image_names[image_index]) 303 | return float(image.width) / float(image.height) 304 | 305 | 306 | def collater(data): 307 | 308 | imgs = [s['img'] for s in data] 309 | annots = [s['annot'] for s in data] 310 | scales1 = [s['scale1'] for s in data] 311 | scales2 = [s['scale2'] for s in data] 312 | 313 | widths = [int(s.shape[0]) for s in imgs] 314 | heights = [int(s.shape[1]) for s in imgs] 315 | batch_size = len(imgs) 316 | 317 | max_width = np.array(widths).max() 318 | max_height = np.array(heights).max() 319 | 320 | padded_imgs = torch.zeros(batch_size, max_width, max_height, 3) 321 | 322 | for i in range(batch_size): 323 | img = imgs[i] 324 | padded_imgs[i, :int(img.shape[0]), :int(img.shape[1]), :] = img 325 | 326 | max_num_annots = max(annot.shape[0] for annot in annots) 327 | 328 | if max_num_annots > 0: 329 | 330 | annot_padded = torch.ones((len(annots), max_num_annots, 5)) * -1 331 | 332 | if max_num_annots > 0: 333 | for idx, annot in enumerate(annots): 334 | #print(annot.shape) 335 | if annot.shape[0] > 0: 336 | annot_padded[idx, :annot.shape[0], :] = annot 337 | else: 338 | annot_padded = torch.ones((len(annots), 1, 5)) * -1 339 | 340 | 341 | padded_imgs = padded_imgs.permute(0, 3, 1, 2) 342 | 343 | return {'img': padded_imgs, 'annot': annot_padded, 'scale1': scales1, 'scale2': scales2} 344 | 345 | class Resizer(object): 346 | """Convert ndarrays in sample to Tensors.""" 347 | 348 | def __call__(self, sample, min_side=512, max_side=512): 349 | image, annots = sample['img'], sample['annot'] 350 | 351 | rows, cols, cns = image.shape 352 | 353 | smallest_side = min(rows, cols) 354 | 355 | # rescale the image so the smallest side is min_side 356 | scale = min_side / smallest_side 357 | scale1 = 512 / rows 358 | scale2 = 512 / cols 359 | # check if the largest side is now greater than max_side, which can happen 360 | # when images have a large aspect ratio 361 | largest_side = max(rows, cols) 362 | 363 | if largest_side * scale > max_side: 364 | scale = max_side / largest_side 365 | 366 | # resize the image with the computed scale 367 | # image = skimage.transform.resize(image, (int(round(rows*scale)), int(round((cols*scale))))) 368 | image = skimage.transform.resize(image, (512, 512)) 369 | 370 | rows, cols, cns = image.shape 371 | 372 | pad_w = 32 - rows%32 373 | pad_h = 32 - cols%32 374 | 375 | new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32) 376 | new_image[:rows, :cols, :] = image.astype(np.float32) 377 | image = image.astype(np.float32) 378 | # print(np.shape(annots)) 379 | # annots[:, :2] *= scale1 380 | # annots[:, 2:4] *= scale2 381 | 382 | annots[:, 0] *= scale2 383 | annots[:, 2] *= scale2 384 | 
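# x coordinates follow the column scale (512 / cols) applied above; y coordinates follow the
# row scale (512 / rows) applied below, matching the warp of the image to a fixed 512x512 size.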
annots[:, 1] *= scale1 385 | annots[:, 3] *= scale1 386 | 387 | return {'img': torch.from_numpy(image), 'annot': torch.from_numpy(annots), 'scale1': scale1, 'scale2': scale2} 388 | 389 | 390 | class Augmenter(object): 391 | """Convert ndarrays in sample to Tensors.""" 392 | 393 | def __call__(self, sample, flip_x=0.5): 394 | 395 | if np.random.rand() < flip_x: 396 | image, annots = sample['img'], sample['annot'] 397 | image = image[:, ::-1, :] 398 | 399 | rows, cols, channels = image.shape 400 | 401 | x1 = annots[:, 0].copy() 402 | x2 = annots[:, 2].copy() 403 | 404 | x_tmp = x1.copy() 405 | 406 | annots[:, 0] = cols - x2 407 | annots[:, 2] = cols - x_tmp 408 | 409 | sample = {'img': image, 'annot': annots} 410 | 411 | return sample 412 | 413 | 414 | class Normalizer(object): 415 | 416 | def __init__(self): 417 | self.mean = np.array([[[0.485, 0.456, 0.406]]]) 418 | self.std = np.array([[[0.229, 0.224, 0.225]]]) 419 | 420 | def __call__(self, sample): 421 | 422 | image, annots = sample['img'], sample['annot'] 423 | 424 | return {'img':((image.astype(np.float32)-self.mean)/self.std), 'annot': annots} 425 | 426 | # class to_tensor(object): 427 | # 428 | # def __call__(self, sample): 429 | # image, annots = sample['img'], sample['annot'] 430 | # return {} 431 | 432 | 433 | 434 | class UnNormalizer(object): 435 | def __init__(self, mean=None, std=None): 436 | if mean == None: 437 | self.mean = [0.485, 0.456, 0.406] 438 | else: 439 | self.mean = mean 440 | if std == None: 441 | self.std = [0.229, 0.224, 0.225] 442 | else: 443 | self.std = std 444 | 445 | def __call__(self, tensor): 446 | """ 447 | Args: 448 | tensor (Tensor): Tensor image of size (C, H, W) to be normalized. 449 | Returns: 450 | Tensor: Normalized image. 451 | """ 452 | for t, m, s in zip(tensor, self.mean, self.std): 453 | t.mul_(s).add_(m) 454 | return tensor 455 | 456 | 457 | class AspectRatioBasedSampler(Sampler): 458 | 459 | def __init__(self, data_source, batch_size, drop_last): 460 | self.data_source = data_source 461 | self.batch_size = batch_size 462 | self.drop_last = drop_last 463 | self.groups = self.group_images() 464 | 465 | def __iter__(self): 466 | random.shuffle(self.groups) 467 | for group in self.groups: 468 | yield group 469 | 470 | def __len__(self): 471 | if self.drop_last: 472 | return len(self.data_source) // self.batch_size 473 | else: 474 | return (len(self.data_source) + self.batch_size - 1) // self.batch_size 475 | 476 | def group_images(self): 477 | # determine the order of the images 478 | order = list(range(len(self.data_source))) 479 | order.sort(key=lambda x: self.data_source.image_aspect_ratio(x)) 480 | 481 | # divide into groups, one group = one batch 482 | return [[order[x % len(order)] for x in range(i, i + self.batch_size)] for i in range(0, len(order), self.batch_size)] 483 | --------------------------------------------------------------------------------