├── lib
│   ├── __init__.py
│   ├── nms
│   │   ├── __init__.py
│   │   ├── src
│   │   │   ├── cuda
│   │   │   │   ├── nms_kernel.cu.o
│   │   │   │   ├── nms_kernel.h
│   │   │   │   └── nms_kernel.cu
│   │   │   ├── nms_cuda.h
│   │   │   ├── nms.h
│   │   │   ├── nms_cuda.c
│   │   │   └── nms.c
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   └── pth_nms.cpython-36.pyc
│   │   ├── build.py
│   │   └── pth_nms.py
│   ├── __pycache__
│   │   └── __init__.cpython-36.pyc
│   └── build.sh
├── img
│   ├── 1.png
│   ├── 2.png
│   ├── 3.jpg
│   └── 4.png
├── imges
│   ├── 1.jpg
│   ├── 2.jpg
│   ├── 3.jpg
│   ├── 4.jpg
│   ├── 5.jpg
│   └── 6.jpg
├── __pycache__
│   └── coco_eval.cpython-36.pyc
├── model
│   ├── __pycache__
│   │   ├── BiFPN.cpython-35.pyc
│   │   ├── BiFPN.cpython-36.pyc
│   │   ├── losses.cpython-35.pyc
│   │   ├── losses.cpython-36.pyc
│   │   ├── model.cpython-35.pyc
│   │   ├── model.cpython-36.pyc
│   │   ├── util.cpython-35.pyc
│   │   ├── util.cpython-36.pyc
│   │   ├── utils.cpython-35.pyc
│   │   ├── utils.cpython-36.pyc
│   │   ├── __init__.cpython-35.pyc
│   │   ├── __init__.cpython-36.pyc
│   │   ├── anchors.cpython-35.pyc
│   │   ├── anchors.cpython-36.pyc
│   │   ├── RetinaHead.cpython-35.pyc
│   │   ├── RetinaHead.cpython-36.pyc
│   │   ├── efficientdet.cpython-35.pyc
│   │   └── efficientdet.cpython-36.pyc
│   ├── __init__.py
│   ├── efficientdet.py
│   ├── anchors.py
│   ├── RetinaHead.py
│   ├── losses.py
│   ├── BiFPN.py
│   ├── util.py
│   ├── model.py
│   └── utils.py
├── dataset
│   ├── __pycache__
│   │   └── dataloader.cpython-36.pyc
│   └── dataloader.py
├── log
│   ├── events.out.tfevents.1577539929.fineserver
│   └── events.out.tfevents.1577540185.fineserver
├── .idea
│   ├── misc.xml
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── modules.xml
│   ├── bishe.iml
│   └── workspace.xml
├── README.md
├── coco_eval.py
├── demo.py
└── train.py
/lib/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lib/nms/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/img/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/img/1.png
--------------------------------------------------------------------------------
/img/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/img/2.png
--------------------------------------------------------------------------------
/img/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/img/3.jpg
--------------------------------------------------------------------------------
/img/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/img/4.png
--------------------------------------------------------------------------------
/imges/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/1.jpg
--------------------------------------------------------------------------------
/imges/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/2.jpg
--------------------------------------------------------------------------------
/imges/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/3.jpg
--------------------------------------------------------------------------------
/imges/4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/4.jpg
--------------------------------------------------------------------------------
/imges/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/5.jpg
--------------------------------------------------------------------------------
/imges/6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/6.jpg
--------------------------------------------------------------------------------
/lib/nms/src/cuda/nms_kernel.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/lib/nms/src/cuda/nms_kernel.cu.o
--------------------------------------------------------------------------------
/lib/nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh);
--------------------------------------------------------------------------------
/__pycache__/coco_eval.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/__pycache__/coco_eval.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/lib/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/model/__pycache__/BiFPN.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/BiFPN.cpython-35.pyc
--------------------------------------------------------------------------------
/model/__pycache__/BiFPN.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/BiFPN.cpython-36.pyc
--------------------------------------------------------------------------------
/model/__pycache__/losses.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/losses.cpython-35.pyc
--------------------------------------------------------------------------------
/model/__pycache__/losses.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/losses.cpython-36.pyc
--------------------------------------------------------------------------------
/model/__pycache__/model.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/model.cpython-35.pyc
--------------------------------------------------------------------------------
/model/__pycache__/model.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/model.cpython-36.pyc
--------------------------------------------------------------------------------
/model/__pycache__/util.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/util.cpython-35.pyc
--------------------------------------------------------------------------------
/model/__pycache__/util.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/util.cpython-36.pyc
--------------------------------------------------------------------------------
/model/__pycache__/utils.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/utils.cpython-35.pyc
--------------------------------------------------------------------------------
/model/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/utils.cpython-36.pyc
--------------------------------------------------------------------------------
/model/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/model/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/model/__pycache__/anchors.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/anchors.cpython-35.pyc
--------------------------------------------------------------------------------
/model/__pycache__/anchors.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/anchors.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/nms/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/lib/nms/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/nms/__pycache__/pth_nms.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/lib/nms/__pycache__/pth_nms.cpython-36.pyc
--------------------------------------------------------------------------------
/model/__pycache__/RetinaHead.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/RetinaHead.cpython-35.pyc
--------------------------------------------------------------------------------
/model/__pycache__/RetinaHead.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/RetinaHead.cpython-36.pyc
--------------------------------------------------------------------------------
/dataset/__pycache__/dataloader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/dataset/__pycache__/dataloader.cpython-36.pyc
--------------------------------------------------------------------------------
/log/events.out.tfevents.1577539929.fineserver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/log/events.out.tfevents.1577539929.fineserver
--------------------------------------------------------------------------------
/log/events.out.tfevents.1577540185.fineserver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/log/events.out.tfevents.1577540185.fineserver
--------------------------------------------------------------------------------
/model/__pycache__/efficientdet.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/efficientdet.cpython-35.pyc
--------------------------------------------------------------------------------
/model/__pycache__/efficientdet.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/efficientdet.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/nms/src/nms.h:
--------------------------------------------------------------------------------
1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh);
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.5.1"
2 | from .model import EfficientNet
3 | from .utils import (
4 | GlobalParams,
5 | BlockArgs,
6 | BlockDecoder,
7 | efficientnet,
8 | get_model_params,
9 | )
10 |
11 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/lib/nms/src/cuda/nms_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _NMS_KERNEL
2 | #define _NMS_KERNEL
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
9 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
10 |
11 | void _nms(int boxes_num, float * boxes_dev,
12 | unsigned long long * mask_dev, float nms_overlap_thresh);
13 |
14 | #ifdef __cplusplus
15 | }
16 | #endif
17 |
18 | #endif
19 |
20 |
--------------------------------------------------------------------------------
/lib/build.sh:
--------------------------------------------------------------------------------
1 | CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \
2 | -gencode arch=compute_35,code=sm_35 \
3 | -gencode arch=compute_50,code=sm_50 \
4 | -gencode arch=compute_52,code=sm_52 \
5 | -gencode arch=compute_60,code=sm_60 \
6 | -gencode arch=compute_61,code=sm_61"
7 |
8 |
9 | # Build NMS
10 | cd nms/src/cuda
11 | echo "Compiling nms kernels by nvcc..."
12 | /usr/local/cuda/bin/nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH
13 | cd ../../
14 | python build.py install
15 | cd ../
16 |
--------------------------------------------------------------------------------
/.idea/bishe.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/lib/nms/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | #from torch.utils.ffi import create_extension
4 | from torch.utils.cpp_extension import BuildExtension
5 |
6 |
7 | sources = ['src/nms.c']
8 | headers = ['src/nms.h']
9 | defines = []
10 | with_cuda = False
11 |
12 | if torch.cuda.is_available():
13 | print('Including CUDA code.')
14 | sources += ['src/nms_cuda.c']
15 | headers += ['src/nms_cuda.h']
16 | defines += [('WITH_CUDA', None)]
17 | with_cuda = True
18 |
19 | this_file = os.path.dirname(os.path.realpath(__file__))
20 | print(this_file)
21 | extra_objects = ['src/cuda/nms_kernel.cu.o']
22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
23 |
24 | ffi = BuildExtension(
25 | '_ext.nms',
26 | headers=headers,
27 | sources=sources,
28 | define_macros=defines,
29 | relative_to=__file__,
30 | with_cuda=with_cuda,
31 | extra_objects=extra_objects,
32 | extra_compile_args=['-std=c99']
33 | )
34 |
35 | if __name__ == '__main__':
36 | ffi.build()
37 |
--------------------------------------------------------------------------------
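Note that `build.py` above still follows the call signature of the removed `torch.utils.ffi.create_extension` interface, so it is unlikely to run as-is on PyTorch 1.0+; `model/RetinaHead.py` already uses `torchvision.ops.nms`, so building this extension is optional. For reference only, a hypothetical `setup.py` sketch using `torch.utils.cpp_extension` (it assumes the TH/THC-based C sources were first ported to the ATen C++ API, which this repository does not do) might look like:

```python
# Hypothetical sketch only, not part of this repository.
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

setup(
    name='nms_ext',
    ext_modules=[
        CUDAExtension(
            name='nms_ext',
            # assumed file names; the existing TH/THC .c sources would need porting first
            sources=['src/nms.cpp', 'src/nms_cuda.cpp', 'src/cuda/nms_kernel.cu'],
        ),
    ],
    # BuildExtension wires up the mixed C++/nvcc compilation for CUDAExtension
    cmdclass={'build_ext': BuildExtension},
)
```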
/lib/nms/pth_nms.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from ._ext import nms
3 | import numpy as np
4 |
5 | def pth_nms(dets, thresh):
6 | """
7 | NMS over a (N, 5) tensor of [x1, y1, x2, y2, score]; dispatches to the CPU or CUDA extension depending on where dets lives and returns the indices of the kept boxes.
8 | """
9 | if not dets.is_cuda:
10 | x1 = dets[:, 0]
11 | y1 = dets[:, 1]
12 | x2 = dets[:, 2]
13 | y2 = dets[:, 3]
14 | scores = dets[:, 4]
15 |
16 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
17 | order = scores.sort(0, descending=True)[1]
18 | # order = torch.from_numpy(np.ascontiguousarray(scores.numpy().argsort()[::-1])).long()
19 |
20 | keep = torch.LongTensor(dets.size(0))
21 | num_out = torch.LongTensor(1)
22 | nms.cpu_nms(keep, num_out, dets, order, areas, thresh)
23 |
24 | return keep[:num_out[0]]
25 | else:
26 | x1 = dets[:, 0]
27 | y1 = dets[:, 1]
28 | x2 = dets[:, 2]
29 | y2 = dets[:, 3]
30 | scores = dets[:, 4]
31 |
32 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
33 | order = scores.sort(0, descending=True)[1]
34 | # order = torch.from_numpy(np.ascontiguousarray(scores.cpu().numpy().argsort()[::-1])).long().cuda()
35 |
36 | dets = dets[order].contiguous()
37 |
38 | keep = torch.LongTensor(dets.size(0))
39 | num_out = torch.LongTensor(1)
40 | # keep = torch.cuda.LongTensor(dets.size(0))
41 | # num_out = torch.cuda.LongTensor(1)
42 | nms.gpu_nms(keep, num_out, dets, thresh)
43 |
44 | return order[keep[:num_out[0]].cuda()].contiguous()
45 | # return order[keep[:num_out[0]]].contiguous()
46 |
47 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # efficientdet-pytorch
2 | 
3 | 
4 | 
5 | 
6 |
7 | PyTorch implementation of EfficientDet object detection, as described in [EfficientDet: Scalable and Efficient Object Detection](https://arxiv.org/pdf/1911.09070.pdf).
8 |
9 | This implementation is a very simple version without much data augmentation.
10 |
11 | The EfficientNet code is borrowed from [A PyTorch implementation of EfficientNet](https://github.com/lukemelas/EfficientNet-PyTorch). If you want to train EfficientDet from scratch, you should load the pretrained EfficientNet parameters:
12 |
13 | ```
14 | python train.py --coco_path '/home/hoo/Dataset/COCO' --backbone 'efficientnet-b0' --backbone_pretrained True
15 | ```
16 |
17 | The pretrained EfficientNet parameters will be downloaded and loaded automatically, and training will start.
18 |
19 | I've only trained efficientdet-d0 so far, and without much data augmentation. If you want to load pretrained EfficientDet parameters, use
20 |
21 | ```
22 | python train.py --coco_path '/home/hoo/Dataset/COCO' --backbone 'efficientnet-b0' --backbone_pretrained False --EfficientDet_pretrained True --pretrained './weights/efficientdet_0.pth'
23 | ```
24 | | Model | mAP | pretrained weights |
25 | | :-------------: | :---: | :----------------------------------------------------------: |
26 | | efficientdet-d0 | 25.9% | [download](https://drive.google.com/open?id=1UgQp9wqtc1O_EabU9O6NWNG6B8imYmv_) |
27 |
28 | **QQ group: 607724770 (Torch discussion group)**
29 |
30 | ## Acknowledgements
31 | - The EfficientNet code is borrowed from [A PyTorch implementation of EfficientNet](https://github.com/lukemelas/EfficientNet-PyTorch)
32 | - The RetinaNet code is borrowed from the [Pytorch implementation of RetinaNet object detection](https://github.com/yhenon/pytorch-retinanet)
33 |
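34 | ## Inference
35 |
36 | To run detection on a single image with a trained checkpoint, use `demo.py`. A minimal example based on its argparse defaults (point `--img_path` and `--weight_path` at your own files):
37 |
38 | ```
39 | python demo.py --img_path './imges/6.jpg' --weight_path './weights/retinanet_15.pth' --backbone 'efficientnet-b0' --threshold 0.35
40 | ```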
--------------------------------------------------------------------------------
/lib/nms/src/nms_cuda.c:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 | #include <THC/THC.h>
8 | #include <TH/TH.h>
9 | #include <math.h>
10 | #include <stdio.h>
11 |
12 | #include "cuda/nms_kernel.h"
13 |
14 |
15 | extern THCState *state;
16 |
17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) {
18 | // boxes has to be sorted
19 | THArgCheck(THLongTensor_isContiguous(keep), 0, "keep must be contiguous");
20 | THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous");
21 | // Number of ROIs
22 | int boxes_num = THCudaTensor_size(state, boxes, 0);
23 | int boxes_dim = THCudaTensor_size(state, boxes, 1);
24 |
25 | float* boxes_flat = THCudaTensor_data(state, boxes);
26 |
27 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
28 | THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks);
29 | unsigned long long* mask_flat = THCudaLongTensor_data(state, mask);
30 |
31 | _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh);
32 |
33 | THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks);
34 | THLongTensor_copyCuda(state, mask_cpu, mask);
35 | THCudaLongTensor_free(state, mask);
36 |
37 | unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu);
38 |
39 | THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks);
40 | unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu);
41 | THLongTensor_fill(remv_cpu, 0);
42 |
43 | long * keep_flat = THLongTensor_data(keep);
44 | long num_to_keep = 0;
45 |
46 | int i, j;
47 | for (i = 0; i < boxes_num; i++) {
48 | int nblock = i / threadsPerBlock;
49 | int inblock = i % threadsPerBlock;
50 |
51 | if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) {
52 | keep_flat[num_to_keep++] = i;
53 | unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks;
54 | for (j = nblock; j < col_blocks; j++) {
55 | remv_cpu_flat[j] |= p[j];
56 | }
57 | }
58 | }
59 |
60 | long * num_out_flat = THLongTensor_data(num_out);
61 | * num_out_flat = num_to_keep;
62 |
63 | THLongTensor_free(mask_cpu);
64 | THLongTensor_free(remv_cpu);
65 |
66 | return 1;
67 | }
68 |
--------------------------------------------------------------------------------
/model/efficientdet.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # Author: huashuoshuo
3 | # Date: 2019/12/17 10:53
4 |
5 |
6 | import torch
7 | import numpy as np
8 | import torch.nn as nn
9 | from .BiFPN import BiFPN
10 | # from .RetinaHead import RetinaHead
11 |
12 | # class ConvBlock(nn.Module):
13 | # def __init__(self):
14 | # super().__init__()
15 | class ConvBlock(nn.Module):
16 | """Conv2d -> BatchNorm2d -> ReLU block."""
19 | def __init__(self, inp, oup, k_size, stride=1, padding=0):
20 | super().__init__()
21 | # Conv2d = get_same_padding_conv2d
22 | self.conv = nn.Conv2d(in_channels=inp, out_channels=oup, kernel_size=k_size, stride=stride, padding=padding, bias=False)
23 | self.norm = nn.BatchNorm2d(num_features=oup)
24 | self.act = nn.ReLU(inplace=True)
25 | def forward(self, x):
26 | x = self.norm(self.conv(x))
27 | return self.act(x)
28 |
29 | from model import EfficientNet
30 | # from .RetinaHead import RetinaHead
31 | class EfficientDet(nn.Module):
32 | """EfficientNet backbone + two extra stride-2 convs + a stack of BiFPN layers producing five feature levels."""
35 | def __init__(self, args):
36 | super().__init__()
37 |
38 | self.inp = 64
39 | self.oup = 64
40 | self.bifpn_repeat = 2
41 | print(args.backbone)
42 | self.backbone = EfficientNet.from_pretrained(args)
43 | # self.backbone.get_list_features()
44 | self.tail = nn.ModuleList([ConvBlock(320, self.oup, 3, 2, 1), ConvBlock(self.oup, self.oup, 3, 2, 1)])
45 | self.channel_same = self.change_channel(self.backbone.get_list_feature()[-3:])
46 | self.BiFPN_first = BiFPN(oup=self.oup, first=True)
47 | self.BiFPN = nn.ModuleList()
48 | for i in range(self.bifpn_repeat-1):
49 | self.BiFPN.append(BiFPN(oup=self.oup, first=False))
50 |
51 | def forward(self, inputs):
52 | features_in = self.extra(inputs)
53 | features_out = self.BiFPN_first(features_in)
54 | for i, bifpn in enumerate(self.BiFPN):
55 | features_out = bifpn(features_out)
56 | return features_out
57 |
58 |
59 | def extra(self, img):
60 | x = self.backbone(img)[-3:]
61 | # before_fpn = self.channel_same(x[-5:])
62 | # print(x[-1].shape)
63 | # print(self.tail)
64 | # tail = [tail_conv(x[-1]) for i, tail_conv in enumerate(self.tail)]
65 | for i, tail_conv in enumerate(self.tail):
66 | x.append(tail_conv(x[-1]))
67 |
68 |
69 | before_fpn = [
70 | conv(x[i])
71 | for i, conv in enumerate(self.channel_same)]
72 |
73 | before_fpn.extend(x[-2:])
74 |
75 | return before_fpn
76 |
77 | def change_channel(self, channel):
78 | convs = nn.ModuleList()
79 | for i in range(len(channel)):
80 | conv = ConvBlock(channel[i], self.oup, k_size=1, stride=1, padding=0)
81 | convs.append(conv)
82 | return convs
--------------------------------------------------------------------------------
/lib/nms/src/nms.c:
--------------------------------------------------------------------------------
1 | #include <TH/TH.h>
2 | #include <math.h>
3 |
4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) {
5 | // boxes has to be sorted
6 | THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous");
7 | THArgCheck(THFloatTensor_isContiguous(boxes), 2, "boxes must be contiguous");
8 | THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous");
9 | THArgCheck(THFloatTensor_isContiguous(areas), 4, "areas must be contiguous");
10 | // Number of ROIs
11 | long boxes_num = THFloatTensor_size(boxes, 0);
12 | long boxes_dim = THFloatTensor_size(boxes, 1);
13 |
14 | long * keep_out_flat = THLongTensor_data(keep_out);
15 | float * boxes_flat = THFloatTensor_data(boxes);
16 | long * order_flat = THLongTensor_data(order);
17 | float * areas_flat = THFloatTensor_data(areas);
18 |
19 | THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num);
20 | THByteTensor_fill(suppressed, 0);
21 | unsigned char * suppressed_flat = THByteTensor_data(suppressed);
22 |
23 | // nominal indices
24 | int i, j;
25 | // sorted indices
26 | int _i, _j;
27 | // temp variables for box i's (the box currently under consideration)
28 | float ix1, iy1, ix2, iy2, iarea;
29 | // variables for computing overlap with box j (lower scoring box)
30 | float xx1, yy1, xx2, yy2;
31 | float w, h;
32 | float inter, ovr;
33 |
34 | long num_to_keep = 0;
35 | for (_i=0; _i < boxes_num; ++_i) {
36 | i = order_flat[_i];
37 | if (suppressed_flat[i] == 1) {
38 | continue;
39 | }
40 | keep_out_flat[num_to_keep++] = i;
41 | ix1 = boxes_flat[i * boxes_dim];
42 | iy1 = boxes_flat[i * boxes_dim + 1];
43 | ix2 = boxes_flat[i * boxes_dim + 2];
44 | iy2 = boxes_flat[i * boxes_dim + 3];
45 | iarea = areas_flat[i];
46 | for (_j = _i + 1; _j < boxes_num; ++_j) {
47 | j = order_flat[_j];
48 | if (suppressed_flat[j] == 1) {
49 | continue;
50 | }
51 | xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]);
52 | yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]);
53 | xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]);
54 | yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]);
55 | w = fmaxf(0.0, xx2 - xx1 + 1);
56 | h = fmaxf(0.0, yy2 - yy1 + 1);
57 | inter = w * h;
58 | ovr = inter / (iarea + areas_flat[j] - inter);
59 | if (ovr >= nms_overlap_thresh) {
60 | suppressed_flat[j] = 1;
61 | }
62 | }
63 | }
64 |
65 | long *num_out_flat = THLongTensor_data(num_out);
66 | *num_out_flat = num_to_keep;
67 | THByteTensor_free(suppressed);
68 | return 1;
69 | }
--------------------------------------------------------------------------------
/coco_eval.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | from pycocotools.coco import COCO
4 | from pycocotools.cocoeval import COCOeval
5 |
6 | import numpy as np
7 | import json
8 | import os
9 |
10 | import torch
11 |
12 | def evaluate_coco(dataset, model, threshold=0.05):
13 |
14 | model.eval()
15 |
16 | with torch.no_grad():
17 |
18 | # start collecting results
19 | results = []
20 | image_ids = []
21 |
22 | for index in range(len(dataset)-4500):  # only a subset of the validation images is evaluated here, to keep evaluation fast
23 | data = dataset[index]
24 | # scale = data['scale']
25 | scale1 = data['scale1']
26 | scale2 = data['scale2']
27 |
28 | # run network
29 | scores, labels, boxes = model(data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0))
30 | scores = scores.cpu()
31 | labels = labels.cpu()
32 | boxes = boxes.cpu()
33 |
34 | # correct boxes for image scale
35 | # boxes /= scale
36 | boxes[:, 0] /= scale2
37 | boxes[:, 2] /= scale2
38 | boxes[:, 1] /= scale1
39 | boxes[:, 3] /= scale1
40 |
41 | if boxes.shape[0] > 0:
42 | # change to (x, y, w, h) (MS COCO standard)
43 | boxes[:, 2] -= boxes[:, 0]
44 | boxes[:, 3] -= boxes[:, 1]
45 |
46 | # compute predicted labels and scores
47 | #for box, score, label in zip(boxes[0], scores[0], labels[0]):
48 | for box_id in range(boxes.shape[0]):
49 | score = float(scores[box_id])
50 | label = int(labels[box_id])
51 | box = boxes[box_id, :]
52 |
53 | # scores are sorted, so we can break
54 | if score < threshold:
55 | break
56 |
57 | # append detection for each positively labeled class
58 | image_result = {
59 | 'image_id' : dataset.image_ids[index],
60 | 'category_id' : dataset.label_to_coco_label(label),
61 | 'score' : float(score),
62 | 'bbox' : box.tolist(),
63 | }
64 |
65 | # append detection to results
66 | results.append(image_result)
67 |
68 | # append image to list of processed images
69 | image_ids.append(dataset.image_ids[index])
70 |
71 | # print progress
72 | print('{}/{}'.format(index, len(dataset)), end='\r')
73 |
74 | if not len(results):
75 | return
76 |
77 | # write output
78 | json.dump(results, open('{}_bbox_results.json'.format(dataset.set_name), 'w'), indent=4)
79 |
80 | # load results in COCO evaluation tool
81 | coco_true = dataset.coco
82 | coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(dataset.set_name))
83 |
84 | # run COCO evaluation
85 | coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
86 | coco_eval.params.imgIds = image_ids
87 | coco_eval.evaluate()
88 | coco_eval.accumulate()
89 | coco_eval.summarize()
90 | mAP = coco_eval.stats[0]  # COCO AP at IoU=0.50:0.95; summarize() itself prints the table and returns None
91 | model.train()
92 |
93 | return mAP
94 |
--------------------------------------------------------------------------------
/lib/nms/src/cuda/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 | #ifdef __cplusplus
8 | extern "C" {
9 | #endif
10 |
11 | #include <stdio.h>
12 | #include <math.h>
13 | #include <float.h>
14 | #include "nms_kernel.h"
15 |
16 | __device__ inline float devIoU(float const * const a, float const * const b) {
17 | float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]);
18 | float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]);
19 | float width = fmaxf(right - left + 1, 0.f), height = fmaxf(bottom - top + 1, 0.f);
20 | float interS = width * height;
21 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
22 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
23 | return interS / (Sa + Sb - interS);
24 | }
25 |
26 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
27 | const float *dev_boxes, unsigned long long *dev_mask) {
28 | const int row_start = blockIdx.y;
29 | const int col_start = blockIdx.x;
30 |
31 | // if (row_start > col_start) return;
32 |
33 | const int row_size =
34 | fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
35 | const int col_size =
36 | fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
37 |
38 | __shared__ float block_boxes[threadsPerBlock * 5];
39 | if (threadIdx.x < col_size) {
40 | block_boxes[threadIdx.x * 5 + 0] =
41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
42 | block_boxes[threadIdx.x * 5 + 1] =
43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
44 | block_boxes[threadIdx.x * 5 + 2] =
45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
46 | block_boxes[threadIdx.x * 5 + 3] =
47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
48 | block_boxes[threadIdx.x * 5 + 4] =
49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
50 | }
51 | __syncthreads();
52 |
53 | if (threadIdx.x < row_size) {
54 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
55 | const float *cur_box = dev_boxes + cur_box_idx * 5;
56 | int i = 0;
57 | unsigned long long t = 0;
58 | int start = 0;
59 | if (row_start == col_start) {
60 | start = threadIdx.x + 1;
61 | }
62 | for (i = start; i < col_size; i++) {
63 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
64 | t |= 1ULL << i;
65 | }
66 | }
67 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
68 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
69 | }
70 | }
71 |
72 |
73 | void _nms(int boxes_num, float * boxes_dev,
74 | unsigned long long * mask_dev, float nms_overlap_thresh) {
75 |
76 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
77 | DIVUP(boxes_num, threadsPerBlock));
78 | dim3 threads(threadsPerBlock);
79 | nms_kernel<<<blocks, threads>>>(boxes_num,
80 | nms_overlap_thresh,
81 | boxes_dev,
82 | mask_dev);
83 | }
84 |
85 | #ifdef __cplusplus
86 | }
87 | #endif
88 |
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # Author: huashuoshuo
3 | # Date: 12/26/19 2:12 PM
4 | import torch
5 | import torch.nn as nn
6 | from model.util import Filter_boxes
7 | import os
8 | import argparse
9 | from model.RetinaHead import RetinaHead
10 | import skimage.io
11 | import skimage
12 | import skimage.transform
13 | import numpy as np
14 | import cv2
15 | import matplotlib.pyplot as plt
16 | import time
17 | from model.util import num2name
18 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
19 |
20 | def main():
21 | parser = argparse.ArgumentParser()
22 | parser.add_argument('--img_path', type=str, default='/home/huashuoshuo/bishe/imges/6.jpg')
23 | parser.add_argument('--weight_path', type=str, default='./weights/retinanet_15.pth')
24 | parser.add_argument('--backbone', type=str, default='efficientnet-b0')
25 | parser.add_argument('--backbone_pretrained', type=bool, default=False)
26 | parser.add_argument('--threshold', type=float, default=0.35)
27 |
28 | parser = parser.parse_args()
29 | with torch.no_grad():
30 | efficientdet = RetinaHead(parser, is_demo=True)
31 | # efficientdet = torch.nn.DataParallel(efficientdet).cuda()
32 | efficientdet = efficientdet.cuda()
33 | state_dict = torch.load(parser.weight_path)
34 | efficientdet.load_state_dict(state_dict)
35 |
36 | # img read
37 | img = skimage.io.imread(parser.img_path)
38 | img_input, scale1, scale2= preprocessing(img)
39 | efficientdet.eval()
40 | img_input = img_input.cuda()
41 | time_start = time.time()
42 | # for i in range(1000):
43 | boxes, classification, scores = efficientdet(img_input)
44 | boxes, scores, labels= Filter_boxes(parser)([boxes, classification, scores])
45 |
46 | time_stop = time.time()
47 | print('time:', time_stop-time_start)
48 | # scores = scores.cpu().numpy()
49 | # labels = labels.cpu().numpy()
50 | # boxes = boxes.cpu().numpy()
51 |
52 | # print(boxes)
53 | # print(np.shape(img))
54 | text_thickness = 1
55 | thickness = 2
56 | scale = 0.4
57 | line_type = 8
58 | for i in range(np.shape(boxes)[0]):
59 | box = boxes[i].cpu().numpy()
60 | score = scores[i].cpu().numpy()
61 | for j in range(np.shape(box)[0]):
62 | p1 = (int(box[j][0]/scale2), int(box[j][1]/scale1))
63 | p2 = (int(box[j][2]/scale2), int(box[j][3]/scale1))
64 | cv2.rectangle(img, p1, p2, (0, 0, 255), 2)
65 | s = '%s/%.1f%%' % (num2name[labels[i]+1], score[j] * 100)
66 | text_size, baseline = cv2.getTextSize(s, cv2.FONT_HERSHEY_SIMPLEX, scale, text_thickness)
67 |
68 | if (p2[0] - p1[0] < 1) or (p2[1] - p1[1] < 1):
69 | continue
70 | # p1 = (p1[0] - text_size[1], p1[1])
71 |
72 | cv2.rectangle(img, (p1[0], p1[1]),
73 | (p1[0] + text_size[0], p1[1] + text_size[1]), (0, 0, 255), -1)
74 |
75 | cv2.putText(img, s, (p1[0], p1[1] + 2*baseline), cv2.FONT_HERSHEY_SIMPLEX, scale, (255, 255, 255),
76 | text_thickness, line_type)
77 | plt.imshow(img)
78 | plt.show()
79 | # print(scores, labels)
80 |
81 |
82 |
83 | return
84 |
85 | def preprocessing(img):
86 |
87 | img = img.astype(np.float32) / 255.0
88 | # normalize
89 | mean = np.array([[[0.485, 0.456, 0.406]]])
90 | std = np.array([[[0.229, 0.224, 0.225]]])
91 | img = (img - mean) / std
92 | # resize
93 | rows, cols, cns = np.shape(img)
94 | scale1 = 512 / rows
95 | scale2 = 512 / cols
96 | img_input = skimage.transform.resize(img, (512, 512))
97 | img_input = torch.from_numpy(img_input)
98 | img_input = img_input.unsqueeze(0)
99 | img_input = img_input.permute(0, 3, 1, 2).float()
100 | return img_input, scale1, scale2
101 |
102 |
103 | def box_filter(scores, labels, boxes):
104 | scores = scores.cpu()
105 | labels = labels.cpu()
106 | boxes = boxes.cpu()
107 |
108 | return
109 |
110 |
111 | if __name__=='__main__':
112 | main()
113 |
--------------------------------------------------------------------------------
/model/anchors.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # Author: huashuoshuo
3 | # Date: 2019/12/19 18:58
4 |
5 | import numpy as np
6 | import torch
7 | import torch.nn as nn
8 |
9 |
10 | class Anchors(nn.Module):
11 | def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None):
12 | super(Anchors, self).__init__()
13 |
14 | if pyramid_levels is None:
15 | self.pyramid_levels = [3, 4, 5, 6, 7]
16 | if strides is None:
17 | self.strides = [2 ** x for x in self.pyramid_levels]
18 | if sizes is None:
19 | self.sizes = [2 ** (x + 2) for x in self.pyramid_levels]
20 | if ratios is None:
21 | self.ratios = np.array([0.5, 1, 2])
22 | if scales is None:
23 | self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])
24 |
25 | def forward(self, image):
26 |
27 | image_shape = image.shape[2:]
28 | image_shape = np.array(image_shape)
29 | image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels]
30 |
31 | # compute anchors over all pyramid levels
32 | all_anchors = np.zeros((0, 4)).astype(np.float32)
33 |
34 | for idx, p in enumerate(self.pyramid_levels):
35 | anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales)
36 | shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors)
37 | all_anchors = np.append(all_anchors, shifted_anchors, axis=0)
38 |
39 | all_anchors = np.expand_dims(all_anchors, axis=0)
40 |
41 | return torch.from_numpy(all_anchors.astype(np.float32)).cuda()
42 |
43 |
44 | def generate_anchors(base_size=16, ratios=None, scales=None):
45 | """
46 | Generate anchor (reference) windows by enumerating aspect ratios X
47 | scales w.r.t. a reference window.
48 | """
49 |
50 | if ratios is None:
51 | ratios = np.array([0.5, 1, 2])
52 |
53 | if scales is None:
54 | scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])
55 |
56 | num_anchors = len(ratios) * len(scales)
57 |
58 | # initialize output anchors
59 | anchors = np.zeros((num_anchors, 4))
60 |
61 | # scale base_size
62 | anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T
63 |
64 | # compute areas of anchors
65 | areas = anchors[:, 2] * anchors[:, 3]
66 |
67 | # correct for ratios
68 | anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales)))
69 | anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales))
70 |
71 | # transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2)
72 | anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T
73 | anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T
74 |
75 | return anchors
76 |
77 |
78 | def compute_shape(image_shape, pyramid_levels):
79 | """Compute shapes based on pyramid levels.
80 |
81 | :param image_shape:
82 | :param pyramid_levels:
83 | :return:
84 | """
85 | image_shape = np.array(image_shape[:2])
86 | image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels]
87 | return image_shapes
88 |
89 |
90 | def anchors_for_shape(
91 | image_shape,
92 | pyramid_levels=None,
93 | ratios=None,
94 | scales=None,
95 | strides=None,
96 | sizes=None,
97 | shapes_callback=None,
98 | ):
99 | image_shapes = compute_shape(image_shape, pyramid_levels)
100 |
101 | # compute anchors over all pyramid levels
102 | all_anchors = np.zeros((0, 4))
103 | for idx, p in enumerate(pyramid_levels):
104 | anchors = generate_anchors(base_size=sizes[idx], ratios=ratios, scales=scales)
105 | shifted_anchors = shift(image_shapes[idx], strides[idx], anchors)
106 | all_anchors = np.append(all_anchors, shifted_anchors, axis=0)
107 |
108 | return all_anchors
109 |
110 |
111 | def shift(shape, stride, anchors):
112 | shift_x = (np.arange(0, shape[1]) + 0.5) * stride
113 | shift_y = (np.arange(0, shape[0]) + 0.5) * stride
114 |
115 | shift_x, shift_y = np.meshgrid(shift_x, shift_y)
116 |
117 | shifts = np.vstack((
118 | shift_x.ravel(), shift_y.ravel(),
119 | shift_x.ravel(), shift_y.ravel()
120 | )).transpose()
121 |
122 | # add A anchors (1, A, 4) to
123 | # cell K shifts (K, 1, 4) to get
124 | # shift anchors (K, A, 4)
125 | # reshape to (K*A, 4) shifted anchors
126 | A = anchors.shape[0]
127 | K = shifts.shape[0]
128 | all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
129 | all_anchors = all_anchors.reshape((K * A, 4))
130 |
131 | return all_anchors
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # Author: huashuoshuo
3 | # Date: 2019/12/19 14:57
4 |
5 | import os
6 | import torch
7 | import numpy as np
8 | import torch.nn as nn
9 | import torch.optim as optim
10 | from dataset.dataloader import CocoDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, UnNormalizer, Normalizer
11 | from torch.utils.data import Dataset, DataLoader
12 | from torchvision import transforms
13 | # from model.efficientdet import EfficientDet
14 | from model.RetinaHead import RetinaHead
15 | import coco_eval
16 | import argparse
17 | from tensorboardX import SummaryWriter
18 | import cv2
19 | import matplotlib.pyplot as plt
20 |
21 | writer = SummaryWriter('log')  # TensorBoard writer; required by the writer.add_scalar calls in the training loop
22 |
23 | os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
24 | def main(arg=None):
25 | parser = argparse.ArgumentParser()
26 |
27 | parser.add_argument('--coco_path', type=str, default='/home/hoo/Dataset/COCO')
28 | parser.add_argument('--depth', type=int, default=3)
29 | parser.add_argument('--epoches', type=int, default=50)
30 | parser.add_argument('--phi', type=int, default=0)
31 | parser.add_argument('--backbone', type=str, default='efficientnet-b0')
32 | parser.add_argument('--backbone_pretrained', type=bool, default=True)
33 | parser.add_argument('--EfficientDet_pretrained', type=bool, default=False)
34 | parser.add_argument('--pretrained', type=str, default='./weights/retinanet_1.pth')
35 | parser.add_argument('--batch_size', type=int, default=24)
36 |
37 | parser = parser.parse_args(arg)
38 | dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
39 | # print(dataset_train.num_classes())
40 | dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()]))
41 |
42 | sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False)
43 | dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_sampler=sampler)
44 |
45 |
46 | sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
47 | dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)
48 |
49 | # Create the Model
50 |
51 | efficientdet = RetinaHead(parser)
52 |
53 |
54 |
55 | efficientdet = torch.nn.DataParallel(efficientdet).cuda()
56 | if parser.EfficientDet_pretrained:
57 | state_dict = torch.load(parser.pretrained)
58 | # print(state_dict)
59 | efficientdet.module.load_state_dict(state_dict)
60 |
61 | efficientdet.training = True
62 |
63 | optimizer = optim.Adam(efficientdet.parameters(), lr=1e-3)
64 | # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
65 | scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[3, 5, 7, 9, 11, 13, 15, 17, 19], gamma=0.5)
66 |
67 | for epoch_num in range(parser.epoches):
68 | efficientdet.train()
69 |
70 | epoch_loss = []
71 |
72 | for iter_num, data in enumerate(dataloader_train):
73 | # break  # leftover debug early-exit; keep it commented out so the loop actually trains
74 | # try:
75 | # print(data)
76 | optimizer.zero_grad()
77 | # print(np.shape(data['annot']))
78 | classification_loss, regression_loss = efficientdet([data['img'].cuda().float(), data['annot']])
79 | classification_loss = classification_loss.mean()
80 | regression_loss = regression_loss.mean()
81 | loss = classification_loss + regression_loss
82 | if bool(loss==0):
83 | continue
84 | loss.backward()
85 |
86 | torch.nn.utils.clip_grad_norm_(efficientdet.parameters(), 0.1)
87 | optimizer.step()
88 | epoch_loss.append(float(loss))
89 | print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss)))
90 |
91 | if iter_num % 200 == 199:
92 | niter = epoch_num * len(dataloader_train) + iter_num
93 | # print(loss)
94 | writer.add_scalar('Train/Loss', loss, niter)
95 | writer.add_scalar('Train/Reg_Loss', regression_loss, niter)
96 | writer.add_scalar('Train/Cls_Loss', classification_loss, niter)
97 |
98 |
99 | del classification_loss
100 | del regression_loss
101 | # except Exception as e:
102 | # print(e)
103 | # continue
104 | # if iter_num == 20:
105 | # break
106 |
107 | # print('Evaluating dataset')
108 | mAP = coco_eval.evaluate_coco(dataset_val, efficientdet)
109 | # writer.add_scalar('Test/mAP', mAP, epoch_num)
110 | print('Save Model')
111 | torch.save(efficientdet.module.state_dict(), './weights/retinanet_{}.pth'.format(epoch_num))
112 | # scheduler.step(np.mean(epoch_loss))
113 | scheduler.step(epoch=epoch_num)
114 | # writer.close()
115 |
116 |
117 | if __name__ == '__main__':
118 | main()
--------------------------------------------------------------------------------
/model/RetinaHead.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # Author: huashuoshuo
3 | # Date: 2019/12/18 16:37
4 |
5 | import torch
6 | import torch.nn as nn
7 | from model.BiFPN import ConvBlock
8 | import model.losses as losses
9 | from model.efficientdet import EfficientDet
10 | from pycocotools.coco import COCO as COCO
11 | from model.anchors import Anchors
12 | # from lib.nms.pth_nms import pth_nms
13 | import torchvision.ops as ops
14 | from model.util import BasicBlock, Bottleneck, BBoxTransform, ClipBoxes, Filter_boxes
15 | def nms(bbox, score, thresh):
16 | # bbox, score = dets
17 | return ops.nms(boxes=bbox, scores=score, iou_threshold=thresh)
18 | # return pth_nms(dets, thresh)
19 |
20 |
21 | class Reg(nn.Module):
22 | """Box regression head: a small stack of ConvBlocks followed by a conv that predicts 4 offsets per anchor."""
25 | def __init__(self, inp, oup, depth, num_anchor):
26 | super().__init__()
27 | self.inp = inp
28 | self.oup = oup
29 | self.D = depth
30 | self.reg = nn.ModuleList()
31 | self.num_anchors = num_anchor
32 |
33 | for i in range(self.D):
34 | self.reg.append(ConvBlock(inp=self.inp, oup=self.oup, k_size=3, stride=1, padding=1))
35 | # self.retina_cls = nn.Conv2d(self.oup, self.num_anchors * self.num_class, 3, padding=1)
36 | self.retina_reg = nn.Conv2d(self.oup, self.num_anchors * 4, 3, padding=1)
37 | def forward(self, x):
38 | reg = x
39 | for conv in self.reg:
40 | reg = conv(reg)
41 |
42 | reg = self.retina_reg(reg)
43 |
44 | reg = reg.permute(0, 2, 3, 1)
45 | return reg.contiguous().view(reg.shape[0], -1, 4)
46 |
47 | class Cls(nn.Module):
48 | """Classification head: a small stack of ConvBlocks followed by a conv + sigmoid that scores every anchor for each class."""
51 | def __init__(self, inp, oup, depth, num_anchor, num_class):
52 | super().__init__()
53 | self.inp = inp
54 | self.oup = oup
55 | self.D = depth
56 | self.cls = nn.ModuleList()
57 | self.num_anchors = num_anchor
58 | self.num_class = num_class
59 | for i in range(self.D):
60 | self.cls.append(ConvBlock(inp=self.inp, oup=self.oup, k_size=3, stride=1, padding=1))
61 | self.retina_cls = nn.Conv2d(self.oup, self.num_anchors * self.num_class, 3, padding=1)
62 | self.act = nn.Sigmoid()
63 | def forward(self, x):
64 | cls = x
65 | for conv in self.cls:
66 | cls = conv(cls)
67 | cls = self.retina_cls(cls)
68 | cls = self.act(cls)
69 |
70 | cls = cls.permute(0, 2, 3, 1)
71 |
72 | batch_size, width, height, channel = cls.shape
73 |
74 | out = cls.view(batch_size, width, height, self.num_anchors, self.num_class)
75 | return out.contiguous().view(cls.shape[0], -1, self.num_class)
76 |
77 |
78 | class RetinaHead(nn.Module):
79 | """Detector wrapper: EfficientDet features + shared regression/classification heads + anchors; returns the focal/regression losses in training mode and NMS-filtered detections otherwise."""
82 | def __init__(self, parser, num_classes=80, num_anchor=9, is_demo=False):
83 | super().__init__()
84 | depth = 3
85 | inp = oup = 64
86 |
87 | self.regression = Reg(inp, oup, depth-1, num_anchor)
88 | self.classification = Cls(inp, oup, depth-1, num_anchor, num_classes)
89 | self.FocalLoss = losses.FocalLoss()
90 | self.anchors = Anchors()
91 | self.EfficientDet = EfficientDet(parser)
92 | self.regressBoxes = BBoxTransform()
93 | self.is_demo = is_demo
94 | self.clipBoxes = ClipBoxes()
95 | def forward(self, inputs):
96 | if self.training:
97 | img_batch, annotations = inputs
98 | else:
99 | img_batch = inputs
100 |
101 | features = self.EfficientDet(img_batch)
102 | regression = torch.cat([self.regression(feature) for feature in features], dim=1)
103 | classification = torch.cat([self.classification(feature) for feature in features], dim=1)
104 | anchors = self.anchors(img_batch)
105 |
106 | # self.FocalLoss(classification, regression, anchors, annotations)
107 | if self.training:
108 | return self.FocalLoss(classification, regression, anchors, annotations)
109 | else:
110 | transformed_anchors = self.regressBoxes(anchors, regression)
111 | transformed_anchors = self.clipBoxes(transformed_anchors, img_batch)
112 |
113 | scores = torch.max(classification, dim=2, keepdim=True)[0]
114 |
115 | if self.is_demo:
116 | return transformed_anchors, classification, scores
117 |
118 | scores_over_thresh = (scores>0.01)[0, :, 0]
119 |
120 | if scores_over_thresh.sum() == 0:
121 | # no boxes to NMS, just return
122 | return [torch.zeros(0).cuda(), torch.zeros(0).cuda(), torch.zeros(0, 4).cuda()]
123 |
124 | classification = classification[:, scores_over_thresh, :]
125 | transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
126 | scores = scores[:, scores_over_thresh, :]
127 | # print(transformed_anchors.shape, scores.shape)
128 |
129 | # anchors_nms_idx = nms(torch.cat([transformed_anchors, scores], dim=2)[0, :, :], 0.5)
130 | # print(transformed_anchors[0, :, :])
131 | anchors_nms_idx = nms(transformed_anchors[0, :, :], scores[0, :, 0], 0.45)
132 | nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1)
133 |
134 | return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]]
135 |
136 |
137 |
138 |
139 |
140 |
141 |
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/model/losses.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # Author: huashuoshuo
3 | # Date: 2019/12/19 15:05
4 |
5 | import numpy as np
6 | import torch
7 | import torch.nn as nn
8 |
9 | def calc_iou(a, b):
10 |
11 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
12 | iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0])
13 | ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1])
14 |
15 | iw = torch.clamp(iw, min=0)
16 | ih = torch.clamp(ih, min=0)
17 |
18 | ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - iw * ih
19 |
20 | ua = torch.clamp(ua, min=1e-8)
21 |
22 | intersection = iw * ih
23 |
24 | IoU = intersection / ua
25 |
26 | return IoU
27 |
28 | class FocalLoss(nn.Module):
29 | #def __init__(self):
30 |
31 | def forward(self, classifications, regressions, anchors, annotations):
32 | alpha = 0.25
33 | gamma = 2.0
34 | batch_size = classifications.shape[0]
35 | classification_losses = []
36 | regression_losses = []
37 |
38 | anchor = anchors[0, :, :]
39 |
40 | anchor_widths = anchor[:, 2] - anchor[:, 0]
41 | anchor_heights = anchor[:, 3] - anchor[:, 1]
42 | anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
43 | anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights
44 |
45 | for j in range(batch_size):
46 |
47 | classification = classifications[j, :, :]
48 | regression = regressions[j, :, :]
49 |
50 | bbox_annotation = annotations[j, :, :]
51 | bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]
52 |
53 | if bbox_annotation.shape[0] == 0:
54 | regression_losses.append(torch.tensor(0).float().to(anchors.device))
55 | classification_losses.append(torch.tensor(0).float().to(anchors.device))
56 |
57 | continue
58 |
59 | classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)
60 |
61 | IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4]) # num_anchors x num_annotations
62 |
63 | IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1
64 |
65 | #import pdb
66 | #pdb.set_trace()
67 |
68 | # compute the loss for classification
69 | targets = torch.ones(classification.shape) * -1
70 | targets = targets.to(anchors.device)
71 |
72 | targets[torch.lt(IoU_max, 0.4), :] = 0
73 |
74 | positive_indices = torch.ge(IoU_max, 0.5)
75 |
76 | num_positive_anchors = positive_indices.sum()
77 |
78 | assigned_annotations = bbox_annotation[IoU_argmax, :]
79 |
80 | targets[positive_indices, :] = 0
81 | targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1
82 |
83 | alpha_factor = torch.ones(targets.shape) * alpha
84 | alpha_factor = alpha_factor.to(anchors.device)
85 | alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
86 | focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
87 | focal_weight = alpha_factor * torch.pow(focal_weight, gamma)
88 |
89 | bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))
90 |
91 | # cls_loss = focal_weight * torch.pow(bce, gamma)
92 | cls_loss = focal_weight * bce
93 |
94 | cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).to(anchors.device))
95 |
96 | classification_losses.append(cls_loss.sum()/torch.clamp(num_positive_anchors.float(), min=1.0))
97 |
98 | # compute the loss for regression
99 |
100 | if positive_indices.sum() > 0:
101 | assigned_annotations = assigned_annotations[positive_indices, :]
102 |
103 | anchor_widths_pi = anchor_widths[positive_indices]
104 | anchor_heights_pi = anchor_heights[positive_indices]
105 | anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
106 | anchor_ctr_y_pi = anchor_ctr_y[positive_indices]
107 |
108 | gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
109 | gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
110 | gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
111 | gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights
112 |
113 | # clip widths to 1
114 | gt_widths = torch.clamp(gt_widths, min=1)
115 | gt_heights = torch.clamp(gt_heights, min=1)
116 |
117 | targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
118 | targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
119 | targets_dw = torch.log(gt_widths / anchor_widths_pi)
120 | targets_dh = torch.log(gt_heights / anchor_heights_pi)
121 |
122 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
123 | targets = targets.t()
124 |
125 | targets = targets/torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).to(anchors.device)
126 |
127 |
128 | negative_indices = ~positive_indices
129 |
130 | regression_diff = torch.abs(targets - regression[positive_indices, :])
131 |
132 | regression_loss = torch.where(
133 | torch.le(regression_diff, 1.0 / 9.0),
134 | 0.5 * 9.0 * torch.pow(regression_diff, 2),
135 | regression_diff - 0.5 / 9.0
136 | )
137 | regression_losses.append(regression_loss.mean())
138 | else:
139 | regression_losses.append(torch.tensor(0).float().to(anchors.device))
140 |
141 | return torch.stack(classification_losses).mean(dim=0, keepdim=True), torch.stack(regression_losses).mean(dim=0, keepdim=True)
--------------------------------------------------------------------------------
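
A minimal sketch of exercising the loss above on toy CPU tensors; it assumes the repository root is on PYTHONPATH so that model.losses imports cleanly. Shapes follow the conventions of forward(): anchors are (batch, A, 4) and annotations are (batch, N, 5) with the class id in the last column.

import torch
from model.losses import calc_iou, FocalLoss

# two anchors and one ground-truth box, all in [x1, y1, x2, y2]
anchors = torch.tensor([[[0., 0., 10., 10.], [5., 5., 15., 15.]]])   # (1, A=2, 4)
annotations = torch.tensor([[[0., 0., 10., 10., 3.]]])               # (1, N=1, 5)

print(calc_iou(anchors[0], annotations[0, :, :4]))   # pairwise IoU, shape (A, N)

cls_out = torch.rand(1, 2, 20)    # (batch, A, num_classes) scores in [0, 1)
reg_out = torch.zeros(1, 2, 4)    # (batch, A, 4) regression outputs
cls_loss, reg_loss = FocalLoss()(cls_out, reg_out, anchors, annotations)
print(cls_loss.item(), reg_loss.item())
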
/model/BiFPN.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # Author: huashuoshuo
3 | # Date: 2019/12/17 14:36
4 |
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | from .utils import (
9 | round_filters,
10 | round_repeats,
11 | drop_connect,
12 | get_same_padding_conv2d,
13 | get_model_params,
14 | efficientnet_params,
15 | load_pretrained_weights,
16 | Swish,
17 | MemoryEfficientSwish,
18 | )
19 |
20 | class ConvBlock(nn.Module):
21 | """
22 |
23 | """
24 | def __init__(self, inp, oup, k_size, stride=1, padding=0, group=1):
25 | super().__init__()
26 | # Conv2d = get_same_padding_conv2d
27 | self.conv = nn.Conv2d(in_channels=inp, out_channels=oup, kernel_size=k_size, stride=stride, padding=padding, bias=False, groups=group).cuda()
28 | self.norm = nn.BatchNorm2d(num_features=oup).cuda()
29 | self.act = nn.ReLU(inplace=True)
30 |
31 | def forward(self, x):
32 | # conv -> batch norm -> activation
33 | x = self.norm(self.conv(x))
34 | return self.act(x)
36 |
37 |
38 | class BiFPN(nn.Module):
39 | """
40 |
41 | """
42 | def __init__(self, oup, first=True):
43 | super().__init__()
44 | # self.features_in = features_in
45 | self.oup = oup
46 | # self.dw_conv = ConvBlock(oup, oup, k_size=3, stride=1, padding=1, group=oup)
47 | # self.pw_conv = ConvBlock(oup, oup, k_size=1, stride=1, padding=0)
48 |
49 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
50 | self.first = first
51 | self.conv_gen()
52 | self.w_gen()
53 | def forward(self, features_in):
54 | # self.tail(x)
55 | # P3_in, P4_in, P5_in, P6_in, P7_in = features_in
56 |
57 | features_out = self.top_down(features_in)
58 | return features_out
59 |
60 | def conv_gen(self):
61 | # P3_in, P4_in, P5_in, P6_in, P7_in = features_in
62 | if not self.first:
63 | self.P3_in_conv = ConvBlock(self.oup, self.oup, k_size=1, stride=1, padding=0)
64 | self.P4_in_conv = ConvBlock(self.oup, self.oup, k_size=1, stride=1, padding=0)
65 | self.P5_in_conv = ConvBlock(self.oup, self.oup, k_size=1, stride=1, padding=0)
66 | self.P6_in_conv = ConvBlock(self.oup, self.oup, k_size=1, stride=1, padding=0)
67 | self.P7_in_conv = ConvBlock(self.oup, self.oup, k_size=1, stride=1, padding=0)
68 |
69 | # upsample
70 | self.P6_td_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup)
71 | self.P5_td_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup)
72 | self.P4_td_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup)
73 | self.P3_out_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup)
74 |
75 | # downsample
76 | self.P4_out_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup)
77 | self.P5_out_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup)
78 | self.P6_out_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup)
79 | self.P7_out_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup)
80 |
81 | def w_gen(self):
82 | self.P6_td_add = wAdd(2)
83 | self.P5_td_add = wAdd(2)
84 | self.P4_td_add = wAdd(2)
85 | self.P3_out_add = wAdd(2)
86 | self.P4_out_add = wAdd(3)
87 | self.P5_out_add = wAdd(3)
88 | self.P6_out_add = wAdd(3)
89 | self.P7_out_add = wAdd(2)
90 |
91 | def top_down_no_w(self, features_in):
92 | P3_in, P4_in, P5_in, P6_in, P7_in = features_in
93 | if not self.first:
94 | P3_in = self.P3_in_conv(P3_in)
95 | P4_in = self.P4_in_conv(P4_in)
96 | P5_in = self.P5_in_conv(P5_in)
97 | P6_in = self.P6_in_conv(P6_in)
98 | P7_in = self.P7_in_conv(P7_in)
99 |
100 | # upsample
101 | P7_U = self.Resize()(P7_in)
102 | P6_td = P7_U + P6_in
103 | P6_td = self.P6_td_conv(P6_td)
104 | P6_U = self.Resize()(P6_td)
105 | P5_td = P6_U + P5_in
106 | P5_td = self.P5_td_conv(P5_td)
107 | P5_U = self.Resize()(P5_td)
108 | P4_td = P5_U + P4_in
109 | P4_td = self.P4_td_conv(P4_td)
110 | P4_U = self.Resize()(P4_td)
111 | P3_out = P4_U + P3_in
112 | P3_out = self.P3_out_conv(P3_out)
113 |
114 | # downsample
115 | P3_D = self.pool(P3_out)
116 | P4_out = P3_D + P4_td + P4_in
117 | P4_out = self.P4_out_conv(P4_out)
118 | P4_D = self.pool(P4_out)
119 | P5_out = P4_D + P5_td + P5_in
120 | P5_out = self.P5_out_conv(P5_out)
121 | P5_D = self.pool(P5_out)
122 | P6_out = P5_D + P6_td + P6_in
123 | P6_out = self.P6_out_conv(P6_out)
124 | P6_D = self.pool(P6_out)
125 | P7_out = P6_D + P7_in
126 | P7_out = self.P7_out_conv(P7_out)
127 | return [P3_out, P4_out, P5_out, P6_out, P7_out]
128 |
129 | def top_down(self, features_in):
130 | P3_in, P4_in, P5_in, P6_in, P7_in = features_in
131 | if not self.first:
132 | P3_in = self.P3_in_conv(P3_in)
133 | P4_in = self.P4_in_conv(P4_in)
134 | P5_in = self.P5_in_conv(P5_in)
135 | P6_in = self.P6_in_conv(P6_in)
136 | P7_in = self.P7_in_conv(P7_in)
137 |
138 | # upsample
139 | P7_U = self.Resize()(P7_in)
140 | P6_td = self.P6_td_add([P6_in, P7_U])
141 | P6_td = self.P6_td_conv(P6_td)
142 | P6_U = self.Resize()(P6_td)
143 | P5_td = self.P5_td_add([P5_in, P6_U])
144 | P5_td = self.P5_td_conv(P5_td)
145 | P5_U = self.Resize()(P5_td)
146 | P4_td = self.P4_td_add([P4_in, P5_U])
147 | P4_td = self.P4_td_conv(P4_td)
148 | P4_U = self.Resize()(P4_td)
149 | P3_out = self.P3_out_add([P3_in, P4_U])
150 | P3_out = self.P3_out_conv(P3_out)
151 |
152 | # downsample
153 | P3_D = self.pool(P3_out)
154 | P4_out = self.P4_out_add([P3_D, P4_td, P4_in])
155 | P4_out = self.P4_out_conv(P4_out)
156 | P4_D = self.pool(P4_out)
157 | P5_out = self.P5_out_add([P4_D, P5_td, P5_in])
158 | P5_out = self.P5_out_conv(P5_out)
159 | P5_D = self.pool(P5_out)
160 | P6_out = self.P6_out_add([P5_D, P6_td, P6_in])
161 | P6_out = self.P6_out_conv(P6_out)
162 | P6_D = self.pool(P6_out)
163 | P7_out = self.P7_out_add([P6_D, P7_in])
164 | P7_out = self.P7_out_conv(P7_out)
165 |
166 | return [P3_out, P4_out, P5_out, P6_out, P7_out]
167 |
168 |
169 |
170 | def Resize(self, scale=2, mode='nearest'):
171 | upsample = nn.Upsample(scale_factor=scale, mode=mode)
172 | return upsample
173 |
174 | # def get_weight(self):
175 |
176 |
177 | class wAdd(nn.Module):
178 | """
179 |
180 | """
181 | def __init__(self, num_in):
182 | super().__init__()
183 | self.epsilon = 1e-4
184 | self.w = nn.Parameter(torch.Tensor(num_in).fill_(1 / num_in))
185 |
186 | def forward(self, inputs):
187 | # len(inputs)
188 | num_in = len(inputs)
189 | # w = nn.Parameter(torch.Tensor(num_in).fill_(1 / num_in))
190 | w = self.w.cuda()
191 | # x = [w[i] * inputs[i] for i in range(num_in)]
192 | x = 0
193 | # print(w[0])
194 | for i in range(num_in):
195 | x += w[i] * inputs[i]
196 | x /= (torch.sum(w) + self.epsilon)
197 | # x = x.cuda()
198 | return x
199 | # x = torch.sum(x)
200 |
201 |
202 |
203 |
--------------------------------------------------------------------------------
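
A minimal sketch of running one BiFPN layer on random feature maps; since ConvBlock and wAdd above move their weights to the GPU, it assumes a CUDA device is available and the repository root is on PYTHONPATH.

import torch
from model.BiFPN import BiFPN

channels = 64
fpn = BiFPN(channels, first=True).cuda()

# five feature levels P3..P7, each half the spatial size of the previous one
feats = [torch.randn(1, channels, size, size).cuda() for size in (64, 32, 16, 8, 4)]
p3, p4, p5, p6, p7 = fpn(feats)
print(tuple(p3.shape), tuple(p7.shape))   # (1, 64, 64, 64) and (1, 64, 4, 4)
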
/model/util.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | import torchvision.ops as ops
5 |
6 |
7 | def conv3x3(in_planes, out_planes, stride=1):
8 | """3x3 convolution with padding"""
9 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
10 | padding=1, bias=False)
11 |
12 | class BasicBlock(nn.Module):
13 | expansion = 1
14 |
15 | def __init__(self, inplanes, planes, stride=1, downsample=None):
16 | super(BasicBlock, self).__init__()
17 | self.conv1 = conv3x3(inplanes, planes, stride)
18 | self.bn1 = nn.BatchNorm2d(planes)
19 | self.relu = nn.ReLU(inplace=True)
20 | self.conv2 = conv3x3(planes, planes)
21 | self.bn2 = nn.BatchNorm2d(planes)
22 | self.downsample = downsample
23 | self.stride = stride
24 |
25 | def forward(self, x):
26 | residual = x
27 |
28 | out = self.conv1(x)
29 | out = self.bn1(out)
30 | out = self.relu(out)
31 |
32 | out = self.conv2(out)
33 | out = self.bn2(out)
34 |
35 | if self.downsample is not None:
36 | residual = self.downsample(x)
37 |
38 | out += residual
39 | out = self.relu(out)
40 |
41 | return out
42 |
43 |
44 | class Bottleneck(nn.Module):
45 | expansion = 4
46 |
47 | def __init__(self, inplanes, planes, stride=1, downsample=None):
48 | super(Bottleneck, self).__init__()
49 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
50 | self.bn1 = nn.BatchNorm2d(planes)
51 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
52 | padding=1, bias=False)
53 | self.bn2 = nn.BatchNorm2d(planes)
54 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
55 | self.bn3 = nn.BatchNorm2d(planes * 4)
56 | self.relu = nn.ReLU(inplace=True)
57 | self.downsample = downsample
58 | self.stride = stride
59 |
60 | def forward(self, x):
61 | residual = x
62 |
63 | out = self.conv1(x)
64 | out = self.bn1(out)
65 | out = self.relu(out)
66 |
67 | out = self.conv2(out)
68 | out = self.bn2(out)
69 | out = self.relu(out)
70 |
71 | out = self.conv3(out)
72 | out = self.bn3(out)
73 |
74 | if self.downsample is not None:
75 | residual = self.downsample(x)
76 |
77 | out += residual
78 | out = self.relu(out)
79 |
80 | return out
81 |
82 | class BBoxTransform(nn.Module):
83 |
84 | def __init__(self, mean=None, std=None):
85 | super(BBoxTransform, self).__init__()
86 | if mean is None:
87 | self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32)).cuda()
88 | else:
89 | self.mean = mean
90 | if std is None:
91 | self.std = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32)).cuda()
92 | else:
93 | self.std = std
94 |
95 | def forward(self, boxes, deltas):
96 |
97 | widths = boxes[:, :, 2] - boxes[:, :, 0]
98 | heights = boxes[:, :, 3] - boxes[:, :, 1]
99 | ctr_x = boxes[:, :, 0] + 0.5 * widths
100 | ctr_y = boxes[:, :, 1] + 0.5 * heights
101 |
102 | dx = deltas[:, :, 0] * self.std[0] + self.mean[0]
103 | dy = deltas[:, :, 1] * self.std[1] + self.mean[1]
104 | dw = deltas[:, :, 2] * self.std[2] + self.mean[2]
105 | dh = deltas[:, :, 3] * self.std[3] + self.mean[3]
106 |
107 | pred_ctr_x = ctr_x + dx * widths
108 | pred_ctr_y = ctr_y + dy * heights
109 | pred_w = torch.exp(dw) * widths
110 | pred_h = torch.exp(dh) * heights
111 |
112 | pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w
113 | pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h
114 | pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w
115 | pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h
116 |
117 | pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2)
118 |
119 | return pred_boxes
120 |
121 |
122 | class ClipBoxes(nn.Module):
123 |
124 | def __init__(self, width=None, height=None):
125 | super(ClipBoxes, self).__init__()
126 |
127 | def forward(self, boxes, img):
128 |
129 | batch_size, num_channels, height, width = img.shape
130 |
131 | boxes[:, :, 0] = torch.clamp(boxes[:, :, 0], min=0)
132 | boxes[:, :, 1] = torch.clamp(boxes[:, :, 1], min=0)
133 |
134 | boxes[:, :, 2] = torch.clamp(boxes[:, :, 2], max=width)
135 | boxes[:, :, 3] = torch.clamp(boxes[:, :, 3], max=height)
136 |
137 | return boxes
138 |
139 | class Filter_boxes(nn.Module):
140 | """
141 |
142 | """
143 | def __init__(self, args):
144 | super().__init__()
145 | self.threshold = args.threshold
146 |
147 | def forward(self, inputs):
148 | transformed_anchors, classification, scores = inputs
149 |
150 | boxes_dict, scores_dict = self.select(transformed_anchors, classification)
151 | box = []
152 | score = []
153 | cls = []
154 | for i in range(80):
155 | if len(scores_dict[i]) > 0:
156 | anchors_nms_idx = ops.nms(boxes=boxes_dict[i], scores=scores_dict[i], iou_threshold=0.5)
157 | box.append(boxes_dict[i][anchors_nms_idx, :])
158 | score.append(scores_dict[i][anchors_nms_idx])
159 | cls.append(i)
160 |
161 |
162 | return box, score, cls
163 |
164 | def select(self, transformed_anchors, classification):
165 | boxes = {}
166 | scores = {}
167 | for cls in range(80):
168 | cls_score = classification[0, :, cls]
169 | select_mask = cls_score > self.threshold
170 | boxes[cls] = transformed_anchors[0, select_mask, :]
171 | scores[cls] = cls_score[select_mask]
172 | return boxes, scores
173 |
174 | num2name = {0: u'__background__',
175 | 1: u'person',
176 | 2: u'bicycle',
177 | 3: u'car',
178 | 4: u'motorcycle',
179 | 5: u'airplane',
180 | 6: u'bus',
181 | 7: u'train',
182 | 8: u'truck',
183 | 9: u'boat',
184 | 10: u'traffic light',
185 | 11: u'fire hydrant',
186 | 12: u'stop sign',
187 | 13: u'parking meter',
188 | 14: u'bench',
189 | 15: u'bird',
190 | 16: u'cat',
191 | 17: u'dog',
192 | 18: u'horse',
193 | 19: u'sheep',
194 | 20: u'cow',
195 | 21: u'elephant',
196 | 22: u'bear',
197 | 23: u'zebra',
198 | 24: u'giraffe',
199 | 25: u'backpack',
200 | 26: u'umbrella',
201 | 27: u'handbag',
202 | 28: u'tie',
203 | 29: u'suitcase',
204 | 30: u'frisbee',
205 | 31: u'skis',
206 | 32: u'snowboard',
207 | 33: u'sports ball',
208 | 34: u'kite',
209 | 35: u'baseball bat',
210 | 36: u'baseball glove',
211 | 37: u'skateboard',
212 | 38: u'surfboard',
213 | 39: u'tennis racket',
214 | 40: u'bottle',
215 | 41: u'wine glass',
216 | 42: u'cup',
217 | 43: u'fork',
218 | 44: u'knife',
219 | 45: u'spoon',
220 | 46: u'bowl',
221 | 47: u'banana',
222 | 48: u'apple',
223 | 49: u'sandwich',
224 | 50: u'orange',
225 | 51: u'broccoli',
226 | 52: u'carrot',
227 | 53: u'hot dog',
228 | 54: u'pizza',
229 | 55: u'donut',
230 | 56: u'cake',
231 | 57: u'chair',
232 | 58: u'couch',
233 | 59: u'potted plant',
234 | 60: u'bed',
235 | 61: u'dining table',
236 | 62: u'toilet',
237 | 63: u'tv',
238 | 64: u'laptop',
239 | 65: u'mouse',
240 | 66: u'remote',
241 | 67: u'keyboard',
242 | 68: u'cell phone',
243 | 69: u'microwave',
244 | 70: u'oven',
245 | 71: u'toaster',
246 | 72: u'sink',
247 | 73: u'refrigerator',
248 | 74: u'book',
249 | 75: u'clock',
250 | 76: u'vase',
251 | 77: u'scissors',
252 | 78: u'teddy bear',
253 | 79: u'hair drier',
254 | 80: u'toothbrush'}
--------------------------------------------------------------------------------
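
A minimal sketch of the box post-processing helpers above; BBoxTransform creates its mean/std tensors with .cuda(), so a CUDA device is assumed, along with the repository root being on PYTHONPATH.

import torch
from model.util import BBoxTransform, ClipBoxes

img = torch.zeros(1, 3, 512, 512).cuda()
anchors = torch.tensor([[[10., 10., 50., 50.], [400., 300., 600., 520.]]]).cuda()   # (1, A, 4)
deltas = torch.zeros(1, 2, 4).cuda()      # zero regression offsets leave the boxes unchanged

boxes = BBoxTransform()(anchors, deltas)  # decode offsets into absolute [x1, y1, x2, y2] boxes
boxes = ClipBoxes()(boxes, img)           # the second box is clamped back inside the 512x512 image
print(boxes)
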
/model/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from torch.nn import functional as F
4 |
5 | from .utils import (
6 | round_filters,
7 | round_repeats,
8 | drop_connect,
9 | get_same_padding_conv2d,
10 | get_model_params,
11 | efficientnet_params,
12 | load_pretrained_weights,
13 | Swish,
14 | MemoryEfficientSwish,
15 | )
16 |
17 | class MBConvBlock(nn.Module):
18 | """
19 | Mobile Inverted Residual Bottleneck Block
20 |
21 | Args:
22 | block_args (namedtuple): BlockArgs, see above
23 | global_params (namedtuple): GlobalParam, see above
24 |
25 | Attributes:
26 | has_se (bool): Whether the block contains a Squeeze and Excitation layer.
27 | """
28 |
29 | def __init__(self, block_args, global_params):
30 | super().__init__()
31 | self._block_args = block_args
32 | self._bn_mom = 1 - global_params.batch_norm_momentum
33 | self._bn_eps = global_params.batch_norm_epsilon
34 | self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1)
35 | self.id_skip = block_args.id_skip # skip connection and drop connect
36 |
37 | # Get static or dynamic convolution depending on image size
38 | Conv2d = get_same_padding_conv2d(image_size=global_params.image_size)
39 |
40 | # Expansion phase
41 | inp = self._block_args.input_filters # number of input channels
42 | oup = self._block_args.input_filters * self._block_args.expand_ratio # number of output channels
43 | if self._block_args.expand_ratio != 1:
44 | self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False)
45 | self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
46 |
47 | # Depthwise convolution phase
48 | k = self._block_args.kernel_size
49 | s = self._block_args.stride
50 | self._depthwise_conv = Conv2d(
51 | in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise
52 | kernel_size=k, stride=s, bias=False)
53 | self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
54 |
55 | # Squeeze and Excitation layer, if desired
56 | if self.has_se:
57 | num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio))
58 | self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1)
59 | self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1)
60 |
61 | # Output phase
62 | final_oup = self._block_args.output_filters
63 | self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False)
64 | self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps)
65 | self._swish = MemoryEfficientSwish()
66 |
67 | def forward(self, inputs, drop_connect_rate=None):
68 | """
69 | :param inputs: input tensor
70 | :param drop_connect_rate: drop connect rate (float, between 0 and 1)
71 | :return: output of block
72 | """
73 |
74 | # Expansion and Depthwise Convolution
75 | x = inputs
76 | if self._block_args.expand_ratio != 1:
77 | x = self._swish(self._bn0(self._expand_conv(inputs)))
78 | x = self._swish(self._bn1(self._depthwise_conv(x)))
79 |
80 | # Squeeze and Excitation
81 | if self.has_se:
82 | x_squeezed = F.adaptive_avg_pool2d(x, 1)
83 | x_squeezed = self._se_expand(self._swish(self._se_reduce(x_squeezed)))
84 | x = torch.sigmoid(x_squeezed) * x
85 |
86 | x = self._bn2(self._project_conv(x))
87 |
88 | # Skip connection and drop connect
89 | input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters
90 | if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters:
91 | if drop_connect_rate:
92 | x = drop_connect(x, p=drop_connect_rate, training=self.training)
93 | x = x + inputs # skip connection
94 | return x
95 |
96 | def set_swish(self, memory_efficient=True):
97 | """Sets swish function as memory efficient (for training) or standard (for export)"""
98 | self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
99 |
100 |
101 | class EfficientNet(nn.Module):
102 | """
103 | An EfficientNet model. Most easily loaded with the .from_name or .from_pretrained methods
104 |
105 | Args:
106 | blocks_args (list): A list of BlockArgs to construct blocks
107 | global_params (namedtuple): A set of GlobalParams shared between blocks
108 |
109 | Example:
110 | model = EfficientNet.from_pretrained('efficientnet-b0')
111 |
112 | """
113 |
114 | def __init__(self, blocks_args=None, global_params=None):
115 | super().__init__()
116 | assert isinstance(blocks_args, list), 'blocks_args should be a list'
117 | assert len(blocks_args) > 0, 'block args must be greater than 0'
118 | self._global_params = global_params
119 | self._blocks_args = blocks_args
120 |
121 | # Get static or dynamic convolution depending on image size
122 | Conv2d = get_same_padding_conv2d(image_size=global_params.image_size)
123 |
124 | # Batch norm parameters
125 | bn_mom = 1 - self._global_params.batch_norm_momentum
126 | bn_eps = self._global_params.batch_norm_epsilon
127 |
128 | # Stem
129 | in_channels = 3 # rgb
130 | out_channels = round_filters(32, self._global_params) # number of output channels
131 | self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
132 | self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
133 |
134 | # Build blocks
135 | self._blocks = nn.ModuleList([])
136 | for block_args in self._blocks_args:
137 |
138 | # Update block input and output filters based on depth multiplier.
139 | block_args = block_args._replace(
140 | input_filters=round_filters(block_args.input_filters, self._global_params),
141 | output_filters=round_filters(block_args.output_filters, self._global_params),
142 | num_repeat=round_repeats(block_args.num_repeat, self._global_params)
143 | )
144 |
145 | # The first block needs to take care of stride and filter size increase.
146 | self._blocks.append(MBConvBlock(block_args, self._global_params))
147 | if block_args.num_repeat > 1:
148 | block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
149 | for _ in range(block_args.num_repeat - 1):
150 | self._blocks.append(MBConvBlock(block_args, self._global_params))
151 |
152 | # Head
153 | in_channels = block_args.output_filters # output of final block
154 | out_channels = round_filters(1280, self._global_params)
155 | self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
156 | self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
157 |
158 | # Final linear layer
159 | self._avg_pooling = nn.AdaptiveAvgPool2d(1)
160 | self._dropout = nn.Dropout(self._global_params.dropout_rate)
161 | self._fc = nn.Linear(out_channels, self._global_params.num_classes)
162 | self._swish = MemoryEfficientSwish()
163 |
164 | def set_swish(self, memory_efficient=True):
165 | """Sets swish function as memory efficient (for training) or standard (for export)"""
166 | self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
167 | for block in self._blocks:
168 | block.set_swish(memory_efficient)
169 |
170 |
171 | def extract_features(self, inputs):
172 | """ Returns output of the final convolution layer """
173 |
174 | # Stem
175 | x = self._swish(self._bn0(self._conv_stem(inputs)))
176 | x_before = x
177 | features = []
178 | block_index = 0
179 | repeat = 0
180 | # Blocks
181 | for idx, block in enumerate(self._blocks):
182 | drop_connect_rate = self._global_params.drop_connect_rate
183 | if drop_connect_rate:
184 | drop_connect_rate *= float(idx) / len(self._blocks)
185 | x = block(x, drop_connect_rate=drop_connect_rate)
186 | if x_before.shape[2] != x.shape[2]:
187 | features.append(x_before)
188 | x_before = x
189 | features.append(x)
190 | # repeat += 1
191 | # if(repeat == self._blocks_args[block_index].num_repeat):
192 | # repeat = 0
193 | # block_index += 1
194 | # features.append(x)
195 |
196 | # Head
197 | # x = self._swish(self._bn1(self._conv_head(x)))
198 |
199 | return features
200 |
201 | def forward(self, inputs):
202 | """ Calls extract_features to extract features, applies final linear layer, and returns logits. """
203 | # bs = inputs.size(0)
204 | # Convolution layers
205 | x = self.extract_features(inputs)
206 |
207 | # Pooling and final linear layer
208 | # x = self._avg_pooling(x)
209 | # x = x.view(bs, -1)
210 | # x = self._dropout(x)
211 | # x = self._fc(x)
212 | return x
213 |
214 | @classmethod
215 | def from_name(cls, model_name, override_params=None):
216 | cls._check_model_name_is_valid(model_name)
217 | blocks_args, global_params = get_model_params(model_name, override_params)
218 | return cls(blocks_args, global_params)
219 |
220 | @classmethod
221 | def from_pretrained(cls, args, num_classes=1000, in_channels=3):
222 | model_name = args.backbone
223 | # print('backbone', model_name)
224 | model = cls.from_name(model_name, override_params={'num_classes': num_classes})
225 | if args.backbone_pretrained:
226 | load_pretrained_weights(model, model_name, load_fc=False)
227 |
228 | if in_channels != 3:
229 | Conv2d = get_same_padding_conv2d(image_size=model._global_params.image_size)
230 | out_channels = round_filters(32, model._global_params)
231 | model._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
232 | return model
244 |
245 | @classmethod
246 | def get_image_size(cls, model_name):
247 | cls._check_model_name_is_valid(model_name)
248 | _, _, res, _ = efficientnet_params(model_name)
249 | return res
250 |
251 | @classmethod
252 | def _check_model_name_is_valid(cls, model_name, also_need_pretrained_weights=False):
253 | """ Validates model name. None that pretrained weights are only available for
254 | the first four models (efficientnet-b{i} for i in 0,1,2,3) at the moment. """
255 | num_models = 4 if also_need_pretrained_weights else 8
256 | valid_models = ['efficientnet-b'+str(i) for i in range(num_models)]
257 | if model_name not in valid_models:
258 | raise ValueError('model_name should be one of: ' + ', '.join(valid_models))
259 |
260 | def get_list_feature(self):
261 | list_feature = [80, 192, 320]
262 | # s_before = self._blocks_args[0]
263 | # for idx in range(len(self._blocks_args)-1):
264 | # print(self._blocks_args[idx].stride)
265 | # if self._blocks_args[idx].stride == self._blocks_args[idx+1].stride:
266 | # list_feature.append(self._blocks_args[idx].output_filters)
267 |
268 | return list_feature
269 |
--------------------------------------------------------------------------------
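
A minimal sketch of building the backbone above with random weights (no download) and collecting the multi-scale features the detector consumes; it assumes the repository root is on PYTHONPATH and that the model package imports cleanly.

import torch
from model.model import EfficientNet

net = EfficientNet.from_name('efficientnet-b0')
net.eval()
with torch.no_grad():
    features = net(torch.randn(1, 3, 512, 512))   # forward() returns the list built by extract_features()
for f in features:
    print(tuple(f.shape))
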
/model/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | This file contains helper functions for building the model and for loading model parameters.
3 | These helper functions are built to mirror those in the official TensorFlow implementation.
4 | """
5 |
6 | import re
7 | import math
8 | import collections
9 | from functools import partial
10 | import torch
11 | from torch import nn
12 | from torch.nn import functional as F
13 | from torch.utils import model_zoo
14 |
15 | ########################################################################
16 | ############### HELPERS FUNCTIONS FOR MODEL ARCHITECTURE ###############
17 | ########################################################################
18 |
19 |
20 | # Parameters for the entire model (stem, all blocks, and head)
21 | GlobalParams = collections.namedtuple('GlobalParams', [
22 | 'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate',
23 | 'num_classes', 'width_coefficient', 'depth_coefficient',
24 | 'depth_divisor', 'min_depth', 'drop_connect_rate', 'image_size'])
25 |
26 | # Parameters for an individual model block
27 | BlockArgs = collections.namedtuple('BlockArgs', [
28 | 'kernel_size', 'num_repeat', 'input_filters', 'output_filters',
29 | 'expand_ratio', 'id_skip', 'stride', 'se_ratio'])
30 |
31 | # Change namedtuple defaults
32 | GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields)
33 | BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields)
34 |
35 |
36 | class SwishImplementation(torch.autograd.Function):
37 | @staticmethod
38 | def forward(ctx, i):
39 | result = i * torch.sigmoid(i)
40 | ctx.save_for_backward(i)
41 | return result
42 |
43 | @staticmethod
44 | def backward(ctx, grad_output):
45 | i = ctx.saved_tensors[0]
46 | sigmoid_i = torch.sigmoid(i)
47 | return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))
48 |
49 |
50 | class MemoryEfficientSwish(nn.Module):
51 | def forward(self, x):
52 | return SwishImplementation.apply(x)
53 |
54 | class Swish(nn.Module):
55 | def forward(self, x):
56 | return x * torch.sigmoid(x)
57 |
58 |
59 | def round_filters(filters, global_params):
60 | """ Calculate and round number of filters based on depth multiplier. """
61 | multiplier = global_params.width_coefficient
62 | if not multiplier:
63 | return filters
64 | divisor = global_params.depth_divisor
65 | min_depth = global_params.min_depth
66 | filters *= multiplier
67 | min_depth = min_depth or divisor
68 | new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
69 | if new_filters < 0.9 * filters: # prevent rounding by more than 10%
70 | new_filters += divisor
71 | return int(new_filters)
72 |
73 |
74 | def round_repeats(repeats, global_params):
75 | """ Round number of filters based on depth multiplier. """
76 | multiplier = global_params.depth_coefficient
77 | if not multiplier:
78 | return repeats
79 | return int(math.ceil(multiplier * repeats))
80 |
81 |
82 | def drop_connect(inputs, p, training):
83 | """ Drop connect. """
84 | if not training: return inputs
85 | batch_size = inputs.shape[0]
86 | keep_prob = 1 - p
87 | random_tensor = keep_prob
88 | random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device)
89 | binary_tensor = torch.floor(random_tensor)
90 | output = inputs / keep_prob * binary_tensor
91 | return output
92 |
93 |
94 | def get_same_padding_conv2d(image_size=None):
95 | """ Chooses static padding if you have specified an image size, and dynamic padding otherwise.
96 | Static padding is necessary for ONNX exporting of models. """
97 | if image_size is None:
98 | return Conv2dDynamicSamePadding
99 | else:
100 | return partial(Conv2dStaticSamePadding, image_size=image_size)
101 |
102 |
103 | class Conv2dDynamicSamePadding(nn.Conv2d):
104 | """ 2D Convolutions like TensorFlow, for a dynamic image size """
105 |
106 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
107 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
108 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2
109 |
110 | def forward(self, x):
111 | ih, iw = x.size()[-2:]
112 | kh, kw = self.weight.size()[-2:]
113 | sh, sw = self.stride
114 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
115 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
116 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
117 | if pad_h > 0 or pad_w > 0:
118 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
119 | return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
120 |
121 |
122 | class Conv2dStaticSamePadding(nn.Conv2d):
123 | """ 2D Convolutions like TensorFlow, for a fixed image size"""
124 |
125 | def __init__(self, in_channels, out_channels, kernel_size, image_size=None, **kwargs):
126 | super().__init__(in_channels, out_channels, kernel_size, **kwargs)
127 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2
128 |
129 | # Calculate padding based on image size and save it
130 | assert image_size is not None
131 | ih, iw = image_size if type(image_size) == list else [image_size, image_size]
132 | kh, kw = self.weight.size()[-2:]
133 | sh, sw = self.stride
134 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
135 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
136 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
137 | if pad_h > 0 or pad_w > 0:
138 | self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2))
139 | else:
140 | self.static_padding = Identity()
141 |
142 | def forward(self, x):
143 | x = self.static_padding(x)
144 | x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
145 | return x
146 |
147 |
148 | class Identity(nn.Module):
149 | def __init__(self, ):
150 | super(Identity, self).__init__()
151 |
152 | def forward(self, input):
153 | return input
154 |
155 |
156 | ########################################################################
157 | ############## HELPERS FUNCTIONS FOR LOADING MODEL PARAMS ##############
158 | ########################################################################
159 |
160 |
161 | def efficientnet_params(model_name):
162 | """ Map EfficientNet model name to parameter coefficients. """
163 | params_dict = {
164 | # Coefficients: width,depth,res,dropout
165 | 'efficientnet-b0': (1.0, 1.0, 224, 0.2),
166 | 'efficientnet-b1': (1.0, 1.1, 240, 0.2),
167 | 'efficientnet-b2': (1.1, 1.2, 260, 0.3),
168 | 'efficientnet-b3': (1.2, 1.4, 300, 0.3),
169 | 'efficientnet-b4': (1.4, 1.8, 380, 0.4),
170 | 'efficientnet-b5': (1.6, 2.2, 456, 0.4),
171 | 'efficientnet-b6': (1.8, 2.6, 528, 0.5),
172 | 'efficientnet-b7': (2.0, 3.1, 600, 0.5),
173 | }
174 | return params_dict[model_name]
175 |
176 |
177 | class BlockDecoder(object):
178 | """ Block Decoder for readability, straight from the official TensorFlow repository """
179 |
180 | @staticmethod
181 | def _decode_block_string(block_string):
182 | """ Gets a block through a string notation of arguments. """
183 | assert isinstance(block_string, str)
184 |
185 | ops = block_string.split('_')
186 | options = {}
187 | for op in ops:
188 | splits = re.split(r'(\d.*)', op)
189 | if len(splits) >= 2:
190 | key, value = splits[:2]
191 | options[key] = value
192 |
193 | # Check stride
194 | assert (('s' in options and len(options['s']) == 1) or
195 | (len(options['s']) == 2 and options['s'][0] == options['s'][1]))
196 |
197 | return BlockArgs(
198 | kernel_size=int(options['k']),
199 | num_repeat=int(options['r']),
200 | input_filters=int(options['i']),
201 | output_filters=int(options['o']),
202 | expand_ratio=int(options['e']),
203 | id_skip=('noskip' not in block_string),
204 | se_ratio=float(options['se']) if 'se' in options else None,
205 | stride=[int(options['s'][0])])
206 |
207 | @staticmethod
208 | def _encode_block_string(block):
209 | """Encodes a block to a string."""
210 | args = [
211 | 'r%d' % block.num_repeat,
212 | 'k%d' % block.kernel_size,
213 | 's%d%d' % (block.stride[0], block.stride[0]),
214 | 'e%s' % block.expand_ratio,
215 | 'i%d' % block.input_filters,
216 | 'o%d' % block.output_filters
217 | ]
218 | if block.se_ratio is not None and 0 < block.se_ratio <= 1:
219 | args.append('se%s' % block.se_ratio)
220 | if block.id_skip is False:
221 | args.append('noskip')
222 | return '_'.join(args)
223 |
224 | @staticmethod
225 | def decode(string_list):
226 | """
227 | Decodes a list of string notations to specify blocks inside the network.
228 |
229 | :param string_list: a list of strings, each string is a notation of block
230 | :return: a list of BlockArgs namedtuples of block args
231 | """
232 | assert isinstance(string_list, list)
233 | blocks_args = []
234 | for block_string in string_list:
235 | blocks_args.append(BlockDecoder._decode_block_string(block_string))
236 | return blocks_args
237 |
238 | @staticmethod
239 | def encode(blocks_args):
240 | """
241 | Encodes a list of BlockArgs to a list of strings.
242 |
243 | :param blocks_args: a list of BlockArgs namedtuples of block args
244 | :return: a list of strings, each string is a notation of block
245 | """
246 | block_strings = []
247 | for block in blocks_args:
248 | block_strings.append(BlockDecoder._encode_block_string(block))
249 | return block_strings
250 |
251 |
252 | def efficientnet(width_coefficient=None, depth_coefficient=None, dropout_rate=0.2,
253 | drop_connect_rate=0.2, image_size=None, num_classes=1000):
254 | """ Creates a efficientnet model. """
255 |
256 | blocks_args = [
257 | 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s11_e6_i16_o24_se0.25',
258 | 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25',
259 | 'r3_k5_s22_e6_i80_o112_se0.25', 'r4_k5_s11_e6_i112_o192_se0.25',
260 | 'r1_k3_s22_e6_i192_o320_se0.25',
261 | ]
262 | blocks_args = BlockDecoder.decode(blocks_args)
263 |
264 | global_params = GlobalParams(
265 | batch_norm_momentum=0.99,
266 | batch_norm_epsilon=1e-3,
267 | dropout_rate=dropout_rate,
268 | drop_connect_rate=drop_connect_rate,
269 | # data_format='channels_last', # removed, this is always true in PyTorch
270 | num_classes=num_classes,
271 | width_coefficient=width_coefficient,
272 | depth_coefficient=depth_coefficient,
273 | depth_divisor=8,
274 | min_depth=None,
275 | image_size=image_size,
276 | )
277 |
278 | return blocks_args, global_params
279 |
280 |
281 | def get_model_params(model_name, override_params):
282 | """ Get the block args and global params for a given model """
283 | if model_name.startswith('efficientnet'):
284 | w, d, s, p = efficientnet_params(model_name)
285 | # note: all models have drop connect rate = 0.2
286 | blocks_args, global_params = efficientnet(
287 | width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s)
288 | else:
289 | raise NotImplementedError('model name is not pre-defined: %s' % model_name)
290 | if override_params:
291 | # ValueError will be raised here if override_params has fields not included in global_params.
292 | global_params = global_params._replace(**override_params)
293 | return blocks_args, global_params
294 |
295 |
296 | url_map = {
297 | 'efficientnet-b0': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b0-355c32eb.pth',
298 | 'efficientnet-b1': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b1-f1951068.pth',
299 | 'efficientnet-b2': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b2-8bb594d6.pth',
300 | 'efficientnet-b3': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b3-5fb5a3c3.pth',
301 | 'efficientnet-b4': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b4-6ed6700e.pth',
302 | 'efficientnet-b5': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b5-b6417697.pth',
303 | 'efficientnet-b6': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b6-c76e70fd.pth',
304 | 'efficientnet-b7': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b7-dcc49843.pth',
305 | }
306 |
307 |
308 | def load_pretrained_weights(model, model_name, load_fc=False):
309 | """ Loads pretrained weights, and downloads if loading for the first time. """
310 | state_dict = model_zoo.load_url(url_map[model_name])
311 | # state_dict = torch.load('/home/pre_trained/efficientnet-b0-355c32eb.pth')
312 | if load_fc:
313 | model.load_state_dict(state_dict)
314 | else:
315 | state_dict.pop('_fc.weight')
316 | state_dict.pop('_fc.bias')
317 | res = model.load_state_dict(state_dict, strict=False)
318 | assert set(res.missing_keys) == set(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights'
319 | print('Loaded pretrained weights for {}'.format(model_name))
320 |
--------------------------------------------------------------------------------
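
A minimal sketch of the configuration helpers above, runnable on CPU assuming the repository root is on PYTHONPATH.

from model.utils import BlockDecoder, get_model_params, round_filters

blocks_args, global_params = get_model_params('efficientnet-b2', override_params=None)
print(global_params.width_coefficient, global_params.depth_coefficient, global_params.image_size)   # 1.1 1.2 260

# round_filters scales a channel count by the width coefficient and snaps it to the depth divisor
print(round_filters(1280, global_params))   # 1408 for b2 (1280 * 1.1, rounded to a multiple of 8)

# BlockDecoder turns the compact block-string notation into BlockArgs namedtuples
args = BlockDecoder.decode(['r2_k5_s22_e6_i24_o40_se0.25'])
print(args[0].kernel_size, args[0].stride, args[0].se_ratio)   # 5 [2] 0.25
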
/dataset/dataloader.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division
2 | import sys
3 | import os
4 | import torch
5 | import numpy as np
6 | import random
7 | import csv
8 | import cv2
9 | from torchvision import transforms
10 | from torch.utils.data import Dataset, DataLoader
11 | from torchvision import transforms, utils
12 | from torch.utils.data.sampler import Sampler
13 |
14 | from pycocotools.coco import COCO
15 |
16 | import skimage.io
17 | import skimage.transform
18 | import skimage.color
19 | import skimage
20 |
21 | from PIL import Image
22 |
23 |
24 | class CocoDataset(Dataset):
25 | """Coco dataset."""
26 |
27 | def __init__(self, root_dir, set_name='train2017', transform=None):
28 | """
29 | Args:
30 | root_dir (string): COCO directory.
31 | transform (callable, optional): Optional transform to be applied
32 | on a sample.
33 | """
34 | self.root_dir = root_dir
35 | self.set_name = set_name
36 | self.transform = transform
37 |
38 | self.coco = COCO(os.path.join(self.root_dir, 'annotations_trainval2017', 'annotations', 'instances_' + self.set_name + '.json'))
39 | self.image_ids = self.coco.getImgIds()
40 |
41 | self.load_classes()
42 |
43 | def load_classes(self):
44 | # load class names (name -> label)
45 | categories = self.coco.loadCats(self.coco.getCatIds())
46 | categories.sort(key=lambda x: x['id'])
47 |
48 | self.classes = {}
49 | self.coco_labels = {}
50 | self.coco_labels_inverse = {}
51 | for c in categories:
52 | self.coco_labels[len(self.classes)] = c['id']
53 | self.coco_labels_inverse[c['id']] = len(self.classes)
54 | self.classes[c['name']] = len(self.classes)
55 |
56 | # also load the reverse (label -> name)
57 | self.labels = {}
58 | for key, value in self.classes.items():
59 | self.labels[value] = key
60 |
61 | def __len__(self):
62 | return len(self.image_ids)
63 |
64 | def __getitem__(self, idx):
65 |
66 | img = self.load_image(idx)
67 | annot = self.load_annotations(idx)
68 | sample = {'img': img, 'annot': annot}
69 | if self.transform:
70 | sample = self.transform(sample)
71 |
72 | return sample
73 |
74 | def load_image(self, image_index):
75 | image_info = self.coco.loadImgs(self.image_ids[image_index])[0]
76 | path = os.path.join(self.root_dir, self.set_name, image_info['file_name'])
77 | img = skimage.io.imread(path)
78 | # img = cv2.imread(path)
79 | # img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
80 | if len(img.shape) == 2:
81 | img = skimage.color.gray2rgb(img)
82 | # img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
83 |
84 | return img.astype(np.float32) / 255.0
85 |
86 | def load_annotations(self, image_index):
87 | # get ground truth annotations
88 | annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False)
89 | annotations = np.zeros((0, 5))
90 |
91 | # some images appear to miss annotations (like image with id 257034)
92 | if len(annotations_ids) == 0:
93 | return annotations
94 |
95 | # parse annotations
96 | coco_annotations = self.coco.loadAnns(annotations_ids)
97 | for idx, a in enumerate(coco_annotations):
98 |
99 | # some annotations have basically no width / height, skip them
100 | if a['bbox'][2] < 1 or a['bbox'][3] < 1:
101 | continue
102 |
103 | annotation = np.zeros((1, 5))
104 | annotation[0, :4] = a['bbox']
105 | annotation[0, 4] = self.coco_label_to_label(a['category_id'])
106 | annotations = np.append(annotations, annotation, axis=0)
107 |
108 | # transform from [x, y, w, h] to [x1, y1, x2, y2]
109 | annotations[:, 2] = annotations[:, 0] + annotations[:, 2]
110 | annotations[:, 3] = annotations[:, 1] + annotations[:, 3]
111 |
112 | return annotations
113 |
114 | def coco_label_to_label(self, coco_label):
115 | return self.coco_labels_inverse[coco_label]
116 |
117 |
118 | def label_to_coco_label(self, label):
119 | return self.coco_labels[label]
120 |
121 | def image_aspect_ratio(self, image_index):
122 | image = self.coco.loadImgs(self.image_ids[image_index])[0]
123 | return float(image['width']) / float(image['height'])
124 |
125 | def num_classes(self):
126 | return 80
127 |
128 |
129 | class CSVDataset(Dataset):
130 | """CSV dataset."""
131 |
132 | def __init__(self, train_file, class_list, transform=None):
133 | """
134 | Args:
135 | train_file (string): CSV file with training annotations
136 | annotations (string): CSV file with class list
137 | test_file (string, optional): CSV file with testing annotations
138 | """
139 | self.train_file = train_file
140 | self.class_list = class_list
141 | self.transform = transform
142 |
143 | # parse the provided class file
144 | try:
145 | with self._open_for_csv(self.class_list) as file:
146 | self.classes = self.load_classes(csv.reader(file, delimiter=','))
147 | except ValueError as e:
148 | raise_from(ValueError('invalid CSV class file: {}: {}'.format(self.class_list, e)), None)
149 |
150 | self.labels = {}
151 | for key, value in self.classes.items():
152 | self.labels[value] = key
153 |
154 | # csv with img_path, x1, y1, x2, y2, class_name
155 | try:
156 | with self._open_for_csv(self.train_file) as file:
157 | self.image_data = self._read_annotations(csv.reader(file, delimiter=','), self.classes)
158 | except ValueError as e:
159 | raise_from(ValueError('invalid CSV annotations file: {}: {}'.format(self.train_file, e)), None)
160 | self.image_names = list(self.image_data.keys())
161 |
162 | def _parse(self, value, function, fmt):
163 | """
164 | Parse a string into a value, and format a nice ValueError if it fails.
165 | Returns `function(value)`.
166 | Any `ValueError` raised is caught and a new `ValueError` is raised
167 | with message `fmt.format(e)`, where `e` is the caught `ValueError`.
168 | """
169 | try:
170 | return function(value)
171 | except ValueError as e:
172 | raise_from(ValueError(fmt.format(e)), None)
173 |
174 | def _open_for_csv(self, path):
175 | """
176 | Open a file with flags suitable for csv.reader.
177 | This differs between Python versions: for Python 2 it means mode 'rb',
178 | for Python 3 it means 'r' with "universal newlines".
179 | """
180 | if sys.version_info[0] < 3:
181 | return open(path, 'rb')
182 | else:
183 | return open(path, 'r', newline='')
184 |
185 |
186 | def load_classes(self, csv_reader):
187 | result = {}
188 |
189 | for line, row in enumerate(csv_reader):
190 | line += 1
191 |
192 | try:
193 | class_name, class_id = row
194 | except ValueError:
195 | raise_from(ValueError('line {}: format should be \'class_name,class_id\''.format(line)), None)
196 | class_id = self._parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line))
197 |
198 | if class_name in result:
199 | raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name))
200 | result[class_name] = class_id
201 | return result
202 |
203 |
204 | def __len__(self):
205 | return len(self.image_names)
206 |
207 | def __getitem__(self, idx):
208 |
209 | img = self.load_image(idx)
210 | annot = self.load_annotations(idx)
211 | sample = {'img': img, 'annot': annot}
212 | if self.transform:
213 | sample = self.transform(sample)
214 |
215 | return sample
216 |
217 | def load_image(self, image_index):
218 | img = skimage.io.imread(self.image_names[image_index])
219 |
220 | if len(img.shape) == 2:
221 | img = skimage.color.gray2rgb(img)
222 |
223 | return img.astype(np.float32)/255.0
224 |
225 | def load_annotations(self, image_index):
226 | # get ground truth annotations
227 | annotation_list = self.image_data[self.image_names[image_index]]
228 | annotations = np.zeros((0, 5))
229 |
230 | # some images appear to miss annotations (like image with id 257034)
231 | if len(annotation_list) == 0:
232 | return annotations
233 |
234 | # parse annotations
235 | for idx, a in enumerate(annotation_list):
236 | # some annotations have basically no width / height, skip them
237 | x1 = a['x1']
238 | x2 = a['x2']
239 | y1 = a['y1']
240 | y2 = a['y2']
241 |
242 | if (x2-x1) < 1 or (y2-y1) < 1:
243 | continue
244 |
245 | annotation = np.zeros((1, 5))
246 |
247 | annotation[0, 0] = x1
248 | annotation[0, 1] = y1
249 | annotation[0, 2] = x2
250 | annotation[0, 3] = y2
251 |
252 | annotation[0, 4] = self.name_to_label(a['class'])
253 | annotations = np.append(annotations, annotation, axis=0)
254 |
255 | return annotations
256 |
257 | def _read_annotations(self, csv_reader, classes):
258 | result = {}
259 | for line, row in enumerate(csv_reader):
260 | line += 1
261 |
262 | try:
263 | img_file, x1, y1, x2, y2, class_name = row[:6]
264 | except ValueError:
265 | raise_from(ValueError('line {}: format should be \'img_file,x1,y1,x2,y2,class_name\' or \'img_file,,,,,\''.format(line)), None)
266 |
267 | if img_file not in result:
268 | result[img_file] = []
269 |
270 | # If a row contains only an image path, it's an image without annotations.
271 | if (x1, y1, x2, y2, class_name) == ('', '', '', '', ''):
272 | continue
273 |
274 | x1 = self._parse(x1, int, 'line {}: malformed x1: {{}}'.format(line))
275 | y1 = self._parse(y1, int, 'line {}: malformed y1: {{}}'.format(line))
276 | x2 = self._parse(x2, int, 'line {}: malformed x2: {{}}'.format(line))
277 | y2 = self._parse(y2, int, 'line {}: malformed y2: {{}}'.format(line))
278 |
279 | # Check that the bounding box is valid.
280 | if x2 <= x1:
281 | raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
282 | if y2 <= y1:
283 | raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))
284 |
285 | # check if the current class name is correctly present
286 | if class_name not in classes:
287 | raise ValueError('line {}: unknown class name: \'{}\' (classes: {})'.format(line, class_name, classes))
288 |
289 | result[img_file].append({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name})
290 | return result
291 |
292 | def name_to_label(self, name):
293 | return self.classes[name]
294 |
295 | def label_to_name(self, label):
296 | return self.labels[label]
297 |
298 | def num_classes(self):
299 | return max(self.classes.values()) + 1
300 |
301 | def image_aspect_ratio(self, image_index):
302 | image = Image.open(self.image_names[image_index])
303 | return float(image.width) / float(image.height)
304 |
305 |
306 | def collater(data):
307 |
308 | imgs = [s['img'] for s in data]
309 | annots = [s['annot'] for s in data]
310 | scales1 = [s['scale1'] for s in data]
311 | scales2 = [s['scale2'] for s in data]
312 |
313 | widths = [int(s.shape[0]) for s in imgs]
314 | heights = [int(s.shape[1]) for s in imgs]
315 | batch_size = len(imgs)
316 |
317 | max_width = np.array(widths).max()
318 | max_height = np.array(heights).max()
319 |
320 | padded_imgs = torch.zeros(batch_size, max_width, max_height, 3)
321 |
322 | for i in range(batch_size):
323 | img = imgs[i]
324 | padded_imgs[i, :int(img.shape[0]), :int(img.shape[1]), :] = img
325 |
326 | max_num_annots = max(annot.shape[0] for annot in annots)
327 |
328 | if max_num_annots > 0:
329 |
330 | annot_padded = torch.ones((len(annots), max_num_annots, 5)) * -1
331 |
332 | if max_num_annots > 0:
333 | for idx, annot in enumerate(annots):
334 | #print(annot.shape)
335 | if annot.shape[0] > 0:
336 | annot_padded[idx, :annot.shape[0], :] = annot
337 | else:
338 | annot_padded = torch.ones((len(annots), 1, 5)) * -1
339 |
340 |
341 | padded_imgs = padded_imgs.permute(0, 3, 1, 2)
342 |
343 | return {'img': padded_imgs, 'annot': annot_padded, 'scale1': scales1, 'scale2': scales2}
344 |
345 | class Resizer(object):
346 | """Convert ndarrays in sample to Tensors."""
347 |
348 | def __call__(self, sample, min_side=512, max_side=512):
349 | image, annots = sample['img'], sample['annot']
350 |
351 | rows, cols, cns = image.shape
352 |
353 | smallest_side = min(rows, cols)
354 |
355 | # rescale the image so the smallest side is min_side
356 | scale = min_side / smallest_side
357 | scale1 = 512 / rows
358 | scale2 = 512 / cols
359 | # check if the largest side is now greater than max_side, which can happen
360 | # when images have a large aspect ratio
361 | largest_side = max(rows, cols)
362 |
363 | if largest_side * scale > max_side:
364 | scale = max_side / largest_side
365 |
366 | # resize the image with the computed scale
367 | # image = skimage.transform.resize(image, (int(round(rows*scale)), int(round((cols*scale)))))
368 | image = skimage.transform.resize(image, (512, 512))
369 |
370 | rows, cols, cns = image.shape
371 |
372 | pad_w = 32 - rows%32
373 | pad_h = 32 - cols%32
374 |
375 | new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32)
376 | new_image[:rows, :cols, :] = image.astype(np.float32)
377 | image = image.astype(np.float32)
378 | # print(np.shape(annots))
379 | # annots[:, :2] *= scale1
380 | # annots[:, 2:4] *= scale2
381 |
382 | annots[:, 0] *= scale2
383 | annots[:, 2] *= scale2
384 | annots[:, 1] *= scale1
385 | annots[:, 3] *= scale1
386 |
387 | return {'img': torch.from_numpy(image), 'annot': torch.from_numpy(annots), 'scale1': scale1, 'scale2': scale2}
388 |
389 |
390 | class Augmenter(object):
391 | """Convert ndarrays in sample to Tensors."""
392 |
393 | def __call__(self, sample, flip_x=0.5):
394 |
395 | if np.random.rand() < flip_x:
396 | image, annots = sample['img'], sample['annot']
397 | image = image[:, ::-1, :]
398 |
399 | rows, cols, channels = image.shape
400 |
401 | x1 = annots[:, 0].copy()
402 | x2 = annots[:, 2].copy()
403 |
404 | x_tmp = x1.copy()
405 |
406 | annots[:, 0] = cols - x2
407 | annots[:, 2] = cols - x_tmp
408 |
409 | sample = {'img': image, 'annot': annots}
410 |
411 | return sample
412 |
413 |
414 | class Normalizer(object):
415 |
416 | def __init__(self):
417 | self.mean = np.array([[[0.485, 0.456, 0.406]]])
418 | self.std = np.array([[[0.229, 0.224, 0.225]]])
419 |
420 | def __call__(self, sample):
421 |
422 | image, annots = sample['img'], sample['annot']
423 |
424 | return {'img':((image.astype(np.float32)-self.mean)/self.std), 'annot': annots}
425 |
426 | # class to_tensor(object):
427 | #
428 | # def __call__(self, sample):
429 | # image, annots = sample['img'], sample['annot']
430 | # return {}
431 |
432 |
433 |
434 | class UnNormalizer(object):
435 | def __init__(self, mean=None, std=None):
436 | if mean is None:
437 | self.mean = [0.485, 0.456, 0.406]
438 | else:
439 | self.mean = mean
440 | if std is None:
441 | self.std = [0.229, 0.224, 0.225]
442 | else:
443 | self.std = std
444 |
445 | def __call__(self, tensor):
446 | """
447 | Args:
448 | tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
449 | Returns:
450 | Tensor: Normalized image.
451 | """
452 | for t, m, s in zip(tensor, self.mean, self.std):
453 | t.mul_(s).add_(m)
454 | return tensor
455 |
456 |
457 | class AspectRatioBasedSampler(Sampler):
458 |
459 | def __init__(self, data_source, batch_size, drop_last):
460 | self.data_source = data_source
461 | self.batch_size = batch_size
462 | self.drop_last = drop_last
463 | self.groups = self.group_images()
464 |
465 | def __iter__(self):
466 | random.shuffle(self.groups)
467 | for group in self.groups:
468 | yield group
469 |
470 | def __len__(self):
471 | if self.drop_last:
472 | return len(self.data_source) // self.batch_size
473 | else:
474 | return (len(self.data_source) + self.batch_size - 1) // self.batch_size
475 |
476 | def group_images(self):
477 | # determine the order of the images
478 | order = list(range(len(self.data_source)))
479 | order.sort(key=lambda x: self.data_source.image_aspect_ratio(x))
480 |
481 | # divide into groups, one group = one batch
482 | return [[order[x % len(order)] for x in range(i, i + self.batch_size)] for i in range(0, len(order), self.batch_size)]
483 |
--------------------------------------------------------------------------------
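
A minimal sketch of wiring the dataset, transforms, sampler and collater above into a DataLoader; COCO_ROOT is a placeholder that must contain train2017/ plus the annotation layout CocoDataset expects, and the repository root is assumed to be on PYTHONPATH.

from torch.utils.data import DataLoader
from torchvision import transforms
from dataset.dataloader import (AspectRatioBasedSampler, Augmenter, CocoDataset,
                                Normalizer, Resizer, collater)

COCO_ROOT = '/path/to/coco'   # placeholder
dataset = CocoDataset(COCO_ROOT, set_name='train2017',
                      transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
sampler = AspectRatioBasedSampler(dataset, batch_size=2, drop_last=False)
loader = DataLoader(dataset, num_workers=2, collate_fn=collater, batch_sampler=sampler)

batch = next(iter(loader))
print(batch['img'].shape, batch['annot'].shape)   # (2, 3, 512, 512) and (2, max_annots, 5)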