├── lib ├── __init__.py ├── nms │ ├── __init__.py │ ├── src │ │ ├── cuda │ │ │ ├── nms_kernel.cu.o │ │ │ ├── nms_kernel.h │ │ │ └── nms_kernel.cu │ │ ├── nms_cuda.h │ │ ├── nms.h │ │ ├── nms_cuda.c │ │ └── nms.c │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ └── pth_nms.cpython-36.pyc │ ├── build.py │ └── pth_nms.py ├── __pycache__ │ └── __init__.cpython-36.pyc └── build.sh ├── img ├── 1.png ├── 2.png ├── 3.jpg └── 4.png ├── imges ├── 1.jpg ├── 2.jpg ├── 3.jpg ├── 4.jpg ├── 5.jpg └── 6.jpg ├── __pycache__ └── coco_eval.cpython-36.pyc ├── model ├── __pycache__ │ ├── BiFPN.cpython-35.pyc │ ├── BiFPN.cpython-36.pyc │ ├── losses.cpython-35.pyc │ ├── losses.cpython-36.pyc │ ├── model.cpython-35.pyc │ ├── model.cpython-36.pyc │ ├── util.cpython-35.pyc │ ├── util.cpython-36.pyc │ ├── utils.cpython-35.pyc │ ├── utils.cpython-36.pyc │ ├── __init__.cpython-35.pyc │ ├── __init__.cpython-36.pyc │ ├── anchors.cpython-35.pyc │ ├── anchors.cpython-36.pyc │ ├── RetinaHead.cpython-35.pyc │ ├── RetinaHead.cpython-36.pyc │ ├── efficientdet.cpython-35.pyc │ └── efficientdet.cpython-36.pyc ├── __init__.py ├── efficientdet.py ├── anchors.py ├── RetinaHead.py ├── losses.py ├── BiFPN.py ├── util.py ├── model.py └── utils.py ├── dataset ├── __pycache__ │ └── dataloader.cpython-36.pyc └── dataloader.py ├── log ├── events.out.tfevents.1577539929.fineserver └── events.out.tfevents.1577540185.fineserver ├── .idea ├── misc.xml ├── inspectionProfiles │ └── profiles_settings.xml ├── modules.xml ├── bishe.iml └── workspace.xml ├── README.md ├── coco_eval.py ├── demo.py └── train.py /lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /img/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/img/1.png -------------------------------------------------------------------------------- /img/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/img/2.png -------------------------------------------------------------------------------- /img/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/img/3.jpg -------------------------------------------------------------------------------- /img/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/img/4.png -------------------------------------------------------------------------------- /imges/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/1.jpg -------------------------------------------------------------------------------- /imges/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/2.jpg -------------------------------------------------------------------------------- /imges/3.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/3.jpg -------------------------------------------------------------------------------- /imges/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/4.jpg -------------------------------------------------------------------------------- /imges/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/5.jpg -------------------------------------------------------------------------------- /imges/6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/imges/6.jpg -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/lib/nms/src/cuda/nms_kernel.cu.o -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh); -------------------------------------------------------------------------------- /__pycache__/coco_eval.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/__pycache__/coco_eval.cpython-36.pyc -------------------------------------------------------------------------------- /lib/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/lib/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /model/__pycache__/BiFPN.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/BiFPN.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/BiFPN.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/BiFPN.cpython-36.pyc -------------------------------------------------------------------------------- /model/__pycache__/losses.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/losses.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/losses.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/losses.cpython-36.pyc -------------------------------------------------------------------------------- /model/__pycache__/model.cpython-35.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/model.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /model/__pycache__/util.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/util.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/util.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/util.cpython-36.pyc -------------------------------------------------------------------------------- /model/__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /model/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /model/__pycache__/anchors.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/anchors.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/anchors.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/anchors.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nms/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/lib/nms/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nms/__pycache__/pth_nms.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/lib/nms/__pycache__/pth_nms.cpython-36.pyc -------------------------------------------------------------------------------- /model/__pycache__/RetinaHead.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/RetinaHead.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/RetinaHead.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/RetinaHead.cpython-36.pyc -------------------------------------------------------------------------------- /dataset/__pycache__/dataloader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/dataset/__pycache__/dataloader.cpython-36.pyc -------------------------------------------------------------------------------- /log/events.out.tfevents.1577539929.fineserver: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/log/events.out.tfevents.1577539929.fineserver -------------------------------------------------------------------------------- /log/events.out.tfevents.1577540185.fineserver: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/log/events.out.tfevents.1577540185.fineserver -------------------------------------------------------------------------------- /model/__pycache__/efficientdet.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/efficientdet.cpython-35.pyc -------------------------------------------------------------------------------- /model/__pycache__/efficientdet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderhss/efficientdet-pytorch/HEAD/model/__pycache__/efficientdet.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nms/src/nms.h: -------------------------------------------------------------------------------- 1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh); -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.5.1" 2 | from .model import EfficientNet 3 | from .utils import ( 4 | GlobalParams, 5 | BlockArgs, 6 | BlockDecoder, 7 | efficientnet, 8 | get_model_params, 9 | ) 10 | 11 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 
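For reference, the `model/__init__.py` shown above re-exports the EfficientNet backbone and its parameter helpers. Below is a minimal usage sketch, assuming the modified `EfficientNet.from_pretrained(args)` signature that `model/efficientdet.py` and `train.py` rely on; the `SimpleNamespace` stands in for the argparse namespace and is illustrative only.

```python
# Minimal sketch of the exports in model/__init__.py; the from_pretrained(args)
# call and the list-of-feature-maps output follow model/efficientdet.py in this
# repository and may differ from the upstream EfficientNet-PyTorch API.
from types import SimpleNamespace

import torch
from model import EfficientNet

args = SimpleNamespace(backbone='efficientnet-b0', backbone_pretrained=False)
backbone = EfficientNet.from_pretrained(args)

# model/efficientdet.py keeps the last three pyramid levels of the backbone output
features = backbone(torch.randn(1, 3, 512, 512))[-3:]
print([f.shape for f in features])
```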
-------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _NMS_KERNEL 2 | #define _NMS_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 10 | 11 | void _nms(int boxes_num, float * boxes_dev, 12 | unsigned long long * mask_dev, float nms_overlap_thresh); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /lib/build.sh: -------------------------------------------------------------------------------- 1 | CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \ 2 | -gencode arch=compute_35,code=sm_35 \ 3 | -gencode arch=compute_50,code=sm_50 \ 4 | -gencode arch=compute_52,code=sm_52 \ 5 | -gencode arch=compute_60,code=sm_60 \ 6 | -gencode arch=compute_61,code=sm_61" 7 | 8 | 9 | # Build NMS 10 | cd nms/src/cuda 11 | echo "Compiling nms kernels by nvcc..." 12 | /usr/local/cuda/bin/nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH 13 | cd ../../ 14 | python build.py install 15 | cd ../ 16 | -------------------------------------------------------------------------------- /.idea/bishe.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 15 | -------------------------------------------------------------------------------- /lib/nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | #from torch.utils.ffi import create_extension 4 | from torch.utils.cpp_extension import BuildExtension 5 | 6 | 7 | sources = ['src/nms.c'] 8 | headers = ['src/nms.h'] 9 | defines = [] 10 | with_cuda = False 11 | 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/nms_cuda.c'] 15 | headers += ['src/nms_cuda.h'] 16 | defines += [('WITH_CUDA', None)] 17 | with_cuda = True 18 | 19 | this_file = os.path.dirname(os.path.realpath(__file__)) 20 | print(this_file) 21 | extra_objects = ['src/cuda/nms_kernel.cu.o'] 22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 23 | 24 | ffi = BuildExtension( 25 | '_ext.nms', 26 | headers=headers, 27 | sources=sources, 28 | define_macros=defines, 29 | relative_to=__file__, 30 | with_cuda=with_cuda, 31 | extra_objects=extra_objects, 32 | extra_compile_args=['-std=c99'] 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/nms/pth_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from ._ext import nms 3 | import numpy as np 4 | 5 | def pth_nms(dets, thresh): 6 | """ 7 | dets has to be a tensor 8 | """ 9 | if not dets.is_cuda: 10 | x1 = dets[:, 0] 11 | y1 = dets[:, 1] 12 | x2 = dets[:, 2] 13 | y2 = dets[:, 3] 14 | scores = dets[:, 4] 15 | 16 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 17 | order = scores.sort(0, descending=True)[1] 18 | # order = 
torch.from_numpy(np.ascontiguousarray(scores.numpy().argsort()[::-1])).long() 19 | 20 | keep = torch.LongTensor(dets.size(0)) 21 | num_out = torch.LongTensor(1) 22 | nms.cpu_nms(keep, num_out, dets, order, areas, thresh) 23 | 24 | return keep[:num_out[0]] 25 | else: 26 | x1 = dets[:, 0] 27 | y1 = dets[:, 1] 28 | x2 = dets[:, 2] 29 | y2 = dets[:, 3] 30 | scores = dets[:, 4] 31 | 32 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 33 | order = scores.sort(0, descending=True)[1] 34 | # order = torch.from_numpy(np.ascontiguousarray(scores.cpu().numpy().argsort()[::-1])).long().cuda() 35 | 36 | dets = dets[order].contiguous() 37 | 38 | keep = torch.LongTensor(dets.size(0)) 39 | num_out = torch.LongTensor(1) 40 | # keep = torch.cuda.LongTensor(dets.size(0)) 41 | # num_out = torch.cuda.LongTensor(1) 42 | nms.gpu_nms(keep, num_out, dets, thresh) 43 | 44 | return order[keep[:num_out[0]].cuda()].contiguous() 45 | # return order[keep[:num_out[0]]].contiguous() 46 | 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # efficientdet-pytorch 2 | ![image](https://github.com/coderhss/efficientdet-pytorch/blob/master/img/2.png) 3 | ![image](https://github.com/coderhss/efficientdet-pytorch/blob/master/img/1.png) 4 | ![image](https://github.com/coderhss/efficientdet-pytorch/blob/master/img/3.jpg) 5 | ![image](https://github.com/coderhss/efficientdet-pytorch/blob/master/img/4.png) 6 | 7 | PyTorch implementation of EfficientDet object detection, as described in [EfficientDet: Scalable and Efficient Object Detection](https://arxiv.org/pdf/1911.09070.pdf). 8 | 9 | This implementation is a very simple version without much data augmentation. 10 | 11 | The EfficientNet code is borrowed from [A PyTorch implementation of EfficientNet](https://github.com/lukemelas/EfficientNet-PyTorch). If you want to train EfficientDet from scratch, you should load the EfficientNet pretrained parameters. Use 12 | 13 | ``` 14 | python train.py --coco_path '/home/hoo/Dataset/COCO' --backbone 'efficientnet-b0' --backbone_pretrained True 15 | ``` 16 | 17 | and the EfficientNet pretrained parameters will be downloaded and loaded automatically, and training will start. 
18 | 19 | I've only trained efficientdet-d0 so far, without much data augmentation. If you want to load the EfficientDet pretrained parameters, use 20 | 21 | ``` 22 | python train.py --coco_path '/home/hoo/Dataset/COCO' --backbone 'efficientnet-b0' --backbone_pretrained False --EfficientDet_pretrained True --pretrained './weights/efficientdet_0.pth' 23 | ``` 24 | | Model | mAP | pretrained | 25 | | :-------------: | :---: | :----------------------------------------------------------: | 26 | | efficientdet-d0 | 25.9% | [download](https://drive.google.com/open?id=1UgQp9wqtc1O_EabU9O6NWNG6B8imYmv_) | 27 | 28 | **QQ group: 607724770 (Torch discussion group)** 29 | 30 | ## Acknowledgements 31 | - The EfficientNet code is borrowed from [A PyTorch implementation of EfficientNet](https://github.com/lukemelas/EfficientNet-PyTorch) 32 | - The RetinaNet code is borrowed from [Pytorch implementation of RetinaNet object detection](https://github.com/yhenon/pytorch-retinanet) 33 | -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #include <THC/THC.h> 8 | #include <TH/TH.h> 9 | #include <math.h> 10 | #include <stdio.h> 11 | 12 | #include "cuda/nms_kernel.h" 13 | 14 | 15 | extern THCState *state; 16 | 17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) { 18 | // boxes has to be sorted 19 | THArgCheck(THLongTensor_isContiguous(keep), 0, "boxes must be contiguous"); 20 | THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous"); 21 | // Number of ROIs 22 | int boxes_num = THCudaTensor_size(state, boxes, 0); 23 | int boxes_dim = THCudaTensor_size(state, boxes, 1); 24 | 25 | float* boxes_flat = THCudaTensor_data(state, boxes); 26 | 27 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 28 | THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks); 29 | unsigned long long* mask_flat = THCudaLongTensor_data(state, mask); 30 | 31 | _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh); 32 | 33 | THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks); 34 | THLongTensor_copyCuda(state, mask_cpu, mask); 35 | THCudaLongTensor_free(state, mask); 36 | 37 | unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu); 38 | 39 | THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks); 40 | unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu); 41 | THLongTensor_fill(remv_cpu, 0); 42 | 43 | long * keep_flat = THLongTensor_data(keep); 44 | long num_to_keep = 0; 45 | 46 | int i, j; 47 | for (i = 0; i < boxes_num; i++) { 48 | int nblock = i / threadsPerBlock; 49 | int inblock = i % threadsPerBlock; 50 | 51 | if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) { 52 | keep_flat[num_to_keep++] = i; 53 | unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks; 54 | for (j = nblock; j < col_blocks; j++) { 55 | remv_cpu_flat[j] |= p[j]; 56 | } 57 | } 58 | } 59 | 60 | long * num_out_flat = THLongTensor_data(num_out); 61 | * num_out_flat = num_to_keep; 62 | 63 | THLongTensor_free(mask_cpu); 64 | THLongTensor_free(remv_cpu); 65 | 66 | return 1; 67 | } 68 | 
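The NMS extension above is written against the legacy TH/THC C API and is compiled by `lib/build.sh` via the deprecated FFI-style `lib/nms/build.py`, so it may not build on recent PyTorch releases; note that `model/RetinaHead.py` in this repository already wraps `torchvision.ops.nms` instead. As a minimal sketch (assuming `torchvision` is installed; `pth_nms_fallback` is an illustrative name, not part of the repository), the `(N, 5)` detection layout used by `lib/nms/pth_nms.py` can be suppressed without the compiled extension. torchvision computes IoU without the legacy `+1` pixel offset used in `nms.c`, so results can differ marginally.

```python
# Minimal sketch: pure-PyTorch stand-in for lib.nms.pth_nms.pth_nms when the
# TH/THC extension cannot be compiled. Assumes torchvision is available;
# pth_nms_fallback is a hypothetical helper name, not defined in this repo.
import torch
from torchvision.ops import nms as tv_nms


def pth_nms_fallback(dets: torch.Tensor, thresh: float) -> torch.Tensor:
    """dets: (N, 5) tensor of [x1, y1, x2, y2, score]; returns the indices of
    the boxes kept by greedy NMS, ordered by descending score."""
    boxes = dets[:, :4]
    scores = dets[:, 4]
    return tv_nms(boxes, scores, iou_threshold=thresh)
```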
-------------------------------------------------------------------------------- /model/efficientdet.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: huashuoshuo 3 | # Data: 2019/12/17 10:53 4 | 5 | 6 | import torch 7 | import numpy as np 8 | import torch.nn as nn 9 | from .BiFPN import BiFPN 10 | # from .RetinaHead import RetinaHead 11 | 12 | # class ConvBlock(nn.Module): 13 | # def __init__(self): 14 | # super().__init__() 15 | class ConvBlock(nn.Module): 16 | """ 17 | 18 | """ 19 | def __init__(self, inp, oup, k_size, stride=1, padding=0): 20 | super().__init__() 21 | # Conv2d = get_same_padding_conv2d 22 | self.conv = nn.Conv2d(in_channels=inp, out_channels=oup, kernel_size=k_size, stride=stride, padding=padding, bias=False) 23 | self.norm = nn.BatchNorm2d(num_features=oup) 24 | self.act = nn.ReLU(inplace=True) 25 | def forward(self, x): 26 | x = self.norm(self.conv(x)) 27 | return self.act(x) 28 | 29 | from model import EfficientNet 30 | # from .RetinaHead import RetinaHead 31 | class EfficientDet(nn.Module): 32 | """ 33 | 34 | """ 35 | def __init__(self, args): 36 | super().__init__() 37 | 38 | self.inp = 64 39 | self.oup = 64 40 | self.bifpn_repeat = 2 41 | print(args.backbone) 42 | self.backbone = EfficientNet.from_pretrained(args) 43 | # self.backbone.get_list_features() 44 | self.tail = nn.ModuleList([ConvBlock(320, self.oup, 3, 2, 1), ConvBlock(self.oup, self.oup, 3, 2, 1)]) 45 | self.channel_same = self.change_channel(self.backbone.get_list_feature()[-3:]) 46 | self.BiFPN_first = BiFPN(oup=self.oup, first=True) 47 | self.BiFPN = nn.ModuleList() 48 | for i in range(self.bifpn_repeat-1): 49 | self.BiFPN.append(BiFPN(oup=self.oup, first=False)) 50 | 51 | def forward(self, inputs): 52 | features_in = self.extra(inputs) 53 | features_out = self.BiFPN_first(features_in) 54 | for i, bifpn in enumerate(self.BiFPN): 55 | features_out = bifpn(features_out) 56 | return features_out 57 | 58 | 59 | def extra(self, img): 60 | x = self.backbone(img)[-3:] 61 | # before_fpn = self.channel_same(x[-5:]) 62 | # print(x[-1].shape) 63 | # print(self.tail) 64 | # tail = [tail_conv(x[-1]) for i, tail_conv in enumerate(self.tail)] 65 | for i, tail_conv in enumerate(self.tail): 66 | x.append(tail_conv(x[-1])) 67 | 68 | 69 | before_fpn = [ 70 | conv(x[i]) 71 | for i, conv in enumerate(self.channel_same)] 72 | 73 | before_fpn.extend(x[-2:]) 74 | 75 | return before_fpn 76 | 77 | def change_channel(self, channel): 78 | convs = nn.ModuleList() 79 | for i in range(len(channel)): 80 | conv = ConvBlock(channel[i], self.oup, k_size=1, stride=1, padding=0) 81 | convs.append(conv) 82 | return convs -------------------------------------------------------------------------------- /lib/nms/src/nms.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) { 5 | // boxes has to be sorted 6 | THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous"); 7 | THArgCheck(THLongTensor_isContiguous(boxes), 2, "boxes must be contiguous"); 8 | THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous"); 9 | THArgCheck(THLongTensor_isContiguous(areas), 4, "areas must be contiguous"); 10 | // Number of ROIs 11 | long boxes_num = THFloatTensor_size(boxes, 0); 12 | long boxes_dim = THFloatTensor_size(boxes, 
1); 13 | 14 | long * keep_out_flat = THLongTensor_data(keep_out); 15 | float * boxes_flat = THFloatTensor_data(boxes); 16 | long * order_flat = THLongTensor_data(order); 17 | float * areas_flat = THFloatTensor_data(areas); 18 | 19 | THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num); 20 | THByteTensor_fill(suppressed, 0); 21 | unsigned char * suppressed_flat = THByteTensor_data(suppressed); 22 | 23 | // nominal indices 24 | int i, j; 25 | // sorted indices 26 | int _i, _j; 27 | // temp variables for box i's (the box currently under consideration) 28 | float ix1, iy1, ix2, iy2, iarea; 29 | // variables for computing overlap with box j (lower scoring box) 30 | float xx1, yy1, xx2, yy2; 31 | float w, h; 32 | float inter, ovr; 33 | 34 | long num_to_keep = 0; 35 | for (_i=0; _i < boxes_num; ++_i) { 36 | i = order_flat[_i]; 37 | if (suppressed_flat[i] == 1) { 38 | continue; 39 | } 40 | keep_out_flat[num_to_keep++] = i; 41 | ix1 = boxes_flat[i * boxes_dim]; 42 | iy1 = boxes_flat[i * boxes_dim + 1]; 43 | ix2 = boxes_flat[i * boxes_dim + 2]; 44 | iy2 = boxes_flat[i * boxes_dim + 3]; 45 | iarea = areas_flat[i]; 46 | for (_j = _i + 1; _j < boxes_num; ++_j) { 47 | j = order_flat[_j]; 48 | if (suppressed_flat[j] == 1) { 49 | continue; 50 | } 51 | xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]); 52 | yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]); 53 | xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]); 54 | yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]); 55 | w = fmaxf(0.0, xx2 - xx1 + 1); 56 | h = fmaxf(0.0, yy2 - yy1 + 1); 57 | inter = w * h; 58 | ovr = inter / (iarea + areas_flat[j] - inter); 59 | if (ovr >= nms_overlap_thresh) { 60 | suppressed_flat[j] = 1; 61 | } 62 | } 63 | } 64 | 65 | long *num_out_flat = THLongTensor_data(num_out); 66 | *num_out_flat = num_to_keep; 67 | THByteTensor_free(suppressed); 68 | return 1; 69 | } -------------------------------------------------------------------------------- /coco_eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from pycocotools.coco import COCO 4 | from pycocotools.cocoeval import COCOeval 5 | 6 | import numpy as np 7 | import json 8 | import os 9 | 10 | import torch 11 | 12 | def evaluate_coco(dataset, model, threshold=0.05): 13 | 14 | model.eval() 15 | 16 | with torch.no_grad(): 17 | 18 | # start collecting results 19 | results = [] 20 | image_ids = [] 21 | 22 | for index in range(len(dataset)-4500): 23 | data = dataset[index] 24 | # scale = data['scale'] 25 | scale1 = data['scale1'] 26 | scale2 = data['scale2'] 27 | 28 | # run network 29 | scores, labels, boxes = model(data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0)) 30 | scores = scores.cpu() 31 | labels = labels.cpu() 32 | boxes = boxes.cpu() 33 | 34 | # correct boxes for image scale 35 | # boxes /= scale 36 | boxes[:, 0] /= scale2 37 | boxes[:, 2] /= scale2 38 | boxes[:, 1] /= scale1 39 | boxes[:, 3] /= scale1 40 | 41 | if boxes.shape[0] > 0: 42 | # change to (x, y, w, h) (MS COCO standard) 43 | boxes[:, 2] -= boxes[:, 0] 44 | boxes[:, 3] -= boxes[:, 1] 45 | 46 | # compute predicted labels and scores 47 | #for box, score, label in zip(boxes[0], scores[0], labels[0]): 48 | for box_id in range(boxes.shape[0]): 49 | score = float(scores[box_id]) 50 | label = int(labels[box_id]) 51 | box = boxes[box_id, :] 52 | 53 | # scores are sorted, so we can break 54 | if score < threshold: 55 | break 56 | 57 | # append detection for each positively labeled class 58 | image_result = { 59 | 
'image_id' : dataset.image_ids[index], 60 | 'category_id' : dataset.label_to_coco_label(label), 61 | 'score' : float(score), 62 | 'bbox' : box.tolist(), 63 | } 64 | 65 | # append detection to results 66 | results.append(image_result) 67 | 68 | # append image to list of processed images 69 | image_ids.append(dataset.image_ids[index]) 70 | 71 | # print progress 72 | print('{}/{}'.format(index, len(dataset)), end='\r') 73 | 74 | if not len(results): 75 | return 76 | 77 | # write output 78 | json.dump(results, open('{}_bbox_results.json'.format(dataset.set_name), 'w'), indent=4) 79 | 80 | # load results in COCO evaluation tool 81 | coco_true = dataset.coco 82 | coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(dataset.set_name)) 83 | 84 | # run COCO evaluation 85 | coco_eval = COCOeval(coco_true, coco_pred, 'bbox') 86 | coco_eval.params.imgIds = image_ids 87 | coco_eval.evaluate() 88 | coco_eval.accumulate() 89 | mAP = coco_eval.summarize() 90 | 91 | model.train() 92 | 93 | return mAP 94 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include 12 | #include 13 | #include 14 | #include "nms_kernel.h" 15 | 16 | __device__ inline float devIoU(float const * const a, float const * const b) { 17 | float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]); 18 | float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]); 19 | float width = fmaxf(right - left + 1, 0.f), height = fmaxf(bottom - top + 1, 0.f); 20 | float interS = width * height; 21 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 22 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 23 | return interS / (Sa + Sb - interS); 24 | } 25 | 26 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 27 | const float *dev_boxes, unsigned long long *dev_mask) { 28 | const int row_start = blockIdx.y; 29 | const int col_start = blockIdx.x; 30 | 31 | // if (row_start > col_start) return; 32 | 33 | const int row_size = 34 | fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 35 | const int col_size = 36 | fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 37 | 38 | __shared__ float block_boxes[threadsPerBlock * 5]; 39 | if (threadIdx.x < col_size) { 40 | block_boxes[threadIdx.x * 5 + 0] = 41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 42 | block_boxes[threadIdx.x * 5 + 1] = 43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 44 | block_boxes[threadIdx.x * 5 + 2] = 45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 46 | block_boxes[threadIdx.x * 5 + 3] = 47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 48 | block_boxes[threadIdx.x * 5 + 4] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 50 | } 51 | __syncthreads(); 52 | 53 | if (threadIdx.x < row_size) { 54 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 55 | const float *cur_box = dev_boxes + cur_box_idx * 5; 56 | int i = 0; 57 | unsigned long long t = 0; 58 | int start = 0; 59 | if (row_start == col_start) { 60 | 
start = threadIdx.x + 1; 61 | } 62 | for (i = start; i < col_size; i++) { 63 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 64 | t |= 1ULL << i; 65 | } 66 | } 67 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 68 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 69 | } 70 | } 71 | 72 | 73 | void _nms(int boxes_num, float * boxes_dev, 74 | unsigned long long * mask_dev, float nms_overlap_thresh) { 75 | 76 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 77 | DIVUP(boxes_num, threadsPerBlock)); 78 | dim3 threads(threadsPerBlock); 79 | nms_kernel<<>>(boxes_num, 80 | nms_overlap_thresh, 81 | boxes_dev, 82 | mask_dev); 83 | } 84 | 85 | #ifdef __cplusplus 86 | } 87 | #endif 88 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: huashuoshuo 3 | # Data: 12/26/19 2:12 PM 4 | import torch 5 | import torch.nn as nn 6 | from model.util import Filter_boxes 7 | import os 8 | import argparse 9 | from RetinaHead import RetinaHead 10 | import skimage.io 11 | import skimage 12 | import skimage.transform 13 | import numpy as np 14 | import cv2 as cv2 15 | import matplotlib.pyplot as plt 16 | import time 17 | from model.util import num2name 18 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 19 | 20 | def main(): 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('--img_path', type=str, default='/home/huashuoshuo/bishe/imges/6.jpg') 23 | parser.add_argument('--weight_path', type=str, default='./weights/retinanet_15.pth') 24 | parser.add_argument('--backbone', type=str, default='efficientnet-b0') 25 | parser.add_argument('--backbone_pretrained', type=bool, default=False) 26 | parser.add_argument('--threshold', type=float, default=0.35) 27 | 28 | parser = parser.parse_args() 29 | with torch.no_grad(): 30 | efficientdet = RetinaHead(parser, is_demo=True) 31 | # efficientdet = torch.nn.DataParallel(efficientdet).cuda() 32 | efficientdet = efficientdet.cuda() 33 | state_dict = torch.load(parser.weight_path) 34 | efficientdet.load_state_dict(state_dict) 35 | 36 | # img read 37 | img = skimage.io.imread(parser.img_path) 38 | img_input, scale1, scale2= preprocessing(img) 39 | efficientdet.eval() 40 | img_input = img_input.cuda() 41 | time_start = time.time() 42 | # for i in range(1000): 43 | boxes, classification, scores = efficientdet(img_input) 44 | boxes, scores, labels= Filter_boxes(parser)([boxes, classification, scores]) 45 | 46 | time_stop = time.time() 47 | print('time:', time_stop-time_start) 48 | # scores = scores.cpu().numpy() 49 | # labels = labels.cpu().numpy() 50 | # boxes = boxes.cpu().numpy() 51 | 52 | # print(boxes) 53 | # print(np.shape(img)) 54 | text_thickness = 1 55 | thickness = 2 56 | scale = 0.4 57 | line_type = 8 58 | for i in range(np.shape(boxes)[0]): 59 | box = boxes[i].cpu().numpy() 60 | score = scores[i].cpu().numpy() 61 | for j in range(np.shape(box)[0]): 62 | p1 = (int(box[j][0]/scale2), int(box[j][1]/scale1)) 63 | p2 = (int(box[j][2]/scale2), int(box[j][3]/scale1)) 64 | cv2.rectangle(img, p1, p2, (0, 0, 255), 2) 65 | s = '%s/%.1f%%' % (num2name[labels[i]+1], score[j] * 100) 66 | text_size, baseline = cv2.getTextSize(s, cv2.FONT_HERSHEY_SIMPLEX, scale, text_thickness) 67 | 68 | if (p2[0] - p1[0] < 1) or (p2[1] - p1[1] < 1): 69 | continue 70 | # p1 = (p1[0] - text_size[1], p1[1]) 71 | 72 | cv2.rectangle(img, (p1[0], p1[1]), 73 | (p1[0] + text_size[0], p1[1] + text_size[1]), (0, 0, 
255), -1) 74 | 75 | cv2.putText(img, s, (p1[0], p1[1] + 2*baseline), cv2.FONT_HERSHEY_SIMPLEX, scale, (255, 255, 255), 76 | text_thickness, line_type) 77 | plt.imshow(img) 78 | plt.show() 79 | # print(scores, labels) 80 | 81 | 82 | 83 | return 84 | 85 | def preprocessing(img): 86 | 87 | img = img.astype(np.float32) / 255.0 88 | # normalize 89 | mean = np.array([[[0.485, 0.456, 0.406]]]) 90 | std = np.array([[[0.229, 0.224, 0.225]]]) 91 | img = (img - mean) / std 92 | # resize 93 | rows, cols, cns = np.shape(img) 94 | scale1 = 512 / rows 95 | scale2 = 512 / cols 96 | img_input = skimage.transform.resize(img, (512, 512)) 97 | img_input = torch.from_numpy(img_input) 98 | img_input = img_input.unsqueeze(0) 99 | img_input = img_input.permute(0, 3, 1, 2).float() 100 | return img_input, scale1, scale2 101 | 102 | 103 | def box_filter(scores, labels, boxes): 104 | scores = scores.cpu() 105 | labels = labels.cpu() 106 | boxes = boxes.cpu() 107 | 108 | return 109 | 110 | 111 | if __name__=='__main__': 112 | main() 113 | -------------------------------------------------------------------------------- /model/anchors.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: huashuoshuo 3 | # Data: 2019/12/19 18:58 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | class Anchors(nn.Module): 11 | def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None): 12 | super(Anchors, self).__init__() 13 | 14 | if pyramid_levels is None: 15 | self.pyramid_levels = [3, 4, 5, 6, 7] 16 | if strides is None: 17 | self.strides = [2 ** x for x in self.pyramid_levels] 18 | if sizes is None: 19 | self.sizes = [2 ** (x + 2) for x in self.pyramid_levels] 20 | if ratios is None: 21 | self.ratios = np.array([0.5, 1, 2]) 22 | if scales is None: 23 | self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) 24 | 25 | def forward(self, image): 26 | 27 | image_shape = image.shape[2:] 28 | image_shape = np.array(image_shape) 29 | image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels] 30 | 31 | # compute anchors over all pyramid levels 32 | all_anchors = np.zeros((0, 4)).astype(np.float32) 33 | 34 | for idx, p in enumerate(self.pyramid_levels): 35 | anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales) 36 | shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors) 37 | all_anchors = np.append(all_anchors, shifted_anchors, axis=0) 38 | 39 | all_anchors = np.expand_dims(all_anchors, axis=0) 40 | 41 | return torch.from_numpy(all_anchors.astype(np.float32)).cuda() 42 | 43 | 44 | def generate_anchors(base_size=16, ratios=None, scales=None): 45 | """ 46 | Generate anchor (reference) windows by enumerating aspect ratios X 47 | scales w.r.t. a reference window. 
48 | """ 49 | 50 | if ratios is None: 51 | ratios = np.array([0.5, 1, 2]) 52 | 53 | if scales is None: 54 | scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) 55 | 56 | num_anchors = len(ratios) * len(scales) 57 | 58 | # initialize output anchors 59 | anchors = np.zeros((num_anchors, 4)) 60 | 61 | # scale base_size 62 | anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T 63 | 64 | # compute areas of anchors 65 | areas = anchors[:, 2] * anchors[:, 3] 66 | 67 | # correct for ratios 68 | anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales))) 69 | anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales)) 70 | 71 | # transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2) 72 | anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T 73 | anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T 74 | 75 | return anchors 76 | 77 | 78 | def compute_shape(image_shape, pyramid_levels): 79 | """Compute shapes based on pyramid levels. 80 | 81 | :param image_shape: 82 | :param pyramid_levels: 83 | :return: 84 | """ 85 | image_shape = np.array(image_shape[:2]) 86 | image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels] 87 | return image_shapes 88 | 89 | 90 | def anchors_for_shape( 91 | image_shape, 92 | pyramid_levels=None, 93 | ratios=None, 94 | scales=None, 95 | strides=None, 96 | sizes=None, 97 | shapes_callback=None, 98 | ): 99 | image_shapes = compute_shape(image_shape, pyramid_levels) 100 | 101 | # compute anchors over all pyramid levels 102 | all_anchors = np.zeros((0, 4)) 103 | for idx, p in enumerate(pyramid_levels): 104 | anchors = generate_anchors(base_size=sizes[idx], ratios=ratios, scales=scales) 105 | shifted_anchors = shift(image_shapes[idx], strides[idx], anchors) 106 | all_anchors = np.append(all_anchors, shifted_anchors, axis=0) 107 | 108 | return all_anchors 109 | 110 | 111 | def shift(shape, stride, anchors): 112 | shift_x = (np.arange(0, shape[1]) + 0.5) * stride 113 | shift_y = (np.arange(0, shape[0]) + 0.5) * stride 114 | 115 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 116 | 117 | shifts = np.vstack(( 118 | shift_x.ravel(), shift_y.ravel(), 119 | shift_x.ravel(), shift_y.ravel() 120 | )).transpose() 121 | 122 | # add A anchors (1, A, 4) to 123 | # cell K shifts (K, 1, 4) to get 124 | # shift anchors (K, A, 4) 125 | # reshape to (K*A, 4) shifted anchors 126 | A = anchors.shape[0] 127 | K = shifts.shape[0] 128 | all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) 129 | all_anchors = all_anchors.reshape((K * A, 4)) 130 | 131 | return all_anchors -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: huashuoshuo 3 | # Data: 2019/12/19 14:57 4 | 5 | import os 6 | import torch 7 | import numpy as np 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | from dataset.dataloader import CocoDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, UnNormalizer, Normalizer 11 | from torch.utils.data import Dataset, DataLoader 12 | from torchvision import transforms 13 | # from model.efficientdet import EfficientDet 14 | from model.RetinaHead import RetinaHead 15 | import coco_eval 16 | import argparse 17 | from tensorboardX import SummaryWriter 18 | import cv2 as cv2 19 | import matplotlib.pyplot as plt 20 | 21 | # writer = SummaryWriter('log') 22 | 23 | os.environ['CUDA_VISIBLE_DEVICES']='0, 1, 2, 3' 24 | 
def main(arg=None): 25 | parser = argparse.ArgumentParser() 26 | 27 | parser.add_argument('--coco_path', type=str, default='/home/hoo/Dataset/COCO') 28 | parser.add_argument('--depth', type=int, default=3) 29 | parser.add_argument('--epoches', type=int, default=50) 30 | parser.add_argument('--phi', type=int, default=0) 31 | parser.add_argument('--backbone', type=str, default='efficientnet-b0') 32 | parser.add_argument('--backbone_pretrained', type=bool, default=True) 33 | parser.add_argument('--EfficientDet_pretrained', type=bool, default=False) 34 | parser.add_argument('--pretrained', type=str, default='./weights/retinanet_1.pth') 35 | parser.add_argument('--batch_size', type=int, default=24) 36 | 37 | parser = parser.parse_args(arg) 38 | dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()])) 39 | # print(dataset_train.num_classes()) 40 | dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()])) 41 | 42 | sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False) 43 | dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_sampler=sampler) 44 | 45 | 46 | sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) 47 | dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) 48 | 49 | # Create the Model 50 | 51 | efficientdet = RetinaHead(parser) 52 | 53 | 54 | 55 | efficientdet = torch.nn.DataParallel(efficientdet).cuda() 56 | if parser.EfficientDet_pretrained: 57 | state_dict = torch.load(parser.pretrained) 58 | # print(state_dict) 59 | efficientdet.module.load_state_dict(state_dict) 60 | 61 | efficientdet.training = True 62 | 63 | optimizer = optim.Adam(efficientdet.parameters(), lr=1e-3) 64 | # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) 65 | scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[3, 5, 7, 9, 11, 13, 15, 17, 19], gamma=0.5) 66 | 67 | for epoch_num in range(parser.epoches): 68 | efficientdet.train() 69 | 70 | epoch_loss = [] 71 | 72 | for iter_num, data in enumerate(dataloader_train): 73 | break 74 | # try: 75 | # print(data) 76 | optimizer.zero_grad() 77 | # print(np.shape(data['annot'])) 78 | classification_loss, regression_loss = efficientdet([data['img'].cuda().float(), data['annot']]) 79 | classification_loss = classification_loss.mean() 80 | regression_loss = regression_loss.mean() 81 | loss = classification_loss + regression_loss 82 | if bool(loss==0): 83 | continue 84 | loss.backward() 85 | 86 | torch.nn.utils.clip_grad_norm_(efficientdet.parameters(), 0.1) 87 | optimizer.step() 88 | epoch_loss.append(float(loss)) 89 | print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss))) 90 | 91 | if iter_num % 200 == 199: 92 | niter = epoch_num * len(dataloader_train) + iter_num 93 | # print(loss) 94 | writer.add_scalar('Train/Loss', loss, niter) 95 | writer.add_scalar('Train/Reg_Loss', regression_loss, niter) 96 | writer.add_scalar('Train/Cls_Loss', classification_loss, niter) 97 | 98 | 99 | del classification_loss 100 | del regression_loss 101 | # except Exception as e: 102 | # print(e) 103 | # continue 104 | # if iter_num == 20: 105 | # break 106 | 107 | # print('Evaluating dataset') 108 | mAP = 
coco_eval.evaluate_coco(dataset_val, efficientdet) 109 | # writer.add_scalar('Test/mAP', mAP, epoch_num) 110 | print('Save Model') 111 | # torch.save(efficientdet.module.state_dict(), './weights/retinanet_{}.pth'.format(epoch_num)) 112 | # scheduler.step(np.mean(epoch_loss)) 113 | scheduler.step(epoch=epoch_num) 114 | # writer.close() 115 | 116 | 117 | if __name__ == '__main__': 118 | main() -------------------------------------------------------------------------------- /model/RetinaHead.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: huashuoshuo 3 | # Data: 2019/12/18 16:37 4 | 5 | import torch 6 | import torch.nn as nn 7 | from model.BiFPN import ConvBlock 8 | import model.losses as losses 9 | from model.efficientdet import EfficientDet 10 | from pycocotools.coco import COCO as COCO 11 | from model.anchors import Anchors 12 | # from lib.nms.pth_nms import pth_nms 13 | import torchvision.ops as ops 14 | from model.util import BasicBlock, Bottleneck, BBoxTransform, ClipBoxes, Filter_boxes 15 | def nms(bbox, score, thresh): 16 | # bbox, score = dets 17 | return ops.nms(boxes=bbox, scores=score, iou_threshold=thresh) 18 | # return pth_nms(dets, thresh) 19 | 20 | 21 | class Reg(nn.Module): 22 | """ 23 | 24 | """ 25 | def __init__(self, inp, oup, depth, num_anchor): 26 | super().__init__() 27 | self.inp = inp 28 | self.oup = oup 29 | self.D = depth 30 | self.reg = nn.ModuleList() 31 | self.num_anchors = num_anchor 32 | 33 | for i in range(self.D): 34 | self.reg.append(ConvBlock(inp=self.inp, oup=self.oup, k_size=3, stride=1, padding=1)) 35 | # self.retina_cls = nn.Conv2d(self.oup, self.num_anchors * self.num_class, 3, padding=1) 36 | self.retina_reg = nn.Conv2d(self.oup, self.num_anchors * 4, 3, padding=1) 37 | def forward(self, x): 38 | reg = x 39 | for conv in self.reg: 40 | reg = conv(reg) 41 | 42 | reg = self.retina_reg(reg) 43 | 44 | reg = reg.permute(0, 2, 3, 1) 45 | return reg.contiguous().view(reg.shape[0], -1, 4) 46 | 47 | class Cls(nn.Module): 48 | """ 49 | 50 | """ 51 | def __init__(self, inp, oup, depth, num_anchor, num_class): 52 | super().__init__() 53 | self.inp = inp 54 | self.oup = oup 55 | self.D = depth 56 | self.cls = nn.ModuleList() 57 | self.num_anchors = num_anchor 58 | self.num_class = num_class 59 | for i in range(self.D): 60 | self.cls.append(ConvBlock(inp=self.inp, oup=self.oup, k_size=3, stride=1, padding=1)) 61 | self.retina_cls = nn.Conv2d(self.oup, self.num_anchors * self.num_class, 3, padding=1) 62 | self.act = nn.Sigmoid() 63 | def forward(self, x): 64 | cls = x 65 | for conv in self.cls: 66 | cls = conv(cls) 67 | cls = self.retina_cls(cls) 68 | cls = self.act(cls) 69 | 70 | cls = cls.permute(0, 2, 3, 1) 71 | 72 | batch_size, width, height, channel = cls.shape 73 | 74 | out = cls.view(batch_size, width, height, self.num_anchors, self.num_class) 75 | return out.contiguous().view(cls.shape[0], -1, self.num_class) 76 | 77 | 78 | class RetinaHead(nn.Module): 79 | """ 80 | 81 | """ 82 | def __init__(self, parser, num_classes=80, num_anchor=9, is_demo=False): 83 | super().__init__() 84 | depth = 3 85 | inp = oup = 64 86 | 87 | self.regression = Reg(inp, oup, depth-1, num_anchor) 88 | self.classification = Cls(inp, oup, depth-1, num_anchor, num_classes) 89 | self.FocalLoss = losses.FocalLoss() 90 | self.anchors = Anchors() 91 | self.EfficientDet = EfficientDet(parser) 92 | self.regressBoxes = BBoxTransform() 93 | self.is_demo = is_demo 94 | self.clipBoxes = ClipBoxes() 95 | def 
forward(self, inputs): 96 | if self.training: 97 | img_batch, annotations = inputs 98 | else: 99 | img_batch = inputs 100 | 101 | features = self.EfficientDet(img_batch) 102 | regression = torch.cat([self.regression(feature) for feature in features], dim=1) 103 | classification = torch.cat([self.classification(feature) for feature in features], dim=1) 104 | anchors = self.anchors(img_batch) 105 | 106 | # self.FocalLoss(classification, regression, anchors, annotations) 107 | if self.training: 108 | return self.FocalLoss(classification, regression, anchors, annotations) 109 | else: 110 | transformed_anchors = self.regressBoxes(anchors, regression) 111 | transformed_anchors = self.clipBoxes(transformed_anchors, img_batch) 112 | 113 | scores = torch.max(classification, dim=2, keepdim=True)[0] 114 | 115 | if self.is_demo: 116 | return transformed_anchors, classification, scores 117 | 118 | scores_over_thresh = (scores>0.01)[0, :, 0] 119 | 120 | if scores_over_thresh.sum() == 0: 121 | # no boxes to NMS, just return 122 | return [torch.zeros(0).cuda(), torch.zeros(0).cuda(), torch.zeros(0, 4).cuda()] 123 | 124 | classification = classification[:, scores_over_thresh, :] 125 | transformed_anchors = transformed_anchors[:, scores_over_thresh, :] 126 | scores = scores[:, scores_over_thresh, :] 127 | # print(transformed_anchors.shape, scores.shape) 128 | 129 | # anchors_nms_idx = nms(torch.cat([transformed_anchors, scores], dim=2)[0, :, :], 0.5) 130 | # print(transformed_anchors[0, :, :]) 131 | anchors_nms_idx = nms(transformed_anchors[0, :, :], scores[0, :, 0], 0.45) 132 | nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1) 133 | 134 | return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]] 135 | 136 | 137 | 138 | 139 | 140 | 141 | -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 11 | 12 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 34 | 35 | 36 | 37 | 38 | 57 | 58 | 59 | 78 | 79 | 80 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 1576477752047 114 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | file://$PROJECT_DIR$/model/model.py 125 | 198 126 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /model/losses.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: huashuoshuo 3 | # Data: 2019/12/19 15:05 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | 9 | def calc_iou(a, b): 10 | 11 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 12 | iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0]) 13 | ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1]) 14 | 15 | iw = torch.clamp(iw, min=0) 16 | ih = torch.clamp(ih, min=0) 17 | 18 | ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - iw * ih 19 | 20 | ua = torch.clamp(ua, min=1e-8) 21 | 22 | intersection = iw * ih 23 | 24 | IoU = intersection / ua 25 | 26 | return IoU 27 | 28 | class FocalLoss(nn.Module): 29 | #def __init__(self): 30 | 31 | def forward(self, classifications, regressions, anchors, annotations): 32 | alpha = 0.25 33 | gamma = 2.0 34 | batch_size = classifications.shape[0] 35 | classification_losses = [] 36 | regression_losses = [] 37 | 
38 | anchor = anchors[0, :, :] 39 | 40 | anchor_widths = anchor[:, 2] - anchor[:, 0] 41 | anchor_heights = anchor[:, 3] - anchor[:, 1] 42 | anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths 43 | anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights 44 | 45 | for j in range(batch_size): 46 | 47 | classification = classifications[j, :, :] 48 | regression = regressions[j, :, :] 49 | 50 | bbox_annotation = annotations[j, :, :] 51 | bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1] 52 | 53 | if bbox_annotation.shape[0] == 0: 54 | regression_losses.append(torch.tensor(0).float().to(anchors.device)) 55 | classification_losses.append(torch.tensor(0).float().to(anchors.device)) 56 | 57 | continue 58 | 59 | classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4) 60 | 61 | IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4]) # num_anchors x num_annotations 62 | 63 | IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1 64 | 65 | #import pdb 66 | #pdb.set_trace() 67 | 68 | # compute the loss for classification 69 | targets = torch.ones(classification.shape) * -1 70 | targets = targets.to(anchors.device) 71 | 72 | targets[torch.lt(IoU_max, 0.4), :] = 0 73 | 74 | positive_indices = torch.ge(IoU_max, 0.5) 75 | 76 | num_positive_anchors = positive_indices.sum() 77 | 78 | assigned_annotations = bbox_annotation[IoU_argmax, :] 79 | 80 | targets[positive_indices, :] = 0 81 | targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1 82 | 83 | alpha_factor = torch.ones(targets.shape) * alpha 84 | alpha_factor = alpha_factor.to(anchors.device) 85 | alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor) 86 | focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification) 87 | focal_weight = alpha_factor * torch.pow(focal_weight, gamma) 88 | 89 | bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification)) 90 | 91 | # cls_loss = focal_weight * torch.pow(bce, gamma) 92 | cls_loss = focal_weight * bce 93 | 94 | cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).to(anchors.device)) 95 | 96 | classification_losses.append(cls_loss.sum()/torch.clamp(num_positive_anchors.float(), min=1.0)) 97 | 98 | # compute the loss for regression 99 | 100 | if positive_indices.sum() > 0: 101 | assigned_annotations = assigned_annotations[positive_indices, :] 102 | 103 | anchor_widths_pi = anchor_widths[positive_indices] 104 | anchor_heights_pi = anchor_heights[positive_indices] 105 | anchor_ctr_x_pi = anchor_ctr_x[positive_indices] 106 | anchor_ctr_y_pi = anchor_ctr_y[positive_indices] 107 | 108 | gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0] 109 | gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1] 110 | gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths 111 | gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights 112 | 113 | # clip widths to 1 114 | gt_widths = torch.clamp(gt_widths, min=1) 115 | gt_heights = torch.clamp(gt_heights, min=1) 116 | 117 | targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi 118 | targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi 119 | targets_dw = torch.log(gt_widths / anchor_widths_pi) 120 | targets_dh = torch.log(gt_heights / anchor_heights_pi) 121 | 122 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh)) 123 | targets = targets.t() 124 | 125 | targets = targets/torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).to(anchors.device) 126 | 127 | 128 | 
negative_indices = ~positive_indices 129 | 130 | regression_diff = torch.abs(targets - regression[positive_indices, :]) 131 | 132 | regression_loss = torch.where( 133 | torch.le(regression_diff, 1.0 / 9.0), 134 | 0.5 * 9.0 * torch.pow(regression_diff, 2), 135 | regression_diff - 0.5 / 9.0 136 | ) 137 | regression_losses.append(regression_loss.mean()) 138 | else: 139 | regression_losses.append(torch.tensor(0).float().to(anchors.device)) 140 | 141 | return torch.stack(classification_losses).mean(dim=0, keepdim=True), torch.stack(regression_losses).mean(dim=0, keepdim=True) -------------------------------------------------------------------------------- /model/BiFPN.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Author: huashuoshuo 3 | # Data: 2019/12/17 14:36 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.functional as F 8 | from .utils import ( 9 | round_filters, 10 | round_repeats, 11 | drop_connect, 12 | get_same_padding_conv2d, 13 | get_model_params, 14 | efficientnet_params, 15 | load_pretrained_weights, 16 | Swish, 17 | MemoryEfficientSwish, 18 | ) 19 | 20 | class ConvBlock(nn.Module): 21 | """ 22 | 23 | """ 24 | def __init__(self, inp, oup, k_size, stride=1, padding=0, group=1): 25 | super().__init__() 26 | # Conv2d = get_same_padding_conv2d 27 | self.conv = nn.Conv2d(in_channels=inp, out_channels=oup, kernel_size=k_size, stride=stride, padding=padding, bias=False, groups=group).cuda() 28 | self.norm = nn.BatchNorm2d(num_features=oup).cuda() 29 | self.act = nn.ReLU(inplace=True) 30 | 31 | def forward(self, x): 32 | x = self.norm(self.conv(x)) 33 | # print(self.conv) 34 | x = self.conv(x) 35 | return self.act(x) 36 | 37 | 38 | class BiFPN(nn.Module): 39 | """ 40 | 41 | """ 42 | def __init__(self,oup, first=True): 43 | super().__init__() 44 | # self.features_in = features_in 45 | self.oup = oup 46 | # self.dw_conv = ConvBlock(oup, oup, k_size=3, stride=1, padding=1, group=oup) 47 | # self.pw_conv = ConvBlock(oup, oup, k_size=1, stride=1, padding=0) 48 | 49 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2) 50 | self.first = first 51 | self.conv_gen() 52 | self.w_gen() 53 | def forward(self, features_in): 54 | # self.tail(x) 55 | # P3_in, P4_in, P5_in, P6_in, P7_in = features_in 56 | 57 | features_out = self.top_down(features_in) 58 | return features_out 59 | 60 | def conv_gen(self): 61 | # P3_in, P4_in, P5_in, P6_in, P7_in = features_in 62 | if not self.first: 63 | self.P3_in_conv = ConvBlock(self.oup, self.oup, k_size=1, stride=1, padding=0) 64 | self.P4_in_conv = ConvBlock(self.oup, self.oup, k_size=1, stride=1, padding=0) 65 | self.P5_in_conv = ConvBlock(self.oup, self.oup, k_size=1, stride=1, padding=0) 66 | self.P6_in_conv = ConvBlock(self.oup, self.oup, k_size=1, stride=1, padding=0) 67 | self.P7_in_conv = ConvBlock(self.oup, self.oup, k_size=1, stride=1, padding=0) 68 | 69 | # upsample 70 | self.P6_td_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup) 71 | self.P5_td_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup) 72 | self.P4_td_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup) 73 | self.P3_out_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup) 74 | 75 | # downsample 76 | self.P4_out_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup) 77 | self.P5_out_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, 
group=self.oup) 78 | self.P6_out_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup) 79 | self.P7_out_conv = ConvBlock(self.oup, self.oup, k_size=3, stride=1, padding=1, group=self.oup) 80 | 81 | def w_gen(self): 82 | self.P6_td_add = wAdd(2) 83 | self.P5_td_add = wAdd(2) 84 | self.P4_td_add = wAdd(2) 85 | self.P3_out_add = wAdd(2) 86 | self.P4_out_add = wAdd(3) 87 | self.P5_out_add = wAdd(3) 88 | self.P6_out_add = wAdd(3) 89 | self.P7_out_add = wAdd(2) 90 | 91 | def top_down_no_w(self, features_in): 92 | P3_in, P4_in, P5_in, P6_in, P7_in = features_in 93 | if not self.first: 94 | P3_in = self.P3_in_conv(P3_in) 95 | P4_in = self.P4_in_conv(P4_in) 96 | P5_in = self.P5_in_conv(P5_in) 97 | P6_in = self.P6_in_conv(P6_in) 98 | P7_in = self.P7_in_conv(P7_in) 99 | 100 | # upsample 101 | P7_U = self.Resize()(P7_in) 102 | P6_td = P7_U + P6_in 103 | P6_td = self.P6_td_conv(P6_td) 104 | P6_U = self.Resize()(P6_td) 105 | P5_td = P6_U + P5_in 106 | P5_td = self.P5_td_conv(P5_td) 107 | P5_U = self.Resize()(P5_td) 108 | P4_td = P5_U + P4_in 109 | P4_td = self.P4_td_conv(P4_td) 110 | P4_U = self.Resize()(P4_td) 111 | P3_out = P4_U + P3_in 112 | P3_out = self.P3_out_conv(P3_out) 113 | 114 | # downsample 115 | P3_D = self.pool(P3_out) 116 | P4_out = P3_D + P4_td + P4_in 117 | P4_out = self.P4_out_conv(P4_out) 118 | P4_D = self.pool(P4_out) 119 | P5_out = P4_D + P5_td + P5_in 120 | P5_out = self.P5_out_conv(P5_out) 121 | P5_D = self.pool(P5_out) 122 | P6_out = P5_D + P6_td + P6_in 123 | P6_out = self.P6_out_conv(P6_out) 124 | P6_D = self.pool(P6_out) 125 | P7_out = P6_D + P7_in 126 | P7_out = self.P7_out_conv(P7_out) 127 | return [P3_out, P4_out, P5_out, P6_out, P7_out] 128 | 129 | def top_down(self, features_in): 130 | P3_in, P4_in, P5_in, P6_in, P7_in = features_in 131 | if not self.first: 132 | P3_in = self.P3_in_conv(P3_in) 133 | P4_in = self.P4_in_conv(P4_in) 134 | P5_in = self.P5_in_conv(P5_in) 135 | P6_in = self.P6_in_conv(P6_in) 136 | P7_in = self.P7_in_conv(P7_in) 137 | 138 | # upsample 139 | P7_U = self.Resize()(P7_in) 140 | P6_td = self.P6_td_add([P6_in, P7_U]) 141 | P6_td = self.P6_td_conv(P6_td) 142 | P6_U = self.Resize()(P6_td) 143 | P5_td = self.P5_td_add([P5_in, P6_U]) 144 | P5_td = self.P5_td_conv(P5_td) 145 | P5_U = self.Resize()(P5_td) 146 | P4_td = self.P4_td_add([P4_in, P5_U]) 147 | P4_td = self.P4_td_conv(P4_td) 148 | P4_U = self.Resize()(P4_td) 149 | P3_out = self.P3_out_add([P3_in, P4_U]) 150 | P3_out = self.P3_out_conv(P3_out) 151 | 152 | # downsample 153 | P3_D = self.pool(P3_out) 154 | P4_out = self.P4_out_add([P3_D, P4_td, P4_in]) 155 | P4_out = self.P4_out_conv(P4_out) 156 | P4_D = self.pool(P4_out) 157 | P5_out = self.P5_out_add([P4_D, P5_td, P5_in]) 158 | P5_out = self.P5_out_conv(P5_out) 159 | P5_D = self.pool(P5_out) 160 | P6_out = self.P6_out_add([P5_D, P6_td, P6_in]) 161 | P6_out = self.P6_out_conv(P6_out) 162 | P6_D = self.pool(P6_out) 163 | P7_out = self.P7_out_add([P6_D, P7_in]) 164 | P7_out = self.P7_out_conv(P7_out) 165 | 166 | return [P3_out, P4_out, P5_out, P6_out, P7_out] 167 | 168 | 169 | 170 | def Resize(self, scale=2, mode='nearest'): 171 | upsample = nn.Upsample(scale_factor=scale, mode=mode) 172 | return upsample 173 | 174 | # def get_weight(self): 175 | 176 | 177 | class wAdd(nn.Module): 178 | """ 179 | 180 | """ 181 | def __init__(self, num_in): 182 | super().__init__() 183 | self.epsilon = 1e-4 184 | self.w = nn.Parameter(torch.Tensor(num_in).fill_(1 / num_in)) 185 | 186 | def forward(self, inputs): 187 | # len(inputs) 188 | 
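# Fast normalised fusion: each input feature map is scaled by a learnable scalar weight
# and the weighted sum is divided by the sum of the weights plus a small epsilon. Note
# that the weights are not passed through a ReLU here, so they are not constrained to
# stay non-negative.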
num_in = len(inputs) 189 | # w = nn.Parameter(torch.Tensor(num_in).fill_(1 / num_in)) 190 | w = self.w.cuda() 191 | # x = [w[i] * inputs[i] for i in range(num_in)] 192 | x = 0 193 | # print(w[0]) 194 | for i in range(num_in): 195 | x += w[i] * inputs[i] 196 | x /= (torch.sum(w) + self.epsilon) 197 | # x = x.cuda() 198 | return x 199 | # x = torch.sum(x) 200 | 201 | 202 | 203 | -------------------------------------------------------------------------------- /model/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import torchvision.ops as ops 5 | 6 | 7 | def conv3x3(in_planes, out_planes, stride=1): 8 | """3x3 convolution with padding""" 9 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 10 | padding=1, bias=False) 11 | 12 | class BasicBlock(nn.Module): 13 | expansion = 1 14 | 15 | def __init__(self, inplanes, planes, stride=1, downsample=None): 16 | super(BasicBlock, self).__init__() 17 | self.conv1 = conv3x3(inplanes, planes, stride) 18 | self.bn1 = nn.BatchNorm2d(planes) 19 | self.relu = nn.ReLU(inplace=True) 20 | self.conv2 = conv3x3(planes, planes) 21 | self.bn2 = nn.BatchNorm2d(planes) 22 | self.downsample = downsample 23 | self.stride = stride 24 | 25 | def forward(self, x): 26 | residual = x 27 | 28 | out = self.conv1(x) 29 | out = self.bn1(out) 30 | out = self.relu(out) 31 | 32 | out = self.conv2(out) 33 | out = self.bn2(out) 34 | 35 | if self.downsample is not None: 36 | residual = self.downsample(x) 37 | 38 | out += residual 39 | out = self.relu(out) 40 | 41 | return out 42 | 43 | 44 | class Bottleneck(nn.Module): 45 | expansion = 4 46 | 47 | def __init__(self, inplanes, planes, stride=1, downsample=None): 48 | super(Bottleneck, self).__init__() 49 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 50 | self.bn1 = nn.BatchNorm2d(planes) 51 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 52 | padding=1, bias=False) 53 | self.bn2 = nn.BatchNorm2d(planes) 54 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 55 | self.bn3 = nn.BatchNorm2d(planes * 4) 56 | self.relu = nn.ReLU(inplace=True) 57 | self.downsample = downsample 58 | self.stride = stride 59 | 60 | def forward(self, x): 61 | residual = x 62 | 63 | out = self.conv1(x) 64 | out = self.bn1(out) 65 | out = self.relu(out) 66 | 67 | out = self.conv2(out) 68 | out = self.bn2(out) 69 | out = self.relu(out) 70 | 71 | out = self.conv3(out) 72 | out = self.bn3(out) 73 | 74 | if self.downsample is not None: 75 | residual = self.downsample(x) 76 | 77 | out += residual 78 | out = self.relu(out) 79 | 80 | return out 81 | 82 | class BBoxTransform(nn.Module): 83 | 84 | def __init__(self, mean=None, std=None): 85 | super(BBoxTransform, self).__init__() 86 | if mean is None: 87 | self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32)).cuda() 88 | else: 89 | self.mean = mean 90 | if std is None: 91 | self.std = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32)).cuda() 92 | else: 93 | self.std = std 94 | 95 | def forward(self, boxes, deltas): 96 | 97 | widths = boxes[:, :, 2] - boxes[:, :, 0] 98 | heights = boxes[:, :, 3] - boxes[:, :, 1] 99 | ctr_x = boxes[:, :, 0] + 0.5 * widths 100 | ctr_y = boxes[:, :, 1] + 0.5 * heights 101 | 102 | dx = deltas[:, :, 0] * self.std[0] + self.mean[0] 103 | dy = deltas[:, :, 1] * self.std[1] + self.mean[1] 104 | dw = deltas[:, :, 2] * self.std[2] + self.mean[2] 105 | dh = deltas[:, :, 
3] * self.std[3] + self.mean[3] 106 | 107 | pred_ctr_x = ctr_x + dx * widths 108 | pred_ctr_y = ctr_y + dy * heights 109 | pred_w = torch.exp(dw) * widths 110 | pred_h = torch.exp(dh) * heights 111 | 112 | pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w 113 | pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h 114 | pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w 115 | pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h 116 | 117 | pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2) 118 | 119 | return pred_boxes 120 | 121 | 122 | class ClipBoxes(nn.Module): 123 | 124 | def __init__(self, width=None, height=None): 125 | super(ClipBoxes, self).__init__() 126 | 127 | def forward(self, boxes, img): 128 | 129 | batch_size, num_channels, height, width = img.shape 130 | 131 | boxes[:, :, 0] = torch.clamp(boxes[:, :, 0], min=0) 132 | boxes[:, :, 1] = torch.clamp(boxes[:, :, 1], min=0) 133 | 134 | boxes[:, :, 2] = torch.clamp(boxes[:, :, 2], max=width) 135 | boxes[:, :, 3] = torch.clamp(boxes[:, :, 3], max=height) 136 | 137 | return boxes 138 | 139 | class Filter_boxes(nn.Module): 140 | """ 141 | 142 | """ 143 | def __init__(self, args): 144 | super().__init__() 145 | self.threshold = args.threshold 146 | 147 | def forward(self, inputs): 148 | transformed_anchors, classification, scores = inputs 149 | 150 | boxes_dict, scores_dict = self.select(transformed_anchors, classification) 151 | box = [] 152 | score =[] 153 | cls = [] 154 | for i in range(80): 155 | anchors_nms_idx = ops.nms(boxes=boxes_dict[i], scores=scores_dict[i], iou_threshold=0.5) 156 | if len(scores_dict[i])>0: 157 | box.append(boxes_dict[i][anchors_nms_idx, :]) 158 | score.append(scores_dict[i][anchors_nms_idx]) 159 | cls.append(i) 160 | 161 | 162 | return box, score, cls 163 | 164 | def select(self, transformed_anchors, classification): 165 | boxes = {} 166 | scores = {} 167 | for cls in range(80): 168 | cls_score = classification[0, :, cls] 169 | select_mask = cls_score > self.threshold 170 | boxes[cls] = transformed_anchors[0, select_mask, :] 171 | scores[cls] = cls_score[select_mask] 172 | return boxes, scores 173 | 174 | num2name = {0: u'__background__', 175 | 1: u'person', 176 | 2: u'bicycle', 177 | 3: u'car', 178 | 4: u'motorcycle', 179 | 5: u'airplane', 180 | 6: u'bus', 181 | 7: u'train', 182 | 8: u'truck', 183 | 9: u'boat', 184 | 10: u'traffic light', 185 | 11: u'fire hydrant', 186 | 12: u'stop sign', 187 | 13: u'parking meter', 188 | 14: u'bench', 189 | 15: u'bird', 190 | 16: u'cat', 191 | 17: u'dog', 192 | 18: u'horse', 193 | 19: u'sheep', 194 | 20: u'cow', 195 | 21: u'elephant', 196 | 22: u'bear', 197 | 23: u'zebra', 198 | 24: u'giraffe', 199 | 25: u'backpack', 200 | 26: u'umbrella', 201 | 27: u'handbag', 202 | 28: u'tie', 203 | 29: u'suitcase', 204 | 30: u'frisbee', 205 | 31: u'skis', 206 | 32: u'snowboard', 207 | 33: u'sports ball', 208 | 34: u'kite', 209 | 35: u'baseball bat', 210 | 36: u'baseball glove', 211 | 37: u'skateboard', 212 | 38: u'surfboard', 213 | 39: u'tennis racket', 214 | 40: u'bottle', 215 | 41: u'wine glass', 216 | 42: u'cup', 217 | 43: u'fork', 218 | 44: u'knife', 219 | 45: u'spoon', 220 | 46: u'bowl', 221 | 47: u'banana', 222 | 48: u'apple', 223 | 49: u'sandwich', 224 | 50: u'orange', 225 | 51: u'broccoli', 226 | 52: u'carrot', 227 | 53: u'hot dog', 228 | 54: u'pizza', 229 | 55: u'donut', 230 | 56: u'cake', 231 | 57: u'chair', 232 | 58: u'couch', 233 | 59: u'potted plant', 234 | 60: u'bed', 235 | 61: u'dining table', 236 | 62: u'toilet', 237 | 63: u'tv', 238 | 64: u'laptop', 239 | 65: 
u'mouse', 240 | 66: u'remote', 241 | 67: u'keyboard', 242 | 68: u'cell phone', 243 | 69: u'microwave', 244 | 70: u'oven', 245 | 71: u'toaster', 246 | 72: u'sink', 247 | 73: u'refrigerator', 248 | 74: u'book', 249 | 75: u'clock', 250 | 76: u'vase', 251 | 77: u'scissors', 252 | 78: u'teddy bear', 253 | 79: u'hair drier', 254 | 80: u'toothbrush'} -------------------------------------------------------------------------------- /model/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from .utils import ( 6 | round_filters, 7 | round_repeats, 8 | drop_connect, 9 | get_same_padding_conv2d, 10 | get_model_params, 11 | efficientnet_params, 12 | load_pretrained_weights, 13 | Swish, 14 | MemoryEfficientSwish, 15 | ) 16 | 17 | class MBConvBlock(nn.Module): 18 | """ 19 | Mobile Inverted Residual Bottleneck Block 20 | 21 | Args: 22 | block_args (namedtuple): BlockArgs, see above 23 | global_params (namedtuple): GlobalParam, see above 24 | 25 | Attributes: 26 | has_se (bool): Whether the block contains a Squeeze and Excitation layer. 27 | """ 28 | 29 | def __init__(self, block_args, global_params): 30 | super().__init__() 31 | self._block_args = block_args 32 | self._bn_mom = 1 - global_params.batch_norm_momentum 33 | self._bn_eps = global_params.batch_norm_epsilon 34 | self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1) 35 | self.id_skip = block_args.id_skip # skip connection and drop connect 36 | 37 | # Get static or dynamic convolution depending on image size 38 | Conv2d = get_same_padding_conv2d(image_size=global_params.image_size) 39 | 40 | # Expansion phase 41 | inp = self._block_args.input_filters # number of input channels 42 | oup = self._block_args.input_filters * self._block_args.expand_ratio # number of output channels 43 | if self._block_args.expand_ratio != 1: 44 | self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False) 45 | self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) 46 | 47 | # Depthwise convolution phase 48 | k = self._block_args.kernel_size 49 | s = self._block_args.stride 50 | self._depthwise_conv = Conv2d( 51 | in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise 52 | kernel_size=k, stride=s, bias=False) 53 | self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) 54 | 55 | # Squeeze and Excitation layer, if desired 56 | if self.has_se: 57 | num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio)) 58 | self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1) 59 | self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1) 60 | 61 | # Output phase 62 | final_oup = self._block_args.output_filters 63 | self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False) 64 | self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps) 65 | self._swish = MemoryEfficientSwish() 66 | 67 | def forward(self, inputs, drop_connect_rate=None): 68 | """ 69 | :param inputs: input tensor 70 | :param drop_connect_rate: drop connect rate (float, between 0 and 1) 71 | :return: output of block 72 | """ 73 | 74 | # Expansion and Depthwise Convolution 75 | x = inputs 76 | if self._block_args.expand_ratio != 1: 77 | x = 
self._swish(self._bn0(self._expand_conv(inputs))) 78 | x = self._swish(self._bn1(self._depthwise_conv(x))) 79 | 80 | # Squeeze and Excitation 81 | if self.has_se: 82 | x_squeezed = F.adaptive_avg_pool2d(x, 1) 83 | x_squeezed = self._se_expand(self._swish(self._se_reduce(x_squeezed))) 84 | x = torch.sigmoid(x_squeezed) * x 85 | 86 | x = self._bn2(self._project_conv(x)) 87 | 88 | # Skip connection and drop connect 89 | input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters 90 | if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters: 91 | if drop_connect_rate: 92 | x = drop_connect(x, p=drop_connect_rate, training=self.training) 93 | x = x + inputs # skip connection 94 | return x 95 | 96 | def set_swish(self, memory_efficient=True): 97 | """Sets swish function as memory efficient (for training) or standard (for export)""" 98 | self._swish = MemoryEfficientSwish() if memory_efficient else Swish() 99 | 100 | 101 | class EfficientNet(nn.Module): 102 | """ 103 | An EfficientNet model. Most easily loaded with the .from_name or .from_pretrained methods 104 | 105 | Args: 106 | blocks_args (list): A list of BlockArgs to construct blocks 107 | global_params (namedtuple): A set of GlobalParams shared between blocks 108 | 109 | Example: 110 | model = EfficientNet.from_pretrained('efficientnet-b0') 111 | 112 | """ 113 | 114 | def __init__(self, blocks_args=None, global_params=None): 115 | super().__init__() 116 | assert isinstance(blocks_args, list), 'blocks_args should be a list' 117 | assert len(blocks_args) > 0, 'block args must be greater than 0' 118 | self._global_params = global_params 119 | self._blocks_args = blocks_args 120 | 121 | # Get static or dynamic convolution depending on image size 122 | Conv2d = get_same_padding_conv2d(image_size=global_params.image_size) 123 | 124 | # Batch norm parameters 125 | bn_mom = 1 - self._global_params.batch_norm_momentum 126 | bn_eps = self._global_params.batch_norm_epsilon 127 | 128 | # Stem 129 | in_channels = 3 # rgb 130 | out_channels = round_filters(32, self._global_params) # number of output channels 131 | self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) 132 | self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) 133 | 134 | # Build blocks 135 | self._blocks = nn.ModuleList([]) 136 | for block_args in self._blocks_args: 137 | 138 | # Update block input and output filters based on depth multiplier. 139 | block_args = block_args._replace( 140 | input_filters=round_filters(block_args.input_filters, self._global_params), 141 | output_filters=round_filters(block_args.output_filters, self._global_params), 142 | num_repeat=round_repeats(block_args.num_repeat, self._global_params) 143 | ) 144 | 145 | # The first block needs to take care of stride and filter size increase. 
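# Later repeats of the same stage reuse the block's output filters as input and run with
# stride 1 (see the _replace call just below), so only the first repeat of a stage changes
# resolution or channel count.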
146 | self._blocks.append(MBConvBlock(block_args, self._global_params)) 147 | if block_args.num_repeat > 1: 148 | block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) 149 | for _ in range(block_args.num_repeat - 1): 150 | self._blocks.append(MBConvBlock(block_args, self._global_params)) 151 | 152 | # Head 153 | in_channels = block_args.output_filters # output of final block 154 | out_channels = round_filters(1280, self._global_params) 155 | self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False) 156 | self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) 157 | 158 | # Final linear layer 159 | self._avg_pooling = nn.AdaptiveAvgPool2d(1) 160 | self._dropout = nn.Dropout(self._global_params.dropout_rate) 161 | self._fc = nn.Linear(out_channels, self._global_params.num_classes) 162 | self._swish = MemoryEfficientSwish() 163 | 164 | def set_swish(self, memory_efficient=True): 165 | """Sets swish function as memory efficient (for training) or standard (for export)""" 166 | self._swish = MemoryEfficientSwish() if memory_efficient else Swish() 167 | for block in self._blocks: 168 | block.set_swish(memory_efficient) 169 | 170 | 171 | def extract_features(self, inputs): 172 | """ Returns output of the final convolution layer """ 173 | 174 | # Stem 175 | x = self._swish(self._bn0(self._conv_stem(inputs))) 176 | x_before = x 177 | features = [] 178 | block_index = 0 179 | repeat = 0 180 | # Blocks 181 | for idx, block in enumerate(self._blocks): 182 | drop_connect_rate = self._global_params.drop_connect_rate 183 | if drop_connect_rate: 184 | drop_connect_rate *= float(idx) / len(self._blocks) 185 | x = block(x, drop_connect_rate=drop_connect_rate) 186 | if x_before.shape[2] != x.shape[2]: 187 | features.append(x_before) 188 | x_before = x 189 | features.append(x) 190 | # repeat += 1 191 | # if(repeat == self._blocks_args[block_index].num_repeat): 192 | # repeat = 0 193 | # block_index += 1 194 | # features.append(x) 195 | 196 | # Head 197 | # x = self._swish(self._bn1(self._conv_head(x))) 198 | 199 | return features 200 | 201 | def forward(self, inputs): 202 | """ Calls extract_features to extract features, applies final linear layer, and returns logits. 
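In this detection backbone the pooling, dropout and fully connected head are commented out below, so the call actually returns the list of multi-scale feature maps produced by extract_features.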
""" 203 | # bs = inputs.size(0) 204 | # Convolution layers 205 | x = self.extract_features(inputs) 206 | 207 | # Pooling and final linear layer 208 | # x = self._avg_pooling(x) 209 | # x = x.view(bs, -1) 210 | # x = self._dropout(x) 211 | # x = self._fc(x) 212 | return x 213 | 214 | @classmethod 215 | def from_name(cls, model_name, override_params=None): 216 | cls._check_model_name_is_valid(model_name) 217 | blocks_args, global_params = get_model_params(model_name, override_params) 218 | return cls(blocks_args, global_params) 219 | 220 | @classmethod 221 | def from_pretrained(cls, args, num_classes=1000, in_channels = 3): 222 | print(args) 223 | model_name = args.backbone 224 | print('backbone', model_name) 225 | model = cls.from_name(model_name, override_params={'num_classes': num_classes}) 226 | if args.backbone_pretrained: 227 | load_pretrained_weights(model, model_name, load_fc=(num_classes == 1000)) 228 | 229 | if in_channels != 3: 230 | Conv2d = get_same_padding_conv2d(image_size = model._global_params.image_size) 231 | out_channels = round_filters(32, model._global_params) 232 | model._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) 233 | return model 234 | 235 | @classmethod 236 | def from_pretrained(cls, args, num_classes=1000): 237 | # print(model_name) 238 | model_name = args.backbone 239 | model = cls.from_name(model_name, override_params={'num_classes': num_classes}) 240 | if args.backbone_pretrained: 241 | load_pretrained_weights(model, model_name, load_fc=False) 242 | 243 | return model 244 | 245 | @classmethod 246 | def get_image_size(cls, model_name): 247 | cls._check_model_name_is_valid(model_name) 248 | _, _, res, _ = efficientnet_params(model_name) 249 | return res 250 | 251 | @classmethod 252 | def _check_model_name_is_valid(cls, model_name, also_need_pretrained_weights=False): 253 | """ Validates model name. None that pretrained weights are only available for 254 | the first four models (efficientnet-b{i} for i in 0,1,2,3) at the moment. """ 255 | num_models = 4 if also_need_pretrained_weights else 8 256 | valid_models = ['efficientnet-b'+str(i) for i in range(num_models)] 257 | if model_name not in valid_models: 258 | raise ValueError('model_name should be one of: ' + ', '.join(valid_models)) 259 | 260 | def get_list_feature(self): 261 | list_feature = [80, 192, 320] 262 | # s_before = self._blocks_args[0] 263 | # for idx in range(len(self._blocks_args)-1): 264 | # print(self._blocks_args[idx].stride) 265 | # if self._blocks_args[idx].stride == self._blocks_args[idx+1].stride: 266 | # list_feature.append(self._blocks_args[idx].output_filters) 267 | 268 | return list_feature 269 | -------------------------------------------------------------------------------- /model/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains helper functions for building the model and for loading model parameters. 3 | These helper functions are built to mirror those in the official TensorFlow implementation. 
4 | """ 5 | 6 | import re 7 | import math 8 | import collections 9 | from functools import partial 10 | import torch 11 | from torch import nn 12 | from torch.nn import functional as F 13 | from torch.utils import model_zoo 14 | 15 | ######################################################################## 16 | ############### HELPERS FUNCTIONS FOR MODEL ARCHITECTURE ############### 17 | ######################################################################## 18 | 19 | 20 | # Parameters for the entire model (stem, all blocks, and head) 21 | GlobalParams = collections.namedtuple('GlobalParams', [ 22 | 'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate', 23 | 'num_classes', 'width_coefficient', 'depth_coefficient', 24 | 'depth_divisor', 'min_depth', 'drop_connect_rate', 'image_size']) 25 | 26 | # Parameters for an individual model block 27 | BlockArgs = collections.namedtuple('BlockArgs', [ 28 | 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', 29 | 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) 30 | 31 | # Change namedtuple defaults 32 | GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) 33 | BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) 34 | 35 | 36 | class SwishImplementation(torch.autograd.Function): 37 | @staticmethod 38 | def forward(ctx, i): 39 | result = i * torch.sigmoid(i) 40 | ctx.save_for_backward(i) 41 | return result 42 | 43 | @staticmethod 44 | def backward(ctx, grad_output): 45 | i = ctx.saved_variables[0] 46 | sigmoid_i = torch.sigmoid(i) 47 | return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) 48 | 49 | 50 | class MemoryEfficientSwish(nn.Module): 51 | def forward(self, x): 52 | return SwishImplementation.apply(x) 53 | 54 | class Swish(nn.Module): 55 | def forward(self, x): 56 | return x * torch.sigmoid(x) 57 | 58 | 59 | def round_filters(filters, global_params): 60 | """ Calculate and round number of filters based on depth multiplier. """ 61 | multiplier = global_params.width_coefficient 62 | if not multiplier: 63 | return filters 64 | divisor = global_params.depth_divisor 65 | min_depth = global_params.min_depth 66 | filters *= multiplier 67 | min_depth = min_depth or divisor 68 | new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) 69 | if new_filters < 0.9 * filters: # prevent rounding by more than 10% 70 | new_filters += divisor 71 | return int(new_filters) 72 | 73 | 74 | def round_repeats(repeats, global_params): 75 | """ Round number of filters based on depth multiplier. """ 76 | multiplier = global_params.depth_coefficient 77 | if not multiplier: 78 | return repeats 79 | return int(math.ceil(multiplier * repeats)) 80 | 81 | 82 | def drop_connect(inputs, p, training): 83 | """ Drop connect. """ 84 | if not training: return inputs 85 | batch_size = inputs.shape[0] 86 | keep_prob = 1 - p 87 | random_tensor = keep_prob 88 | random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device) 89 | binary_tensor = torch.floor(random_tensor) 90 | output = inputs / keep_prob * binary_tensor 91 | return output 92 | 93 | 94 | def get_same_padding_conv2d(image_size=None): 95 | """ Chooses static padding if you have specified an image size, and dynamic padding otherwise. 96 | Static padding is necessary for ONNX exporting of models. 
""" 97 | if image_size is None: 98 | return Conv2dDynamicSamePadding 99 | else: 100 | return partial(Conv2dStaticSamePadding, image_size=image_size) 101 | 102 | 103 | class Conv2dDynamicSamePadding(nn.Conv2d): 104 | """ 2D Convolutions like TensorFlow, for a dynamic image size """ 105 | 106 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True): 107 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) 108 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 109 | 110 | def forward(self, x): 111 | ih, iw = x.size()[-2:] 112 | kh, kw = self.weight.size()[-2:] 113 | sh, sw = self.stride 114 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 115 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 116 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 117 | if pad_h > 0 or pad_w > 0: 118 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) 119 | return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 120 | 121 | 122 | class Conv2dStaticSamePadding(nn.Conv2d): 123 | """ 2D Convolutions like TensorFlow, for a fixed image size""" 124 | 125 | def __init__(self, in_channels, out_channels, kernel_size, image_size=None, **kwargs): 126 | super().__init__(in_channels, out_channels, kernel_size, **kwargs) 127 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 128 | 129 | # Calculate padding based on image size and save it 130 | assert image_size is not None 131 | ih, iw = image_size if type(image_size) == list else [image_size, image_size] 132 | kh, kw = self.weight.size()[-2:] 133 | sh, sw = self.stride 134 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 135 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 136 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 137 | if pad_h > 0 or pad_w > 0: 138 | self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)) 139 | else: 140 | self.static_padding = Identity() 141 | 142 | def forward(self, x): 143 | x = self.static_padding(x) 144 | x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 145 | return x 146 | 147 | 148 | class Identity(nn.Module): 149 | def __init__(self, ): 150 | super(Identity, self).__init__() 151 | 152 | def forward(self, input): 153 | return input 154 | 155 | 156 | ######################################################################## 157 | ############## HELPERS FUNCTIONS FOR LOADING MODEL PARAMS ############## 158 | ######################################################################## 159 | 160 | 161 | def efficientnet_params(model_name): 162 | """ Map EfficientNet model name to parameter coefficients. 
""" 163 | params_dict = { 164 | # Coefficients: width,depth,res,dropout 165 | 'efficientnet-b0': (1.0, 1.0, 224, 0.2), 166 | 'efficientnet-b1': (1.0, 1.1, 240, 0.2), 167 | 'efficientnet-b2': (1.1, 1.2, 260, 0.3), 168 | 'efficientnet-b3': (1.2, 1.4, 300, 0.3), 169 | 'efficientnet-b4': (1.4, 1.8, 380, 0.4), 170 | 'efficientnet-b5': (1.6, 2.2, 456, 0.4), 171 | 'efficientnet-b6': (1.8, 2.6, 528, 0.5), 172 | 'efficientnet-b7': (2.0, 3.1, 600, 0.5), 173 | } 174 | return params_dict[model_name] 175 | 176 | 177 | class BlockDecoder(object): 178 | """ Block Decoder for readability, straight from the official TensorFlow repository """ 179 | 180 | @staticmethod 181 | def _decode_block_string(block_string): 182 | """ Gets a block through a string notation of arguments. """ 183 | assert isinstance(block_string, str) 184 | 185 | ops = block_string.split('_') 186 | options = {} 187 | for op in ops: 188 | splits = re.split(r'(\d.*)', op) 189 | if len(splits) >= 2: 190 | key, value = splits[:2] 191 | options[key] = value 192 | 193 | # Check stride 194 | assert (('s' in options and len(options['s']) == 1) or 195 | (len(options['s']) == 2 and options['s'][0] == options['s'][1])) 196 | 197 | return BlockArgs( 198 | kernel_size=int(options['k']), 199 | num_repeat=int(options['r']), 200 | input_filters=int(options['i']), 201 | output_filters=int(options['o']), 202 | expand_ratio=int(options['e']), 203 | id_skip=('noskip' not in block_string), 204 | se_ratio=float(options['se']) if 'se' in options else None, 205 | stride=[int(options['s'][0])]) 206 | 207 | @staticmethod 208 | def _encode_block_string(block): 209 | """Encodes a block to a string.""" 210 | args = [ 211 | 'r%d' % block.num_repeat, 212 | 'k%d' % block.kernel_size, 213 | 's%d%d' % (block.strides[0], block.strides[1]), 214 | 'e%s' % block.expand_ratio, 215 | 'i%d' % block.input_filters, 216 | 'o%d' % block.output_filters 217 | ] 218 | if 0 < block.se_ratio <= 1: 219 | args.append('se%s' % block.se_ratio) 220 | if block.id_skip is False: 221 | args.append('noskip') 222 | return '_'.join(args) 223 | 224 | @staticmethod 225 | def decode(string_list): 226 | """ 227 | Decodes a list of string notations to specify blocks inside the network. 228 | 229 | :param string_list: a list of strings, each string is a notation of block 230 | :return: a list of BlockArgs namedtuples of block args 231 | """ 232 | assert isinstance(string_list, list) 233 | blocks_args = [] 234 | for block_string in string_list: 235 | blocks_args.append(BlockDecoder._decode_block_string(block_string)) 236 | return blocks_args 237 | 238 | @staticmethod 239 | def encode(blocks_args): 240 | """ 241 | Encodes a list of BlockArgs to a list of strings. 242 | 243 | :param blocks_args: a list of BlockArgs namedtuples of block args 244 | :return: a list of strings, each string is a notation of block 245 | """ 246 | block_strings = [] 247 | for block in blocks_args: 248 | block_strings.append(BlockDecoder._encode_block_string(block)) 249 | return block_strings 250 | 251 | 252 | def efficientnet(width_coefficient=None, depth_coefficient=None, dropout_rate=0.2, 253 | drop_connect_rate=0.2, image_size=None, num_classes=1000): 254 | """ Creates a efficientnet model. 
""" 255 | 256 | blocks_args = [ 257 | 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s11_e6_i16_o24_se0.25', 258 | 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', 259 | 'r3_k5_s22_e6_i80_o112_se0.25', 'r4_k5_s11_e6_i112_o192_se0.25', 260 | 'r1_k3_s22_e6_i192_o320_se0.25', 261 | ] 262 | blocks_args = BlockDecoder.decode(blocks_args) 263 | 264 | global_params = GlobalParams( 265 | batch_norm_momentum=0.99, 266 | batch_norm_epsilon=1e-3, 267 | dropout_rate=dropout_rate, 268 | drop_connect_rate=drop_connect_rate, 269 | # data_format='channels_last', # removed, this is always true in PyTorch 270 | num_classes=num_classes, 271 | width_coefficient=width_coefficient, 272 | depth_coefficient=depth_coefficient, 273 | depth_divisor=8, 274 | min_depth=None, 275 | image_size=image_size, 276 | ) 277 | 278 | return blocks_args, global_params 279 | 280 | 281 | def get_model_params(model_name, override_params): 282 | """ Get the block args and global params for a given model """ 283 | if model_name.startswith('efficientnet'): 284 | w, d, s, p = efficientnet_params(model_name) 285 | # note: all models have drop connect rate = 0.2 286 | blocks_args, global_params = efficientnet( 287 | width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s) 288 | else: 289 | raise NotImplementedError('model name is not pre-defined: %s' % model_name) 290 | if override_params: 291 | # ValueError will be raised here if override_params has fields not included in global_params. 292 | global_params = global_params._replace(**override_params) 293 | return blocks_args, global_params 294 | 295 | 296 | url_map = { 297 | 'efficientnet-b0': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b0-355c32eb.pth', 298 | 'efficientnet-b1': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b1-f1951068.pth', 299 | 'efficientnet-b2': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b2-8bb594d6.pth', 300 | 'efficientnet-b3': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b3-5fb5a3c3.pth', 301 | 'efficientnet-b4': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b4-6ed6700e.pth', 302 | 'efficientnet-b5': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b5-b6417697.pth', 303 | 'efficientnet-b6': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b6-c76e70fd.pth', 304 | 'efficientnet-b7': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b7-dcc49843.pth', 305 | } 306 | 307 | 308 | def load_pretrained_weights(model, model_name, load_fc=False): 309 | """ Loads pretrained weights, and downloads if loading for the first time. 
""" 310 | state_dict = model_zoo.load_url(url_map[model_name]) 311 | # state_dict = torch.load('/home/pre_trained/efficientnet-b0-355c32eb.pth') 312 | if load_fc: 313 | model.load_state_dict(state_dict) 314 | else: 315 | state_dict.pop('_fc.weight') 316 | state_dict.pop('_fc.bias') 317 | res = model.load_state_dict(state_dict, strict=False) 318 | assert set(res.missing_keys) == set(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights' 319 | print('Loaded pretrained weights for {}'.format(model_name)) 320 | -------------------------------------------------------------------------------- /dataset/dataloader.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import sys 3 | import os 4 | import torch 5 | import numpy as np 6 | import random 7 | import csv 8 | import cv2 9 | from torchvision import transforms 10 | from torch.utils.data import Dataset, DataLoader 11 | from torchvision import transforms, utils 12 | from torch.utils.data.sampler import Sampler 13 | 14 | from pycocotools.coco import COCO 15 | 16 | import skimage.io 17 | import skimage.transform 18 | import skimage.color 19 | import skimage 20 | 21 | from PIL import Image 22 | 23 | 24 | class CocoDataset(Dataset): 25 | """Coco dataset.""" 26 | 27 | def __init__(self, root_dir, set_name='train2017', transform=None): 28 | """ 29 | Args: 30 | root_dir (string): COCO directory. 31 | transform (callable, optional): Optional transform to be applied 32 | on a sample. 33 | """ 34 | self.root_dir = root_dir 35 | self.set_name = set_name 36 | self.transform = transform 37 | 38 | self.coco = COCO(os.path.join(self.root_dir, 'annotations_trainval2017', 'annotations', 'instances_' + self.set_name + '.json')) 39 | self.image_ids = self.coco.getImgIds() 40 | 41 | self.load_classes() 42 | 43 | def load_classes(self): 44 | # load class names (name -> label) 45 | categories = self.coco.loadCats(self.coco.getCatIds()) 46 | categories.sort(key=lambda x: x['id']) 47 | 48 | self.classes = {} 49 | self.coco_labels = {} 50 | self.coco_labels_inverse = {} 51 | for c in categories: 52 | self.coco_labels[len(self.classes)] = c['id'] 53 | self.coco_labels_inverse[c['id']] = len(self.classes) 54 | self.classes[c['name']] = len(self.classes) 55 | 56 | # also load the reverse (label -> name) 57 | self.labels = {} 58 | for key, value in self.classes.items(): 59 | self.labels[value] = key 60 | 61 | def __len__(self): 62 | return len(self.image_ids) 63 | 64 | def __getitem__(self, idx): 65 | 66 | img = self.load_image(idx) 67 | annot = self.load_annotations(idx) 68 | sample = {'img': img, 'annot': annot} 69 | if self.transform: 70 | sample = self.transform(sample) 71 | 72 | return sample 73 | 74 | def load_image(self, image_index): 75 | image_info = self.coco.loadImgs(self.image_ids[image_index])[0] 76 | path = os.path.join(self.root_dir, self.set_name, image_info['file_name']) 77 | img = skimage.io.imread(path) 78 | # img = cv2.imread(path) 79 | # img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) 80 | if len(img.shape) == 2: 81 | img = skimage.color.gray2rgb(img) 82 | # img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) 83 | 84 | return img.astype(np.float32) / 255.0 85 | 86 | def load_annotations(self, image_index): 87 | # get ground truth annotations 88 | annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False) 89 | annotations = np.zeros((0, 5)) 90 | 91 | # some images appear to miss annotations (like image with id 257034) 92 | if 
len(annotations_ids) == 0: 93 | return annotations 94 | 95 | # parse annotations 96 | coco_annotations = self.coco.loadAnns(annotations_ids) 97 | for idx, a in enumerate(coco_annotations): 98 | 99 | # some annotations have basically no width / height, skip them 100 | if a['bbox'][2] < 1 or a['bbox'][3] < 1: 101 | continue 102 | 103 | annotation = np.zeros((1, 5)) 104 | annotation[0, :4] = a['bbox'] 105 | annotation[0, 4] = self.coco_label_to_label(a['category_id']) 106 | annotations = np.append(annotations, annotation, axis=0) 107 | 108 | # transform from [x, y, w, h] to [x1, y1, x2, y2] 109 | annotations[:, 2] = annotations[:, 0] + annotations[:, 2] 110 | annotations[:, 3] = annotations[:, 1] + annotations[:, 3] 111 | 112 | return annotations 113 | 114 | def coco_label_to_label(self, coco_label): 115 | return self.coco_labels_inverse[coco_label] 116 | 117 | 118 | def label_to_coco_label(self, label): 119 | return self.coco_labels[label] 120 | 121 | def image_aspect_ratio(self, image_index): 122 | image = self.coco.loadImgs(self.image_ids[image_index])[0] 123 | return float(image['width']) / float(image['height']) 124 | 125 | def num_classes(self): 126 | return 80 127 | 128 | 129 | class CSVDataset(Dataset): 130 | """CSV dataset.""" 131 | 132 | def __init__(self, train_file, class_list, transform=None): 133 | """ 134 | Args: 135 | train_file (string): CSV file with training annotations 136 | annotations (string): CSV file with class list 137 | test_file (string, optional): CSV file with testing annotations 138 | """ 139 | self.train_file = train_file 140 | self.class_list = class_list 141 | self.transform = transform 142 | 143 | # parse the provided class file 144 | try: 145 | with self._open_for_csv(self.class_list) as file: 146 | self.classes = self.load_classes(csv.reader(file, delimiter=',')) 147 | except ValueError as e: 148 | raise_from(ValueError('invalid CSV class file: {}: {}'.format(self.class_list, e)), None) 149 | 150 | self.labels = {} 151 | for key, value in self.classes.items(): 152 | self.labels[value] = key 153 | 154 | # csv with img_path, x1, y1, x2, y2, class_name 155 | try: 156 | with self._open_for_csv(self.train_file) as file: 157 | self.image_data = self._read_annotations(csv.reader(file, delimiter=','), self.classes) 158 | except ValueError as e: 159 | raise_from(ValueError('invalid CSV annotations file: {}: {}'.format(self.train_file, e)), None) 160 | self.image_names = list(self.image_data.keys()) 161 | 162 | def _parse(self, value, function, fmt): 163 | """ 164 | Parse a string into a value, and format a nice ValueError if it fails. 165 | Returns `function(value)`. 166 | Any `ValueError` raised is catched and a new `ValueError` is raised 167 | with message `fmt.format(e)`, where `e` is the caught `ValueError`. 168 | """ 169 | try: 170 | return function(value) 171 | except ValueError as e: 172 | raise_from(ValueError(fmt.format(e)), None) 173 | 174 | def _open_for_csv(self, path): 175 | """ 176 | Open a file with flags suitable for csv.reader. 177 | This is different for python2 it means with mode 'rb', 178 | for python3 this means 'r' with "universal newlines". 
179 | """ 180 | if sys.version_info[0] < 3: 181 | return open(path, 'rb') 182 | else: 183 | return open(path, 'r', newline='') 184 | 185 | 186 | def load_classes(self, csv_reader): 187 | result = {} 188 | 189 | for line, row in enumerate(csv_reader): 190 | line += 1 191 | 192 | try: 193 | class_name, class_id = row 194 | except ValueError: 195 | raise_from(ValueError('line {}: format should be \'class_name,class_id\''.format(line)), None) 196 | class_id = self._parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line)) 197 | 198 | if class_name in result: 199 | raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name)) 200 | result[class_name] = class_id 201 | return result 202 | 203 | 204 | def __len__(self): 205 | return len(self.image_names) 206 | 207 | def __getitem__(self, idx): 208 | 209 | img = self.load_image(idx) 210 | annot = self.load_annotations(idx) 211 | sample = {'img': img, 'annot': annot} 212 | if self.transform: 213 | sample = self.transform(sample) 214 | 215 | return sample 216 | 217 | def load_image(self, image_index): 218 | img = skimage.io.imread(self.image_names[image_index]) 219 | 220 | if len(img.shape) == 2: 221 | img = skimage.color.gray2rgb(img) 222 | 223 | return img.astype(np.float32)/255.0 224 | 225 | def load_annotations(self, image_index): 226 | # get ground truth annotations 227 | annotation_list = self.image_data[self.image_names[image_index]] 228 | annotations = np.zeros((0, 5)) 229 | 230 | # some images appear to miss annotations (like image with id 257034) 231 | if len(annotation_list) == 0: 232 | return annotations 233 | 234 | # parse annotations 235 | for idx, a in enumerate(annotation_list): 236 | # some annotations have basically no width / height, skip them 237 | x1 = a['x1'] 238 | x2 = a['x2'] 239 | y1 = a['y1'] 240 | y2 = a['y2'] 241 | 242 | if (x2-x1) < 1 or (y2-y1) < 1: 243 | continue 244 | 245 | annotation = np.zeros((1, 5)) 246 | 247 | annotation[0, 0] = x1 248 | annotation[0, 1] = y1 249 | annotation[0, 2] = x2 250 | annotation[0, 3] = y2 251 | 252 | annotation[0, 4] = self.name_to_label(a['class']) 253 | annotations = np.append(annotations, annotation, axis=0) 254 | 255 | return annotations 256 | 257 | def _read_annotations(self, csv_reader, classes): 258 | result = {} 259 | for line, row in enumerate(csv_reader): 260 | line += 1 261 | 262 | try: 263 | img_file, x1, y1, x2, y2, class_name = row[:6] 264 | except ValueError: 265 | raise_from(ValueError('line {}: format should be \'img_file,x1,y1,x2,y2,class_name\' or \'img_file,,,,,\''.format(line)), None) 266 | 267 | if img_file not in result: 268 | result[img_file] = [] 269 | 270 | # If a row contains only an image path, it's an image without annotations. 271 | if (x1, y1, x2, y2, class_name) == ('', '', '', '', ''): 272 | continue 273 | 274 | x1 = self._parse(x1, int, 'line {}: malformed x1: {{}}'.format(line)) 275 | y1 = self._parse(y1, int, 'line {}: malformed y1: {{}}'.format(line)) 276 | x2 = self._parse(x2, int, 'line {}: malformed x2: {{}}'.format(line)) 277 | y2 = self._parse(y2, int, 'line {}: malformed y2: {{}}'.format(line)) 278 | 279 | # Check that the bounding box is valid. 
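# (x1, y1) is the top-left corner and (x2, y2) the bottom-right, so x2 must exceed x1 and y2 must exceed y1.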
280 | if x2 <= x1: 281 | raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1)) 282 | if y2 <= y1: 283 | raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1)) 284 | 285 | # check if the current class name is correctly present 286 | if class_name not in classes: 287 | raise ValueError('line {}: unknown class name: \'{}\' (classes: {})'.format(line, class_name, classes)) 288 | 289 | result[img_file].append({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name}) 290 | return result 291 | 292 | def name_to_label(self, name): 293 | return self.classes[name] 294 | 295 | def label_to_name(self, label): 296 | return self.labels[label] 297 | 298 | def num_classes(self): 299 | return max(self.classes.values()) + 1 300 | 301 | def image_aspect_ratio(self, image_index): 302 | image = Image.open(self.image_names[image_index]) 303 | return float(image.width) / float(image.height) 304 | 305 | 306 | def collater(data): 307 | 308 | imgs = [s['img'] for s in data] 309 | annots = [s['annot'] for s in data] 310 | scales1 = [s['scale1'] for s in data] 311 | scales2 = [s['scale2'] for s in data] 312 | 313 | widths = [int(s.shape[0]) for s in imgs] 314 | heights = [int(s.shape[1]) for s in imgs] 315 | batch_size = len(imgs) 316 | 317 | max_width = np.array(widths).max() 318 | max_height = np.array(heights).max() 319 | 320 | padded_imgs = torch.zeros(batch_size, max_width, max_height, 3) 321 | 322 | for i in range(batch_size): 323 | img = imgs[i] 324 | padded_imgs[i, :int(img.shape[0]), :int(img.shape[1]), :] = img 325 | 326 | max_num_annots = max(annot.shape[0] for annot in annots) 327 | 328 | if max_num_annots > 0: 329 | 330 | annot_padded = torch.ones((len(annots), max_num_annots, 5)) * -1 331 | 332 | if max_num_annots > 0: 333 | for idx, annot in enumerate(annots): 334 | #print(annot.shape) 335 | if annot.shape[0] > 0: 336 | annot_padded[idx, :annot.shape[0], :] = annot 337 | else: 338 | annot_padded = torch.ones((len(annots), 1, 5)) * -1 339 | 340 | 341 | padded_imgs = padded_imgs.permute(0, 3, 1, 2) 342 | 343 | return {'img': padded_imgs, 'annot': annot_padded, 'scale1': scales1, 'scale2': scales2} 344 | 345 | class Resizer(object): 346 | """Convert ndarrays in sample to Tensors.""" 347 | 348 | def __call__(self, sample, min_side=512, max_side=512): 349 | image, annots = sample['img'], sample['annot'] 350 | 351 | rows, cols, cns = image.shape 352 | 353 | smallest_side = min(rows, cols) 354 | 355 | # rescale the image so the smallest side is min_side 356 | scale = min_side / smallest_side 357 | scale1 = 512 / rows 358 | scale2 = 512 / cols 359 | # check if the largest side is now greater than max_side, which can happen 360 | # when images have a large aspect ratio 361 | largest_side = max(rows, cols) 362 | 363 | if largest_side * scale > max_side: 364 | scale = max_side / largest_side 365 | 366 | # resize the image with the computed scale 367 | # image = skimage.transform.resize(image, (int(round(rows*scale)), int(round((cols*scale))))) 368 | image = skimage.transform.resize(image, (512, 512)) 369 | 370 | rows, cols, cns = image.shape 371 | 372 | pad_w = 32 - rows%32 373 | pad_h = 32 - cols%32 374 | 375 | new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32) 376 | new_image[:rows, :cols, :] = image.astype(np.float32) 377 | image = image.astype(np.float32) 378 | # print(np.shape(annots)) 379 | # annots[:, :2] *= scale1 380 | # annots[:, 2:4] *= scale2 381 | 382 | annots[:, 0] *= scale2 383 | annots[:, 2] *= scale2 384 | 
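# x coordinates follow the column scale (512 / cols) applied above; y coordinates follow the
# row scale (512 / rows) applied below, matching the warp of the image to a fixed 512x512 size.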
annots[:, 1] *= scale1 385 | annots[:, 3] *= scale1 386 | 387 | return {'img': torch.from_numpy(image), 'annot': torch.from_numpy(annots), 'scale1': scale1, 'scale2': scale2} 388 | 389 | 390 | class Augmenter(object): 391 | """Convert ndarrays in sample to Tensors.""" 392 | 393 | def __call__(self, sample, flip_x=0.5): 394 | 395 | if np.random.rand() < flip_x: 396 | image, annots = sample['img'], sample['annot'] 397 | image = image[:, ::-1, :] 398 | 399 | rows, cols, channels = image.shape 400 | 401 | x1 = annots[:, 0].copy() 402 | x2 = annots[:, 2].copy() 403 | 404 | x_tmp = x1.copy() 405 | 406 | annots[:, 0] = cols - x2 407 | annots[:, 2] = cols - x_tmp 408 | 409 | sample = {'img': image, 'annot': annots} 410 | 411 | return sample 412 | 413 | 414 | class Normalizer(object): 415 | 416 | def __init__(self): 417 | self.mean = np.array([[[0.485, 0.456, 0.406]]]) 418 | self.std = np.array([[[0.229, 0.224, 0.225]]]) 419 | 420 | def __call__(self, sample): 421 | 422 | image, annots = sample['img'], sample['annot'] 423 | 424 | return {'img':((image.astype(np.float32)-self.mean)/self.std), 'annot': annots} 425 | 426 | # class to_tensor(object): 427 | # 428 | # def __call__(self, sample): 429 | # image, annots = sample['img'], sample['annot'] 430 | # return {} 431 | 432 | 433 | 434 | class UnNormalizer(object): 435 | def __init__(self, mean=None, std=None): 436 | if mean == None: 437 | self.mean = [0.485, 0.456, 0.406] 438 | else: 439 | self.mean = mean 440 | if std == None: 441 | self.std = [0.229, 0.224, 0.225] 442 | else: 443 | self.std = std 444 | 445 | def __call__(self, tensor): 446 | """ 447 | Args: 448 | tensor (Tensor): Tensor image of size (C, H, W) to be normalized. 449 | Returns: 450 | Tensor: Normalized image. 451 | """ 452 | for t, m, s in zip(tensor, self.mean, self.std): 453 | t.mul_(s).add_(m) 454 | return tensor 455 | 456 | 457 | class AspectRatioBasedSampler(Sampler): 458 | 459 | def __init__(self, data_source, batch_size, drop_last): 460 | self.data_source = data_source 461 | self.batch_size = batch_size 462 | self.drop_last = drop_last 463 | self.groups = self.group_images() 464 | 465 | def __iter__(self): 466 | random.shuffle(self.groups) 467 | for group in self.groups: 468 | yield group 469 | 470 | def __len__(self): 471 | if self.drop_last: 472 | return len(self.data_source) // self.batch_size 473 | else: 474 | return (len(self.data_source) + self.batch_size - 1) // self.batch_size 475 | 476 | def group_images(self): 477 | # determine the order of the images 478 | order = list(range(len(self.data_source))) 479 | order.sort(key=lambda x: self.data_source.image_aspect_ratio(x)) 480 | 481 | # divide into groups, one group = one batch 482 | return [[order[x % len(order)] for x in range(i, i + self.batch_size)] for i in range(0, len(order), self.batch_size)] 483 | --------------------------------------------------------------------------------