├── README.md ├── data ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── config.cpython-36.pyc │ ├── data_augment.cpython-36.pyc │ ├── voc0712.cpython-36.pyc │ └── voc_eval.cpython-36.pyc ├── config.py ├── data_augment.py ├── voc0712.py └── voc_eval.py ├── demo.py ├── layers ├── __init__.py ├── functions │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── detection.cpython-36.pyc │ │ └── prior_box.cpython-36.pyc │ ├── detection.py │ └── prior_box.py └── modules │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ └── multibox_loss.cpython-36.pyc │ └── multibox_loss.py ├── make.sh ├── models ├── RFB_Net_vgg-1.py ├── RFB_Net_vgg.py ├── __pycache__ │ ├── RFB_Net_vgg.cpython-36.pyc │ ├── __init__.cpython-36.pyc │ └── module.cpython-36.pyc ├── module-1.py ├── module-3.py └── module.py ├── train_RFB.py ├── utils ├── box_utils.py ├── build.py ├── build │ ├── temp.linux-aarch64-3.6 │ │ └── nms │ │ │ ├── cpu_nms.o │ │ │ ├── gpu_nms.o │ │ │ └── nms_kernel.o │ └── temp.linux-x86_64-3.6 │ │ └── nms │ │ ├── cpu_nms.o │ │ ├── gpu_nms.o │ │ └── nms_kernel.o ├── nms │ ├── cpu_nms.c │ ├── cpu_nms.cpython-36m-aarch64-linux-gnu.so │ ├── cpu_nms.cpython-36m-x86_64-linux-gnu.so │ ├── cpu_nms.pyx │ ├── gpu_nms.cpp │ ├── gpu_nms.cpython-36m-aarch64-linux-gnu.so │ ├── gpu_nms.cpython-36m-x86_64-linux-gnu.so │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── py_cpu_nms.py ├── nms_wrapper.py ├── timer.py └── visualize.py └── weights └── epoches_100.pth /README.md: -------------------------------------------------------------------------------- 1 | # fast-object-detection-nano 2 | # Installation guide for this program on the Nano 3 | ## 1. First, set up PyTorch on the Nano 4 | https://blog.csdn.net/donkey_1993/article/details/102794617 5 | ## 2. Then build torch2trt for PyTorch to enable TensorRT acceleration 6 | https://github.com/NVIDIA-AI-IOT/torch2trt 7 | ## 3. Download this project, then run make.sh to build it 8 | sudo bash make.sh 9 | ## 4. 
Modify the video path in demo.py. 10 | sudo python3 demo.py and it will run 11 | 12 | # Training code 13 | ## The voc2007 dataset is used directly. 14 | sudo python3 train_RFB.py 15 | 16 | # Thanks to the two authors below. 17 | https://github.com/ruinmessi/RFBNet 18 | https://github.com/songwsx/RFB-Person 19 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | # from .voc import VOCDetection, AnnotationTransform, detection_collate, VOC_CLASSES 2 | from .voc0712 import VOCDetection, AnnotationTransform, detection_collate, VOC_CLASSES 3 | from .data_augment import * 4 | from .config import * 5 | -------------------------------------------------------------------------------- /data/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/data/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /data/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/data/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /data/__pycache__/data_augment.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/data/__pycache__/data_augment.cpython-36.pyc -------------------------------------------------------------------------------- /data/__pycache__/voc0712.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/data/__pycache__/voc0712.cpython-36.pyc -------------------------------------------------------------------------------- /data/__pycache__/voc_eval.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/data/__pycache__/voc_eval.cpython-36.pyc -------------------------------------------------------------------------------- /data/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | import os.path 3 | 4 | 5 | VOCroot = '/home/common/wangsong/VOC/VOCdevkit' 6 | 7 | #RFB CONFIGS 8 | VOC_Config = { 9 | 'feature_maps' : [38, 19, 10, 5, 3, 1], 10 | 11 | 'min_dim' : 300, 12 | 13 | 'steps' : [8, 16, 32, 64, 100, 300], 14 | 15 | 'min_sizes' : [26, 60, 111, 162, 213, 264], 16 | 17 | 'max_sizes' : [60, 111, 162, 213, 264, 315], 18 | 19 | 'aspect_ratios' : [[0.4, 1.0, 1.5], 20 | [0.4, 1.0, 1.6], 21 | [0.5, 1.1, 1.6], 22 | [0.5, 1.1, 1.6], 23 | [0.5, 1.1, 1.6], 24 | [0.7, 1.4]], 25 | 26 | 'max_ratios' : [0.8, 0.8, 0.8, 0.9, 1, 1], 27 | 28 | 'variance' : [0.1, 0.2], 29 | 30 | 'clip' : True, 31 | } 32 | 33 | 34 | -------------------------------------------------------------------------------- /data/data_augment.py: -------------------------------------------------------------------------------- 1 | """Data augmentation functionality. 
Passed as callable transformations to 2 | Dataset classes. 3 | 4 | The data augmentation procedures were interpreted from @weiliu89's SSD paper 5 | http://arxiv.org/abs/1512.02325 6 | """ 7 | 8 | import torch 9 | from torchvision import transforms 10 | import cv2 11 | import numpy as np 12 | import random 13 | import math 14 | from utils.box_utils import matrix_iou 15 | # import torch_transforms 16 | 17 | def _crop(image, boxes, labels): 18 | height, width, _ = image.shape 19 | 20 | if len(boxes)== 0: 21 | return image, boxes, labels 22 | 23 | while True: 24 | mode = random.choice(( 25 | None, 26 | (0.1, None), 27 | (0.3, None), 28 | (0.5, None), 29 | (0.7, None), 30 | (0.9, None), 31 | (None, None), 32 | )) 33 | 34 | if mode is None: 35 | return image, boxes, labels 36 | 37 | min_iou, max_iou = mode 38 | if min_iou is None: 39 | min_iou = float('-inf') 40 | if max_iou is None: 41 | max_iou = float('inf') 42 | 43 | for _ in range(50): 44 | scale = random.uniform(0.3,1.) 45 | min_ratio = max(0.5, scale*scale) 46 | max_ratio = min(2, 1. / scale / scale) 47 | ratio = math.sqrt(random.uniform(min_ratio, max_ratio)) 48 | w = int(scale * ratio * width) 49 | h = int((scale / ratio) * height) 50 | 51 | 52 | l = random.randrange(width - w) 53 | t = random.randrange(height - h) 54 | roi = np.array((l, t, l + w, t + h)) 55 | 56 | iou = matrix_iou(boxes, roi[np.newaxis]) 57 | 58 | if not (min_iou <= iou.min() and iou.max() <= max_iou): 59 | continue 60 | 61 | image_t = image[roi[1]:roi[3], roi[0]:roi[2]] 62 | 63 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2 64 | mask = np.logical_and(roi[:2] < centers, centers < roi[2:]) \ 65 | .all(axis=1) 66 | boxes_t = boxes[mask].copy() 67 | labels_t = labels[mask].copy() 68 | if len(boxes_t) == 0: 69 | continue 70 | 71 | boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2]) 72 | boxes_t[:, :2] -= roi[:2] 73 | boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:]) 74 | boxes_t[:, 2:] -= roi[:2] 75 | 76 | return image_t, boxes_t,labels_t 77 | 78 | 79 | def _distort(image): 80 | def _convert(image, alpha=1, beta=0): 81 | tmp = image.astype(float) * alpha + beta 82 | tmp[tmp < 0] = 0 83 | tmp[tmp > 255] = 255 84 | image[:] = tmp 85 | 86 | image = image.copy() 87 | 88 | if random.randrange(2): 89 | _convert(image, beta=random.uniform(-32, 32)) 90 | 91 | if random.randrange(2): 92 | _convert(image, alpha=random.uniform(0.5, 1.5)) 93 | 94 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 95 | 96 | if random.randrange(2): 97 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18) 98 | tmp %= 180 99 | image[:, :, 0] = tmp 100 | 101 | if random.randrange(2): 102 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5)) 103 | 104 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 105 | 106 | return image 107 | 108 | 109 | def _expand(image, boxes,fill, p): 110 | if random.random() > p: 111 | return image, boxes 112 | 113 | height, width, depth = image.shape 114 | for _ in range(50): 115 | scale = random.uniform(1,4) 116 | 117 | min_ratio = max(0.5, 1./scale/scale) 118 | max_ratio = min(2, scale*scale) 119 | ratio = math.sqrt(random.uniform(min_ratio, max_ratio)) 120 | ws = scale*ratio 121 | hs = scale/ratio 122 | if ws < 1 or hs < 1: 123 | continue 124 | w = int(ws * width) 125 | h = int(hs * height) 126 | 127 | left = random.randint(0, w - width) 128 | top = random.randint(0, h - height) 129 | 130 | boxes_t = boxes.copy() 131 | boxes_t[:, :2] += (left, top) 132 | boxes_t[:, 2:] += (left, top) 133 | 134 | 135 | expand_image = np.empty( 136 | (h, w, depth), 137 | 
dtype=image.dtype) 138 | expand_image[:, :] = fill 139 | expand_image[top:top + height, left:left + width] = image 140 | image = expand_image 141 | 142 | return image, boxes_t 143 | 144 | 145 | def _mirror(image, boxes): 146 | _, width, _ = image.shape 147 | if random.randrange(2): 148 | image = image[:, ::-1] 149 | boxes = boxes.copy() 150 | boxes[:, 0::2] = width - boxes[:, 2::-2] 151 | return image, boxes 152 | 153 | 154 | def preproc_for_test(image, insize, mean): 155 | interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4] 156 | interp_method = interp_methods[random.randrange(5)] 157 | image = cv2.resize(image, (insize, insize),interpolation=interp_method) 158 | image = image.astype(np.float32) 159 | image -= mean 160 | return image.transpose(2, 0, 1) 161 | 162 | 163 | class preproc(object): 164 | 165 | def __init__(self, resize, rgb_means, p): 166 | self.means = rgb_means 167 | self.resize = resize 168 | self.p = p 169 | 170 | def __call__(self, image, targets): 171 | boxes = targets[:,:-1].copy() 172 | labels = targets[:,-1].copy() 173 | if len(boxes) == 0: 174 | #boxes = np.empty((0, 4)) 175 | targets = np.zeros((1,5)) 176 | image = preproc_for_test(image, self.resize, self.means) 177 | return torch.from_numpy(image), targets 178 | 179 | image_o = image.copy() 180 | targets_o = targets.copy() 181 | height_o, width_o, _ = image_o.shape 182 | boxes_o = targets_o[:,:-1] 183 | labels_o = targets_o[:,-1] 184 | boxes_o[:, 0::2] /= width_o 185 | boxes_o[:, 1::2] /= height_o 186 | labels_o = np.expand_dims(labels_o,1) 187 | targets_o = np.hstack((boxes_o,labels_o)) 188 | 189 | image_t, boxes, labels = _crop(image, boxes, labels) 190 | image_t = _distort(image_t) 191 | image_t, boxes = _expand(image_t, boxes, self.means, self.p) 192 | image_t, boxes = _mirror(image_t, boxes) 193 | #image_t, boxes = _mirror(image, boxes) 194 | 195 | height, width, _ = image_t.shape 196 | image_t = preproc_for_test(image_t, self.resize, self.means) 197 | boxes = boxes.copy() 198 | boxes[:, 0::2] /= width 199 | boxes[:, 1::2] /= height 200 | b_w = (boxes[:, 2] - boxes[:, 0])*1. 201 | b_h = (boxes[:, 3] - boxes[:, 1])*1. 
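        # b_w and b_h are in normalized image coordinates at this point, so the mask below drops boxes whose shorter side is smaller than 1% of the image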
202 | mask_b= np.minimum(b_w, b_h) > 0.01 203 | boxes_t = boxes[mask_b] 204 | labels_t = labels[mask_b].copy() 205 | 206 | if len(boxes_t)==0: 207 | image = preproc_for_test(image_o, self.resize, self.means) 208 | return torch.from_numpy(image),targets_o 209 | 210 | labels_t = np.expand_dims(labels_t,1) 211 | targets_t = np.hstack((boxes_t,labels_t)) 212 | 213 | return torch.from_numpy(image_t), targets_t 214 | 215 | 216 | 217 | class BaseTransform(object): 218 | """Defines the transformations that should be applied to test PIL image 219 | for input into the network 220 | 221 | dimension -> tensorize -> color adj 222 | 223 | Arguments: 224 | resize (int): input dimension to SSD 225 | rgb_means ((int,int,int)): average RGB of the dataset 226 | (104,117,123) 227 | swap ((int,int,int)): final order of channels 228 | Returns: 229 | transform (transform) : callable transform to be applied to test/val 230 | data 231 | """ 232 | def __init__(self, resize, rgb_means, swap=(2, 0, 1)): 233 | self.means = rgb_means 234 | self.resize = resize 235 | self.swap = swap 236 | 237 | # assume input is cv2 img for now 238 | def __call__(self, img): 239 | 240 | interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4] 241 | interp_method = interp_methods[0] 242 | img = cv2.resize(np.array(img), (self.resize, 243 | self.resize),interpolation = interp_method).astype(np.float32) 244 | img -= self.means 245 | img = img.transpose(self.swap) 246 | return torch.from_numpy(img) 247 | -------------------------------------------------------------------------------- /data/voc0712.py: -------------------------------------------------------------------------------- 1 | """VOC Dataset Classes 2 | 3 | Original author: Francisco Massa 4 | https://github.com/fmassa/vision/blob/voc_dataset/torchvision/datasets/voc.py 5 | 6 | Updated by: Ellis Brown, Max deGroot 7 | """ 8 | 9 | import os 10 | import pickle 11 | import os.path 12 | import sys 13 | import torch 14 | import torch.utils.data as data 15 | import torchvision.transforms as transforms 16 | from PIL import Image, ImageDraw, ImageFont 17 | import cv2 18 | import numpy as np 19 | from .voc_eval import voc_eval 20 | if sys.version_info[0] == 2: 21 | import xml.etree.cElementTree as ET 22 | else: 23 | import xml.etree.ElementTree as ET 24 | 25 | 26 | VOC_CLASSES = ( '__background__', # always index 0 27 | 'person') 28 | 29 | # for making bounding boxes pretty 30 | COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128), 31 | (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128)) 32 | 33 | 34 | 35 | 36 | class AnnotationTransform(object): 37 | 38 | """Transforms a VOC annotation into a Tensor of bbox coords and label index 39 | Initilized with a dictionary lookup of classnames to indexes 40 | 41 | Arguments: 42 | class_to_ind (dict, optional): dictionary lookup of classnames -> indexes 43 | (default: alphabetic indexing of VOC's 20 classes) 44 | keep_difficult (bool, optional): keep difficult instances or not 45 | (default: False) 46 | height (int): height 47 | width (int): width 48 | """ 49 | 50 | def __init__(self, class_to_ind=None, keep_difficult=True): 51 | self.class_to_ind = class_to_ind or dict( 52 | zip(VOC_CLASSES, range(len(VOC_CLASSES)))) 53 | self.keep_difficult = keep_difficult 54 | 55 | def __call__(self, target): 56 | """ 57 | Arguments: 58 | target (annotation) : the target annotation to be made usable 59 | will be an ET.Element 60 | Returns: 61 | a list containing lists of bounding boxes [bbox 
coords, class name] 62 | """ 63 | res = np.empty((0,5)) 64 | for obj in target.iter('object'): 65 | difficult = int(obj.find('difficult').text) == 1 66 | if not self.keep_difficult and difficult: 67 | continue 68 | name = obj.find('name').text.lower().strip() 69 | if name != 'person': 70 | continue 71 | bbox = obj.find('bndbox') 72 | 73 | pts = ['xmin', 'ymin', 'xmax', 'ymax'] 74 | bndbox = [] 75 | for i, pt in enumerate(pts): 76 | cur_pt = int(bbox.find(pt).text) - 1 77 | # scale height or width 78 | #cur_pt = cur_pt / width if i % 2 == 0 else cur_pt / height 79 | bndbox.append(cur_pt) 80 | label_idx = self.class_to_ind[name] 81 | bndbox.append(label_idx) 82 | res = np.vstack((res,bndbox)) # [xmin, ymin, xmax, ymax, label_ind] 83 | # img_id = target.find('filename').text[:-4] 84 | 85 | return res # [[xmin, ymin, xmax, ymax, label_ind], ... ] 86 | 87 | 88 | class VOCDetection(data.Dataset): 89 | 90 | """VOC Detection Dataset Object 91 | 92 | input is image, target is annotation 93 | 94 | Arguments: 95 | root (string): filepath to VOCdevkit folder. 96 | image_set (string): imageset to use (eg. 'train', 'val', 'test') 97 | transform (callable, optional): transformation to perform on the 98 | input image 99 | target_transform (callable, optional): transformation to perform on the 100 | target `annotation` 101 | (eg: take in caption string, return tensor of word indices) 102 | dataset_name (string, optional): which dataset to load 103 | (default: 'VOC2007') 104 | """ 105 | 106 | def __init__(self, root, image_sets, preproc=None, target_transform=None, 107 | dataset_name='VOC0712'): 108 | self.root = root 109 | self.image_set = image_sets 110 | self.preproc = preproc 111 | self.target_transform = target_transform 112 | self.name = dataset_name 113 | self._annopath = os.path.join('%s', 'Annotations', '%s.xml') 114 | self._imgpath = os.path.join('%s', 'JPEGImages', '%s.jpg') 115 | self.ids = list() 116 | for (year, name) in image_sets: 117 | self._year = year 118 | rootpath = os.path.join(self.root, 'VOC' + year) 119 | for line in open(os.path.join(rootpath, 'ImageSets', 'Main', name + '.txt')): 120 | img_id, value = line.split() 121 | if value != '1': 122 | continue 123 | self.ids.append((rootpath, img_id)) 124 | 125 | def __getitem__(self, index): 126 | img_id = self.ids[index] 127 | target = ET.parse(self._annopath % img_id).getroot() 128 | img = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR) 129 | height, width, _ = img.shape 130 | 131 | if self.target_transform is not None: 132 | target = self.target_transform(target) 133 | 134 | 135 | if self.preproc is not None: 136 | img, target = self.preproc(img, target) 137 | 138 | 139 | return img, target 140 | 141 | def __len__(self): 142 | return len(self.ids) 143 | 144 | def pull_image(self, index): 145 | '''Returns the original image object at index in PIL form 146 | 147 | Note: not using self.__getitem__(), as any transformations passed in 148 | could mess up this functionality. 149 | 150 | Argument: 151 | index (int): index of img to show 152 | Return: 153 | PIL img 154 | ''' 155 | img_id = self.ids[index] 156 | return cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR) 157 | 158 | def pull_anno(self, index): 159 | '''Returns the original annotation of image at index 160 | 161 | Note: not using self.__getitem__(), as any transformations passed in 162 | could mess up this functionality. 
163 | 164 | Argument: 165 | index (int): index of img to get annotation of 166 | Return: 167 | list: [img_id, [(label, bbox coords),...]] 168 | eg: ('001718', [('dog', (96, 13, 438, 332))]) 169 | ''' 170 | img_id = self.ids[index] 171 | anno = ET.parse(self._annopath % img_id).getroot() 172 | gt = self.target_transform(anno, 1, 1) 173 | return img_id[1], gt 174 | 175 | def pull_tensor(self, index): 176 | '''Returns the original image at an index in tensor form 177 | 178 | Note: not using self.__getitem__(), as any transformations passed in 179 | could mess up this functionality. 180 | 181 | Argument: 182 | index (int): index of img to show 183 | Return: 184 | tensorized version of img, squeezed 185 | ''' 186 | to_tensor = transforms.ToTensor() 187 | return torch.Tensor(self.pull_image(index)).unsqueeze_(0) 188 | 189 | def evaluate_detections(self, all_boxes, output_dir=None): 190 | """ 191 | all_boxes is a list of length number-of-classes. 192 | Each list element is a list of length number-of-images. 193 | Each of those list elements is either an empty list [] 194 | or a numpy array of detection. 195 | 196 | all_boxes[class][image] = [] or np.array of shape #dets x 5 197 | """ 198 | self._write_voc_results_file(all_boxes) 199 | self._do_python_eval(output_dir) 200 | 201 | def _get_voc_results_file_template(self): 202 | filename = 'comp4_det_test' + '_{:s}.txt' 203 | filedir = os.path.join( 204 | self.root, 'results', 'VOC' + self._year, 'Main') 205 | if not os.path.exists(filedir): 206 | os.makedirs(filedir) 207 | path = os.path.join(filedir, filename) 208 | return path 209 | 210 | def _write_voc_results_file(self, all_boxes): 211 | for cls_ind, cls in enumerate(VOC_CLASSES): 212 | cls_ind = cls_ind 213 | if cls == '__background__': 214 | continue 215 | print('Writing {} VOC results file'.format(cls)) 216 | filename = self._get_voc_results_file_template().format(cls) 217 | with open(filename, 'wt') as f: 218 | for im_ind, index in enumerate(self.ids): 219 | index = index[1] 220 | dets = all_boxes[cls_ind][im_ind] 221 | if dets == []: 222 | continue 223 | for k in range(dets.shape[0]): 224 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. 225 | format(index, dets[k, -1], 226 | dets[k, 0] + 1, dets[k, 1] + 1, 227 | dets[k, 2] + 1, dets[k, 3] + 1)) 228 | 229 | def _do_python_eval(self, output_dir='output'): 230 | rootpath = os.path.join(self.root, 'VOC' + self._year) 231 | name = self.image_set[0][1] 232 | annopath = os.path.join( 233 | rootpath, 234 | 'Annotations', 235 | '{:s}.xml') 236 | imagesetfile = os.path.join( 237 | rootpath, 238 | 'ImageSets', 239 | 'Main', 240 | name+'.txt') 241 | cachedir = os.path.join(self.root, 'annotations_cache') 242 | aps = [] 243 | # The PASCAL VOC metric changed in 2010 244 | use_07_metric = True if int(self._year) < 2010 else False 245 | print('VOC07 metric? 
' + ('Yes' if use_07_metric else 'No')) 246 | if output_dir is not None and not os.path.isdir(output_dir): 247 | os.mkdir(output_dir) 248 | for i, cls in enumerate(VOC_CLASSES): 249 | 250 | if cls == '__background__': 251 | continue 252 | 253 | filename = self._get_voc_results_file_template().format(cls) 254 | rec, prec, ap = voc_eval( 255 | filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5, 256 | use_07_metric=use_07_metric) 257 | aps += [ap] 258 | print('AP for {} = {:.4f}'.format(cls, ap)) 259 | if output_dir is not None: 260 | with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f: 261 | pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f) 262 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 263 | print('~~~~~~~~') 264 | print('Results:') 265 | for ap in aps: 266 | print('{:.3f}'.format(ap)) 267 | print('{:.3f}'.format(np.mean(aps))) 268 | print('~~~~~~~~') 269 | 270 | 271 | def detection_collate(batch): 272 | """Custom collate fn for dealing with batches of images that have a different 273 | number of associated object annotations (bounding boxes). 274 | 275 | Arguments: 276 | batch: (tuple) A tuple of tensor images and lists of annotations 277 | 278 | Return: 279 | A tuple containing: 280 | 1) (tensor) batch of images stacked on their 0 dim 281 | 2) (list of tensors) annotations for a given image are stacked on 0 dim 282 | """ 283 | targets = [] 284 | imgs = [] 285 | for _, sample in enumerate(batch): 286 | for _, tup in enumerate(sample): 287 | if torch.is_tensor(tup): 288 | imgs.append(tup) 289 | elif isinstance(tup, type(np.empty(0))): 290 | annos = torch.from_numpy(tup).float() 291 | targets.append(annos) 292 | 293 | return (torch.stack(imgs, 0), targets) 294 | -------------------------------------------------------------------------------- /data/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | 7 | import xml.etree.ElementTree as ET 8 | import os 9 | import pickle 10 | import numpy as np 11 | import pdb 12 | 13 | 14 | def parse_rec(filename): 15 | """ Parse a PASCAL VOC xml file """ 16 | tree = ET.parse(filename) 17 | objects = [] 18 | for obj in tree.findall('object'): 19 | obj_struct = {} 20 | obj_struct['name'] = obj.find('name').text 21 | obj_struct['pose'] = obj.find('pose').text 22 | obj_struct['truncated'] = int(obj.find('truncated').text) 23 | obj_struct['difficult'] = int(obj.find('difficult').text) 24 | bbox = obj.find('bndbox') 25 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 26 | int(bbox.find('ymin').text), 27 | int(bbox.find('xmax').text), 28 | int(bbox.find('ymax').text)] 29 | objects.append(obj_struct) 30 | 31 | return objects 32 | 33 | 34 | 35 | def voc_ap(rec, prec, use_07_metric=False): 36 | """ ap = voc_ap(rec, prec, [use_07_metric]) 37 | Compute VOC AP given precision and recall. 38 | If use_07_metric is true, uses the 39 | VOC 07 11 point method (default:False). 40 | """ 41 | if use_07_metric: 42 | # 11 point metric 43 | ap = 0. 44 | for t in np.arange(0., 1.1, 0.1): 45 | if np.sum(rec >= t) == 0: 46 | p = 0 47 | else: 48 | p = np.max(prec[rec >= t]) 49 | ap = ap + p / 11. 
50 | else: 51 | # correct AP calculation 52 | # first append sentinel values at the end 53 | mrec = np.concatenate(([0.], rec, [1.])) 54 | mpre = np.concatenate(([0.], prec, [0.])) 55 | 56 | # compute the precision envelope 57 | for i in range(mpre.size - 1, 0, -1): 58 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 59 | 60 | # to calculate area under PR curve, look for points 61 | # where X axis (recall) changes value 62 | i = np.where(mrec[1:] != mrec[:-1])[0] 63 | 64 | # and sum (\Delta recall) * prec 65 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 66 | return ap 67 | 68 | def voc_eval(detpath, 69 | annopath, 70 | imagesetfile, 71 | classname, 72 | cachedir, 73 | ovthresh=0.5, 74 | use_07_metric=False): 75 | """rec, prec, ap = voc_eval(detpath, 76 | annopath, 77 | imagesetfile, 78 | classname, 79 | [ovthresh], 80 | [use_07_metric]) 81 | 82 | Top level function that does the PASCAL VOC evaluation. 83 | 84 | detpath: Path to detections 85 | detpath.format(classname) should produce the detection results file. 86 | annopath: Path to annotations 87 | annopath.format(imagename) should be the xml annotations file. 88 | imagesetfile: Text file containing the list of images, one image per line. 89 | classname: Category name (duh) 90 | cachedir: Directory for caching the annotations 91 | [ovthresh]: Overlap threshold (default = 0.5) 92 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 93 | (default False) 94 | """ 95 | # assumes detections are in detpath.format(classname) 96 | # assumes annotations are in annopath.format(imagename) 97 | # assumes imagesetfile is a text file with each line an image name 98 | # cachedir caches the annotations in a pickle file 99 | 100 | # first load gt 101 | if not os.path.isdir(cachedir): 102 | os.mkdir(cachedir) 103 | cachefile = os.path.join(cachedir, 'annots.pkl') 104 | # read list of images 105 | with open(imagesetfile, 'r') as f: 106 | lines = f.readlines() 107 | #imagenames = [x.strip() for x in lines] 108 | imagenames = [] 109 | for line in lines: 110 | img_id, value = line.split() 111 | if value != '1': 112 | continue 113 | imagenames.append(img_id) 114 | 115 | if not os.path.isfile(cachefile): 116 | # load annots 117 | recs = {} 118 | for i, imagename in enumerate(imagenames): 119 | recs[imagename] = parse_rec(annopath.format(imagename)) 120 | if i % 100 == 0: 121 | print('Reading annotation for {:d}/{:d}'.format( 122 | i + 1, len(imagenames))) 123 | # save 124 | print('Saving cached annotations to {:s}'.format(cachefile)) 125 | with open(cachefile, 'wb') as f: 126 | pickle.dump(recs, f) 127 | else: 128 | # load 129 | with open(cachefile, 'rb') as f: 130 | recs = pickle.load(f) 131 | 132 | # extract gt objects for this class 133 | class_recs = {} 134 | npos = 0 135 | for imagename in imagenames: 136 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 137 | bbox = np.array([x['bbox'] for x in R]) 138 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 139 | det = [False] * len(R) 140 | npos = npos + sum(~difficult) 141 | class_recs[imagename] = {'bbox': bbox, 142 | 'difficult': difficult, 143 | 'det': det} 144 | 145 | # read dets 146 | detfile = detpath.format(classname) 147 | with open(detfile, 'r') as f: 148 | lines = f.readlines() 149 | 150 | splitlines = [x.strip().split(' ') for x in lines] 151 | image_ids = [x[0] for x in splitlines] 152 | confidence = np.array([float(x[1]) for x in splitlines]) 153 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 154 | 155 | # sort by confidence 
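    # (descending order, so each ground-truth box is claimed by its highest-scoring detection when TPs/FPs are marked below)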
156 | sorted_ind = np.argsort(-confidence) 157 | sorted_scores = np.sort(-confidence) 158 | BB = BB[sorted_ind, :] 159 | image_ids = [image_ids[x] for x in sorted_ind] 160 | 161 | # go down dets and mark TPs and FPs 162 | nd = len(image_ids) 163 | tp = np.zeros(nd) 164 | fp = np.zeros(nd) 165 | for d in range(nd): 166 | R = class_recs[image_ids[d]] 167 | bb = BB[d, :].astype(float) 168 | ovmax = -np.inf 169 | BBGT = R['bbox'].astype(float) 170 | 171 | if BBGT.size > 0: 172 | # compute overlaps 173 | # intersection 174 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 175 | iymin = np.maximum(BBGT[:, 1], bb[1]) 176 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 177 | iymax = np.minimum(BBGT[:, 3], bb[3]) 178 | iw = np.maximum(ixmax - ixmin + 1., 0.) 179 | ih = np.maximum(iymax - iymin + 1., 0.) 180 | inters = iw * ih 181 | 182 | # union 183 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 184 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 185 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 186 | 187 | overlaps = inters / uni 188 | ovmax = np.max(overlaps) 189 | jmax = np.argmax(overlaps) 190 | 191 | if ovmax > ovthresh: 192 | if not R['difficult'][jmax]: 193 | if not R['det'][jmax]: 194 | tp[d] = 1. 195 | R['det'][jmax] = 1 196 | else: 197 | fp[d] = 1. 198 | else: 199 | fp[d] = 1. 200 | 201 | # compute precision recall 202 | fp = np.cumsum(fp) 203 | tp = np.cumsum(tp) 204 | rec = tp / float(npos) 205 | # avoid divide by zero in case the first detection matches a difficult 206 | # ground truth 207 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 208 | ap = voc_ap(rec, prec, use_07_metric) 209 | 210 | return rec, prec, ap 211 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import os 4 | import pickle 5 | import argparse 6 | import torch 7 | import torch.nn as nn 8 | import torch.backends.cudnn as cudnn 9 | import torchvision.transforms as transforms 10 | import numpy as np 11 | from torch.autograd import Variable 12 | from data import VOCroot 13 | from data import AnnotationTransform,VOCDetection, BaseTransform, VOC_Config 14 | from models.RFB_Net_vgg import build_net 15 | import torch.utils.data as data 16 | from layers.functions import Detect,PriorBox 17 | from utils.nms_wrapper import nms 18 | from utils.timer import Timer 19 | import cv2 20 | import time 21 | from collections import deque 22 | from torch2trt import torch2trt 23 | parser = argparse.ArgumentParser(description='Receptive Field Block Net') 24 | parser.add_argument('--img_dir', default='images', type=str, 25 | help='Dir to save results') 26 | parser.add_argument('-m', '--trained_model', default='weights/epoches_100.pth', 27 | type=str, help='Trained state_dict file path to open') 28 | parser.add_argument('--cuda', default=True, type=bool, 29 | help='Use cuda to train model') 30 | parser.add_argument('--cpu', default=False, type=bool, 31 | help='Use cpu nms') 32 | args = parser.parse_args() 33 | 34 | cfg = VOC_Config 35 | img_dim = 300 36 | num_classes = 2 37 | rgb_means = (104, 117, 123) 38 | 39 | priorbox = PriorBox(cfg) 40 | with torch.no_grad(): 41 | priors = priorbox.forward() 42 | if args.cuda: 43 | priors = priors.cuda() 44 | 45 | class ObjectDetector: 46 | def __init__(self, net, detection, transform, num_classes=21, thresh=0.2, cuda=True): 47 | self.net = net 48 | self.detection = detection 49 | self.transform = transform 50 | self.num_classes = 
num_classes 51 | self.thresh = thresh 52 | self.cuda = cuda 53 | 54 | def predict(self, img): 55 | _t = {'im_detect': Timer(), 'misc': Timer()} 56 | scale = torch.Tensor([img.shape[1], img.shape[0], 57 | img.shape[1], img.shape[0]]) 58 | with torch.no_grad(): 59 | x = self.transform(img).unsqueeze(0) 60 | if self.cuda: 61 | x = x.cuda() 62 | scale = scale.cuda() 63 | _t['im_detect'].tic() 64 | out = model_trt(x) # forward pass 65 | #print(out) 66 | boxes, scores = self.detection.forward(out, priors) 67 | detect_time = _t['im_detect'].toc() 68 | boxes = boxes[0] 69 | scores = scores[0] 70 | # scale each detection back up to the image 71 | boxes *= scale 72 | boxes = boxes.cpu().numpy() 73 | scores = scores.cpu().numpy() 74 | _t['misc'].tic() 75 | all_boxes = [[] for _ in range(num_classes)] 76 | for j in range(1, num_classes): 77 | inds = np.where(scores[:, j] > self.thresh)[0] 78 | if len(inds) == 0: 79 | all_boxes[j] = np.zeros([0, 5], dtype=np.float32) 80 | continue 81 | c_bboxes = boxes[inds] 82 | c_scores = scores[inds, j] 83 | #print(scores[:, j]) 84 | c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype( 85 | np.float32, copy=False) 86 | # keep = nms(c_bboxes,c_scores) 87 | keep = nms(c_dets, 0.2, force_cpu=args.cpu) 88 | c_dets = c_dets[keep, :] 89 | all_boxes[j] = c_dets 90 | nms_time = _t['misc'].toc() 91 | total_time = detect_time+nms_time 92 | #print('total time: ', total_time) 93 | return all_boxes, total_time 94 | 95 | if __name__ == '__main__': 96 | # load net 97 | net = build_net('test', img_dim, num_classes) # initialize detector 98 | state_dict = torch.load(args.trained_model) 99 | # create new OrderedDict that does not contain `module.` 100 | from collections import OrderedDict 101 | new_state_dict = OrderedDict() 102 | for k, v in state_dict.items(): 103 | head = k[:7] 104 | if head == 'module.': 105 | name = k[7:] # remove `module.` 106 | else: 107 | name = k 108 | new_state_dict[name] = v 109 | net.load_state_dict(new_state_dict) 110 | net.eval() 111 | print('Finished loading model!') 112 | if args.cuda: 113 | net = net.cuda() 114 | cudnn.benchmark = True 115 | else: 116 | net = net.cpu() 117 | detector = Detect(num_classes,0,cfg) 118 | transform = BaseTransform(img_dim, rgb_means, (2, 0, 1)) 119 | cap = cv2.VideoCapture('11.mp4') 120 | #cap1 = cv2.VideoCapture('rtsp://admin:uc123456@101.205.119.109:554/Streaming/Channels/301') 121 | ret,image = cap.read() 122 | x = transform(image).unsqueeze(0) 123 | x = x.cuda() 124 | model_trt = torch2trt(net,[x]) 125 | object_detector = ObjectDetector(model_trt, detector, transform) 126 | img_list = os.listdir(args.img_dir) 127 | frame_no = 0 128 | fourcc = cv2.VideoWriter_fourcc(*'MJPG') 129 | output = cv2.VideoWriter("demo1.avi", fourcc, 20, (1280, 720)) 130 | while True: 131 | start = time.time() 132 | frame_no +=1 133 | #print(frame_no) 134 | #try: 135 | ret,image = cap.read() 136 | #ret1,image1 = cap1.read() 137 | detect_bboxes, tim = object_detector.predict(image) 138 | for i in range(len(detect_bboxes[1])): 139 | pt = detect_bboxes[1][i] 140 | cv2.rectangle(image,(pt[0],pt[1]),(pt[2],pt[3]),(0,255,0),2) 141 | print(detect_bboxes) 142 | #detect_bboxes1, tim1 = object_detector.predict(image1) 143 | end = time.time() 144 | frame_time = end - start 145 | print(frame_time) 146 | cv2.imshow('result',image) 147 | # cv2.imshow('result1',image1) 148 | cv2.waitKey(1) 149 | output.write(image) 150 | #except Exception: 151 | # cap = cv2.VideoCapture('rtsp://admin:uc123456@101.205.119.109:554/Streaming/Channels/301') 152 | # 
cap1 = cv2.VideoCapture('rtsp://admin:uc123456@101.205.119.109:554/Streaming/Channels/301') 153 | # continue 154 | -------------------------------------------------------------------------------- /layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | from .modules import * 3 | -------------------------------------------------------------------------------- /layers/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .detection import Detect 2 | from .prior_box import PriorBox 3 | 4 | 5 | __all__ = ['Detect', 'PriorBox'] 6 | -------------------------------------------------------------------------------- /layers/functions/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/layers/functions/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /layers/functions/__pycache__/detection.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/layers/functions/__pycache__/detection.cpython-36.pyc -------------------------------------------------------------------------------- /layers/functions/__pycache__/prior_box.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/layers/functions/__pycache__/prior_box.cpython-36.pyc -------------------------------------------------------------------------------- /layers/functions/detection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.backends.cudnn as cudnn 4 | from torch.autograd import Function 5 | from torch.autograd import Variable 6 | from utils.box_utils import decode 7 | 8 | 9 | class Detect(Function): 10 | """At test time, Detect is the final layer of SSD. Decode location preds, 11 | apply non-maximum suppression to location predictions based on conf 12 | scores and threshold to a top_k number of output predictions for both 13 | confidence score and locations. 
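    Note: in this repository the class only decodes the loc predictions against the priors and returns all boxes with their confidence scores; score thresholding and NMS are applied by the caller (see demo.py and utils/nms_wrapper.py).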
14 | """ 15 | def __init__(self, num_classes, bkg_label, cfg): 16 | self.num_classes = num_classes 17 | self.background_label = bkg_label 18 | 19 | self.variance = cfg['variance'] 20 | 21 | def forward(self, predictions, prior): 22 | """ 23 | Args: 24 | loc_data: (tensor) Loc preds from loc layers 25 | Shape: [batch,num_priors*4] 26 | conf_data: (tensor) Shape: Conf preds from conf layers 27 | Shape: [batch*num_priors,num_classes] 28 | prior_data: (tensor) Prior boxes and variances from priorbox layers 29 | Shape: [1,num_priors,4] 30 | """ 31 | 32 | loc, conf = predictions 33 | 34 | loc_data = loc.data 35 | conf_data = conf.data 36 | prior_data = prior.data 37 | num = loc_data.size(0) # batch size 38 | self.num_priors = prior_data.size(0) 39 | self.boxes = torch.zeros(1, self.num_priors, 4) 40 | self.scores = torch.zeros(1, self.num_priors, self.num_classes) 41 | if loc_data.is_cuda: 42 | self.boxes = self.boxes.cuda() 43 | self.scores = self.scores.cuda() 44 | 45 | if num == 1: 46 | # size batch x num_classes x num_priors 47 | conf_preds = conf_data.unsqueeze(0) 48 | 49 | else: 50 | conf_preds = conf_data.view(num, num_priors, 51 | self.num_classes) 52 | self.boxes.expand_(num, self.num_priors, 4) 53 | self.scores.expand_(num, self.num_priors, self.num_classes) 54 | 55 | # Decode predictions into bboxes. 56 | for i in range(num): 57 | decoded_boxes = decode(loc_data[i], prior_data, self.variance) 58 | conf_scores = conf_preds[i].clone() 59 | 60 | self.boxes[i] = decoded_boxes 61 | self.scores[i] = conf_scores 62 | 63 | return self.boxes, self.scores 64 | -------------------------------------------------------------------------------- /layers/functions/prior_box.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.backends.cudnn as cudnn 4 | from math import sqrt as sqrt 5 | from itertools import product as product 6 | 7 | 8 | class PriorBox(object): 9 | """Compute priorbox coordinates in center-offset form for each source 10 | feature map. 11 | Note: 12 | This 'layer' has changed between versions of the original SSD 13 | paper, so we include both versions, but note v2 is the most tested and most 14 | recent version of the paper. 
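    With the VOC_Config used in this repo (feature maps 38/19/10/5/3/1 and 4, 4, 4, 4, 4, 3 priors per location), forward() returns the priors as a (7759, 4) tensor of [cx, cy, w, h] in relative coordinates. For example (this mirrors the __main__ check at the bottom of this file):
        priors = PriorBox(VOC_Config).forward()  # torch.Size([7759, 4])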
15 | 16 | """ 17 | def __init__(self, cfg): 18 | super(PriorBox, self).__init__() 19 | self.image_size = cfg['min_dim'] 20 | self.variance = cfg['variance'] or [0.1] 21 | self.feature_maps = cfg['feature_maps'] 22 | self.min_sizes = cfg['min_sizes'] 23 | self.max_sizes = cfg['max_sizes'] 24 | self.steps = cfg['steps'] 25 | self.aspect_ratios = cfg['aspect_ratios'] 26 | self.max_ratios = cfg['max_ratios'] 27 | 28 | self.clip = cfg['clip'] 29 | for v in self.variance: 30 | if v <= 0: 31 | raise ValueError('Variances must be greater than 0') 32 | 33 | def forward(self): 34 | mean = [] 35 | for k, f in enumerate(self.feature_maps): 36 | for i, j in product(range(f), repeat=2): 37 | f_k = self.image_size / self.steps[k] 38 | cx = (j + 0.5) / f_k 39 | cy = (i + 0.5) / f_k 40 | 41 | s_k = self.min_sizes[k]/self.image_size 42 | 43 | # aspect_ratio: 1 44 | # rel size: sqrt(s_k * s_(k+1)) 45 | s_k_prime = sqrt(s_k * (self.max_sizes[k]/self.image_size)) 46 | mean += [cx, cy, s_k_prime*sqrt(self.max_ratios[k]), s_k_prime/sqrt(self.max_ratios[k])] 47 | 48 | # rest of aspect ratios 49 | for ar in self.aspect_ratios[k]: 50 | mean += [cx, cy, s_k*sqrt(ar), s_k/sqrt(ar)] 51 | 52 | 53 | # back to torch land 54 | output = torch.Tensor(mean).view(-1, 4) 55 | if self.clip: 56 | output.clamp_(max=1, min=0) 57 | return output 58 | 59 | if __name__ == '__main__': 60 | # RFB CONFIGS 61 | VOC_Config = { 62 | 'feature_maps': [38, 19, 10, 5, 3, 1], 63 | 64 | 'min_dim': 300, 65 | 66 | 'steps': [8, 16, 32, 64, 100, 300], 67 | 68 | 'min_sizes': [26, 60, 111, 162, 213, 264], 69 | 70 | 'max_sizes': [60, 111, 162, 213, 264, 315], 71 | 72 | 'aspect_ratios': [[0.4, 1.0, 1.5], 73 | [0.4, 1.0, 1.6], 74 | [0.5, 1.1, 1.6], 75 | [0.5, 1.1, 1.6], 76 | [0.5, 1.1, 1.6], 77 | [0.7, 1.4]], 78 | 79 | 'max_ratios': [0.8, 0.8, 0.8, 0.9, 1, 1], 80 | 81 | 'variance': [0.1, 0.2], 82 | 83 | 'clip': True, 84 | } 85 | 86 | priorbox = PriorBox(VOC_Config) 87 | 88 | with torch.no_grad(): 89 | priors = priorbox.forward() 90 | print(priors.shape) -------------------------------------------------------------------------------- /layers/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .multibox_loss import MultiBoxLoss 2 | 3 | __all__ = ['MultiBoxLoss'] 4 | -------------------------------------------------------------------------------- /layers/modules/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/layers/modules/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /layers/modules/__pycache__/multibox_loss.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/layers/modules/__pycache__/multibox_loss.cpython-36.pyc -------------------------------------------------------------------------------- /layers/modules/multibox_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from utils.box_utils import match, log_sum_exp 6 | GPU = False 7 | if torch.cuda.is_available(): 8 | GPU = True 9 | 10 | 11 | class MultiBoxLoss(nn.Module): 12 | """SSD 
Weighted Loss Function 13 | Compute Targets: 14 | 1) Produce Confidence Target Indices by matching ground truth boxes 15 | with (default) 'priorboxes' that have jaccard index > threshold parameter 16 | (default threshold: 0.5). 17 | 2) Produce localization target by 'encoding' variance into offsets of ground 18 | truth boxes and their matched 'priorboxes'. 19 | 3) Hard negative mining to filter the excessive number of negative examples 20 | that comes with using a large number of default bounding boxes. 21 | (default negative:positive ratio 3:1) 22 | Objective Loss: 23 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 24 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss 25 | weighted by α which is set to 1 by cross val. 26 | Args: 27 | c: class confidences, 28 | l: predicted boxes, 29 | g: ground truth boxes 30 | N: number of matched default boxes 31 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 32 | """ 33 | 34 | 35 | def __init__(self, num_classes,overlap_thresh,prior_for_matching,bkg_label,neg_mining,neg_pos,neg_overlap,encode_target): 36 | super(MultiBoxLoss, self).__init__() 37 | self.num_classes = num_classes 38 | self.threshold = overlap_thresh 39 | self.background_label = bkg_label 40 | self.encode_target = encode_target 41 | self.use_prior_for_matching = prior_for_matching 42 | self.do_neg_mining = neg_mining 43 | self.negpos_ratio = neg_pos 44 | self.neg_overlap = neg_overlap 45 | self.variance = [0.1,0.2] 46 | 47 | def forward(self, predictions, priors, targets): 48 | """Multibox Loss 49 | Args: 50 | predictions (tuple): A tuple containing loc preds, conf preds, 51 | and prior boxes from SSD net. 52 | conf shape: torch.size(batch_size,num_priors,num_classes) 53 | loc shape: torch.size(batch_size,num_priors,4) 54 | priors shape: torch.size(num_priors,4) 55 | 56 | ground_truth (tensor): Ground truth boxes and labels for a batch, 57 | shape: [batch_size,num_objs,5] (last idx is the label). 
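            Note: here `targets` is the list of per-image (num_objs, 5) tensors produced by detection_collate, so num_objs can differ from image to image.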
58 | """ 59 | 60 | loc_data, conf_data = predictions 61 | priors = priors 62 | num = loc_data.size(0) 63 | num_priors = (priors.size(0)) 64 | num_classes = self.num_classes 65 | 66 | # match priors (default boxes) and ground truth boxes 67 | loc_t = torch.Tensor(num, num_priors, 4) 68 | conf_t = torch.LongTensor(num, num_priors) 69 | for idx in range(num): 70 | truths = targets[idx][:,:-1].data 71 | labels = targets[idx][:,-1].data 72 | defaults = priors.data 73 | match(self.threshold,truths,defaults,self.variance,labels,loc_t,conf_t,idx) 74 | if GPU: 75 | loc_t = loc_t.cuda() 76 | conf_t = conf_t.cuda() 77 | # wrap targets 78 | loc_t = Variable(loc_t, requires_grad=False) 79 | conf_t = Variable(conf_t,requires_grad=False) 80 | 81 | pos = conf_t > 0 82 | 83 | # Localization Loss (Smooth L1) 84 | # Shape: [batch,num_priors,4] 85 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) 86 | loc_p = loc_data[pos_idx].view(-1,4) 87 | loc_t = loc_t[pos_idx].view(-1,4) 88 | loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum') 89 | 90 | # Compute max conf across batch for hard negative mining 91 | batch_conf = conf_data.view(-1,self.num_classes) 92 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1,1)) 93 | 94 | # Hard Negative Mining 95 | loss_c[pos.view(-1,1)] = 0 # filter out pos boxes for now 96 | loss_c = loss_c.view(num, -1) 97 | _,loss_idx = loss_c.sort(1, descending=True) 98 | _,idx_rank = loss_idx.sort(1) 99 | num_pos = pos.long().sum(1,keepdim=True) 100 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) 101 | neg = idx_rank < num_neg.expand_as(idx_rank) 102 | 103 | # Confidence Loss Including Positive and Negative Examples 104 | pos_idx = pos.unsqueeze(2).expand_as(conf_data) 105 | neg_idx = neg.unsqueeze(2).expand_as(conf_data) 106 | conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes) 107 | targets_weighted = conf_t[(pos+neg).gt(0)] 108 | loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum') 109 | 110 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 111 | 112 | N = max(num_pos.data.sum().float(), 1) 113 | loss_l/=N 114 | loss_c/=N 115 | return loss_l,loss_c 116 | -------------------------------------------------------------------------------- /make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd ./utils/ 3 | 4 | CUDA_PATH=/usr/local/cuda/ 5 | 6 | python3 build.py build_ext --inplace 7 | 8 | cd .. 
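# Note: build.py is expected to compile the Cython/CUDA NMS extensions under utils/nms
# (cpu_nms.pyx, gpu_nms.pyx, nms_kernel.cu) into the .so modules loaded by utils/nms_wrapper.py,
# using the CUDA toolkit found at CUDA_PATH above. An optional smoke test after building,
# run from the repository root (the same import demo.py performs):
#   python3 -c "from utils.nms_wrapper import nms; print('nms extension OK')"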
9 | -------------------------------------------------------------------------------- /models/RFB_Net_vgg-1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from layers import * 6 | import torchvision.transforms as transforms 7 | import torchvision.models as models 8 | import torch.backends.cudnn as cudnn 9 | import os 10 | from models.module import BasicRFB, Backbone 11 | 12 | 13 | 14 | class RFBNet(nn.Module): 15 | 16 | def __init__(self, phase, size, head, num_classes): 17 | super(RFBNet, self).__init__() 18 | self.phase = phase 19 | self.num_classes = num_classes 20 | self.size = size 21 | 22 | self.base = Backbone() 23 | 24 | self.loc = nn.ModuleList(head[0]) 25 | self.conf = nn.ModuleList(head[1]) 26 | if self.phase == 'test': 27 | self.softmax = nn.Softmax(dim=-1) 28 | 29 | def forward(self, x): 30 | sources = list() 31 | loc = list() 32 | conf = list() 33 | 34 | f1, f2, f3, f4, f5, f6 = self.base(x) 35 | 36 | sources = [f1, f2, f3, f4, f5, f6] 37 | 38 | # apply multibox head to source layers 39 | for (x, l, c) in zip(sources, self.loc, self.conf): 40 | loc.append(l(x).permute(0, 2, 3, 1).contiguous()) 41 | conf.append(c(x).permute(0, 2, 3, 1).contiguous()) 42 | 43 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) 44 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) 45 | 46 | if self.phase == "test": 47 | output = ( 48 | loc.view(loc.size(0), -1, 4), # loc preds 49 | self.softmax(conf.view(-1, self.num_classes)), # conf preds 50 | ) 51 | else: 52 | output = ( 53 | loc.view(loc.size(0), -1, 4), 54 | conf.view(conf.size(0), -1, self.num_classes), 55 | ) 56 | return output 57 | 58 | def load_weights(self, base_file): 59 | other, ext = os.path.splitext(base_file) 60 | if ext == '.pkl' or '.pth': 61 | print('Loading weights into state dict...') 62 | self.load_state_dict(torch.load(base_file)) 63 | print('Finished!') 64 | else: 65 | print('Sorry only .pth and .pkl files supported.') 66 | 67 | def multibox(num_classes=2): 68 | # 需要注意,这里要跟 prior_box.py 对应上 69 | # number of boxes per feature map location,就是各个feature map上预定义的anchor数,可结合prior_box.py;理解 70 | anchor_num = [4, 4, 4, 4, 4, 3] # number of boxes per feature map location 71 | loc_layers = [] 72 | conf_layers = [] 73 | 74 | ############################ 第1个检测层 ############################ 75 | loc_layers += [nn.Conv2d(256, anchor_num[0] * 4, kernel_size=3, padding=1)] 76 | conf_layers += [nn.Conv2d(256, anchor_num[0] * num_classes, kernel_size=3, padding=1)] 77 | ############################ 第2个检测层 ############################ 78 | loc_layers += [nn.Conv2d(256, anchor_num[1] * 4, kernel_size=3, padding=1)] 79 | conf_layers += [nn.Conv2d(256, anchor_num[1] * num_classes, kernel_size=3, padding=1)] 80 | ############################ 第3个检测层 ############################ 81 | loc_layers += [nn.Conv2d(256, anchor_num[2] * 4, kernel_size=3, padding=1)] 82 | conf_layers += [nn.Conv2d(256, anchor_num[2] * num_classes, kernel_size=3, padding=1)] 83 | ############################ 第4个检测层 ############################ 84 | loc_layers += [nn.Conv2d(256, anchor_num[3] * 4, kernel_size=3, padding=1)] 85 | conf_layers += [nn.Conv2d(256, anchor_num[3] * num_classes, kernel_size=3, padding=1)] 86 | ############################ 第5个检测层 ############################ 87 | loc_layers += [nn.Conv2d(256, anchor_num[4] * 4, kernel_size=3, padding=1)] 88 | conf_layers += [nn.Conv2d(256, 
anchor_num[4] * num_classes, kernel_size=3, padding=1)] 89 | ############################ 第6个检测层 ############################ 90 | loc_layers += [nn.Conv2d(256, anchor_num[5] * 4, kernel_size=3, padding=1)] 91 | conf_layers += [nn.Conv2d(256, anchor_num[5] * num_classes, kernel_size=3, padding=1)] 92 | 93 | return (loc_layers, conf_layers) 94 | 95 | 96 | def build_net(phase, size=300, num_classes=2): 97 | if phase != "test" and phase != "train": 98 | print("Error: Phase not recognized") 99 | return 100 | if size != 300: 101 | print("Error: Sorry only RFBNet300 are supported!") 102 | return 103 | 104 | return RFBNet(phase, size, multibox(num_classes), num_classes) 105 | 106 | if __name__ == '__main__': 107 | # 0.966 MB 108 | x = torch.randn(2, 3, 300, 300) 109 | net = build_net('test') 110 | from torchsummary import summary 111 | summary(net, (3, 300, 300)) -------------------------------------------------------------------------------- /models/RFB_Net_vgg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from layers import * 6 | import torchvision.transforms as transforms 7 | import torchvision.models as models 8 | import torch.backends.cudnn as cudnn 9 | import os 10 | from models.module import BasicRFB, Backbone 11 | 12 | 13 | 14 | class RFBNet(nn.Module): 15 | 16 | def __init__(self, phase, size, head, num_classes): 17 | super(RFBNet, self).__init__() 18 | self.phase = phase 19 | self.num_classes = num_classes 20 | self.size = size 21 | 22 | self.base = Backbone() 23 | 24 | self.loc = nn.ModuleList(head[0]) 25 | self.conf = nn.ModuleList(head[1]) 26 | if self.phase == 'test': 27 | self.softmax = nn.Softmax(dim=1) 28 | 29 | def forward(self, x): 30 | sources = list() 31 | loc = list() 32 | conf = list() 33 | 34 | f1, f2, f3, f4, f5, f6 = self.base(x) 35 | 36 | sources = [f1, f2, f3, f4, f5, f6] 37 | 38 | # apply multibox head to source layers 39 | for (x, l, c) in zip(sources, self.loc, self.conf): 40 | loc.append(l(x).permute(0, 2, 3, 1).contiguous()) 41 | conf.append(c(x).permute(0, 2, 3, 1).contiguous()) 42 | 43 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) 44 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) 45 | 46 | if self.phase == "test": 47 | output = ( 48 | loc.view(loc.size(0), -1, 4), # loc preds 49 | conf.view(conf.size(0), -1, self.num_classes), 50 | #self.softmax(conf.view(-1, self.num_classes)), # conf preds 51 | ) 52 | else: 53 | output = ( 54 | loc.view(loc.size(0), -1, 4), 55 | conf.view(conf.size(0), -1, self.num_classes), 56 | ) 57 | return output 58 | 59 | def load_weights(self, base_file): 60 | other, ext = os.path.splitext(base_file) 61 | if ext == '.pkl' or '.pth': 62 | print('Loading weights into state dict...') 63 | self.load_state_dict(torch.load(base_file)) 64 | print('Finished!') 65 | else: 66 | print('Sorry only .pth and .pkl files supported.') 67 | 68 | def multibox(num_classes=2): 69 | # 需要注意,这里要跟 prior_box.py 对应上 70 | # number of boxes per feature map location,就是各个feature map上预定义的anchor数,可结合prior_box.py;理解 71 | anchor_num = [4, 4, 4, 4, 4, 3] # number of boxes per feature map location 72 | loc_layers = [] 73 | conf_layers = [] 74 | 75 | ############################ 第1个检测层 ############################ 76 | loc_layers += [nn.Conv2d(128, anchor_num[0] * 4, kernel_size=3, padding=1)] 77 | conf_layers += [nn.Conv2d(128, anchor_num[0] * num_classes, kernel_size=3, padding=1)] 78 | 
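    # anchor_num above has to stay in sync with prior_box.py: each feature-map cell gets one prior
    # built from max_ratios[k] plus one prior per entry in aspect_ratios[k]
    # (1 + 3 = 4 for the first five maps, 1 + 2 = 3 for the last),
    # i.e. 4*(38**2 + 19**2 + 10**2 + 5**2 + 3**2) + 3*1**2 = 7759 priors in total.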
############################ 第2个检测层 ############################ 79 | loc_layers += [nn.Conv2d(128, anchor_num[1] * 4, kernel_size=3, padding=1)] 80 | conf_layers += [nn.Conv2d(128, anchor_num[1] * num_classes, kernel_size=3, padding=1)] 81 | ############################ 第3个检测层 ############################ 82 | loc_layers += [nn.Conv2d(128, anchor_num[2] * 4, kernel_size=3, padding=1)] 83 | conf_layers += [nn.Conv2d(128, anchor_num[2] * num_classes, kernel_size=3, padding=1)] 84 | ############################ 第4个检测层 ############################ 85 | loc_layers += [nn.Conv2d(128, anchor_num[3] * 4, kernel_size=3, padding=1)] 86 | conf_layers += [nn.Conv2d(128, anchor_num[3] * num_classes, kernel_size=3, padding=1)] 87 | ############################ 第5个检测层 ############################ 88 | loc_layers += [nn.Conv2d(128, anchor_num[4] * 4, kernel_size=3, padding=1)] 89 | conf_layers += [nn.Conv2d(128, anchor_num[4] * num_classes, kernel_size=3, padding=1)] 90 | ############################ 第6个检测层 ############################ 91 | loc_layers += [nn.Conv2d(64, anchor_num[5] * 4, kernel_size=3, padding=1)] 92 | conf_layers += [nn.Conv2d(64, anchor_num[5] * num_classes, kernel_size=3, padding=1)] 93 | 94 | return (loc_layers, conf_layers) 95 | 96 | 97 | def build_net(phase, size=300, num_classes=2): 98 | if phase != "test" and phase != "train": 99 | print("Error: Phase not recognized") 100 | return 101 | if size != 300: 102 | print("Error: Sorry only RFBNet300 are supported!") 103 | return 104 | 105 | return RFBNet(phase, size, multibox(num_classes), num_classes) 106 | 107 | if __name__ == '__main__': 108 | # 0.99 MB 109 | x = torch.randn(2, 3, 300, 300) 110 | net = build_net('test') 111 | from torchsummary import summary 112 | summary(net, (3, 300, 300)) 113 | -------------------------------------------------------------------------------- /models/__pycache__/RFB_Net_vgg.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/models/__pycache__/RFB_Net_vgg.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/module.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/models/__pycache__/module.cpython-36.pyc -------------------------------------------------------------------------------- /models/module-1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import os 5 | 6 | class BasicConv(nn.Module): 7 | 8 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True): 9 | super(BasicConv, self).__init__() 10 | self.out_channels = out_planes 11 | if bn: 12 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False) 13 | 
self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) 14 | self.relu = nn.ReLU(inplace=True) if relu else None 15 | else: 16 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=True) 17 | self.bn = None 18 | self.relu = nn.ReLU(inplace=True) if relu else None 19 | 20 | def forward(self, x): 21 | x = self.conv(x) 22 | if self.bn is not None: 23 | x = self.bn(x) 24 | if self.relu is not None: 25 | x = self.relu(x) 26 | return x 27 | 28 | 29 | class BasicRFB(nn.Module): 30 | 31 | def __init__(self, in_planes, out_planes, stride=1, scale = 0.1, map_reduce=8, vision=1, groups=1): 32 | super(BasicRFB, self).__init__() 33 | self.scale = scale 34 | self.out_channels = out_planes 35 | inter_planes = in_planes // map_reduce 36 | 37 | self.branch0 = nn.Sequential( 38 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 39 | BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups), 40 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision+1, dilation=vision+1, relu=False, groups=groups) 41 | ) 42 | self.branch1 = nn.Sequential( 43 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 44 | BasicConv(inter_planes, 2*inter_planes, kernel_size=(3,3), stride=stride, padding=(1,1), groups=groups), 45 | BasicConv(2*inter_planes, 2*inter_planes, kernel_size=3, stride=1, padding=vision + 2, dilation=vision + 2, relu=False, groups=groups) 46 | ) 47 | self.branch2 = nn.Sequential( 48 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 49 | BasicConv(inter_planes, (inter_planes//2)*3, kernel_size=3, stride=1, padding=1, groups=groups), 50 | BasicConv((inter_planes//2)*3, 2*inter_planes, kernel_size=3, stride=stride, padding=1, groups=groups), 51 | BasicConv(2*inter_planes, 2*inter_planes, kernel_size=3, stride=1, padding=vision + 4, dilation=vision + 4, relu=False, groups=groups) 52 | ) 53 | 54 | self.ConvLinear = BasicConv(6*inter_planes, out_planes, kernel_size=1, stride=1, relu=False) 55 | self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False) 56 | self.relu = nn.ReLU(inplace=False) 57 | 58 | def forward(self,x): 59 | x0 = self.branch0(x) 60 | x1 = self.branch1(x) 61 | x2 = self.branch2(x) 62 | 63 | out = torch.cat((x0,x1,x2),1) 64 | out = self.ConvLinear(out) 65 | short = self.shortcut(x) 66 | out = out*self.scale + short 67 | out = self.relu(out) 68 | 69 | return out 70 | 71 | class Backbone(nn.Module): 72 | def __init__(self, bn=True): 73 | super(Backbone, self).__init__() 74 | 75 | self.conv1_1 = BasicConv(3, 32, kernel_size=3, padding=1, bn=bn) 76 | self.conv1_2 = BasicConv(32, 32, kernel_size=3, padding=1, bn=bn) 77 | self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2) # 2 78 | 79 | self.conv2_1 = BasicConv(32, 64, kernel_size=3, padding=1, bn=bn) 80 | self.conv2_2 = BasicConv(64, 64, kernel_size=3, padding=1, bn=bn) 81 | self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2) # 4 82 | 83 | self.conv3_1 = BasicConv(64, 128, kernel_size=1, bn=bn) 84 | self.conv3_2 = BasicConv(128, 128, kernel_size=3, padding=1, bn=bn) 85 | self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=bn) # 8 86 | 87 | self.conv4_1 = BasicConv(128, 256, kernel_size=1, bn=bn) 88 | self.conv4_2 = BasicConv(256, 256, kernel_size=3, padding=1, bn=bn) #### f1 #### 89 | self.conv4_3 = 
BasicRFB(256,256,stride = 1,scale=1.0) 90 | self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2) # 16 91 | 92 | self.conv5_1 = BasicConv(256, 128, kernel_size=1, relu=False, bn=bn) 93 | self.conv5_2 = BasicConv(128, 256, kernel_size=3, padding=1, stride=1, bn=bn) #### f2 #### 94 | 95 | self.conv6_1 = BasicConv(256, 128, kernel_size=1, relu=False) 96 | self.conv6_2 = BasicConv(128, 256, kernel_size=3, padding=1, stride=2) #### f3 #### 97 | 98 | self.conv7_1 = BasicConv(256, 128, kernel_size=1, relu=False) 99 | self.conv7_2 = BasicConv(128, 256, kernel_size=3, padding=1, stride=2) #### f4 #### 100 | 101 | self.conv8_1 = BasicConv(256,128,kernel_size=1, relu=False) 102 | self.conv8_2 = BasicConv(128,256,kernel_size=3) #### f5 #### 103 | 104 | self.conv9_1 = BasicConv(256,128,kernel_size=1, relu=False) 105 | self.conv9_2 = BasicConv(128,256,kernel_size=3) #### f6 #### 106 | 107 | 108 | def forward(self, x): 109 | x = self.conv1_1(x) 110 | x = self.conv1_2(x) 111 | x = self.pool1(x) 112 | 113 | x = self.conv2_1(x) 114 | x = self.conv2_2(x) 115 | x = self.pool2(x) 116 | 117 | x = self.conv3_1(x) 118 | x = self.conv3_2(x) 119 | x = self.pool3(x) 120 | 121 | x = self.conv4_1(x) 122 | x = self.conv4_2(x) 123 | x = self.conv4_3(x) 124 | f1 = x # stride = 8 125 | x = self.pool4(x) 126 | 127 | x = self.conv5_1(x) 128 | x = self.conv5_2(x) 129 | f2 = x # stride = 16 130 | 131 | x = self.conv6_1(x) 132 | x = self.conv6_2(x) 133 | f3 = x # stride = 32 134 | 135 | x = self.conv7_1(x) 136 | x = self.conv7_2(x) 137 | f4 = x # stride = 64 138 | 139 | x = self.conv8_1(x) 140 | x = self.conv8_2(x) 141 | f5 = x # -2 142 | 143 | x = self.conv9_1(x) 144 | x = self.conv9_2(x) 145 | f6 = x # -2 146 | 147 | return f1, f2, f3, f4, f5, f6 148 | 149 | 150 | if __name__ == '__main__': 151 | x = torch.randn(2,3,300,300) 152 | model = Backbone() 153 | features = model(x) 154 | -------------------------------------------------------------------------------- /models/module-3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import os 5 | 6 | class BasicConv(nn.Module): 7 | 8 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True): 9 | super(BasicConv, self).__init__() 10 | self.out_channels = out_planes 11 | if bn: 12 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False) 13 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) 14 | self.relu = nn.ReLU(inplace=True) if relu else None 15 | else: 16 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=True) 17 | self.bn = None 18 | self.relu = nn.ReLU(inplace=True) if relu else None 19 | 20 | def forward(self, x): 21 | x = self.conv(x) 22 | if self.bn is not None: 23 | x = self.bn(x) 24 | if self.relu is not None: 25 | x = self.relu(x) 26 | return x 27 | 28 | 29 | class BasicRFB(nn.Module): 30 | 31 | def __init__(self, in_planes, out_planes, stride=1, scale = 0.1, map_reduce=8, vision=1, groups=1): 32 | super(BasicRFB, self).__init__() 33 | self.scale = scale 34 | self.out_channels = out_planes 35 | inter_planes = in_planes // map_reduce 36 | 37 | self.branch0 = nn.Sequential( 38 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 39 | 
BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups), 40 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision+1, dilation=vision+1, relu=False, groups=groups) 41 | ) 42 | self.branch1 = nn.Sequential( 43 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 44 | BasicConv(inter_planes, 2*inter_planes, kernel_size=(3,3), stride=stride, padding=(1,1), groups=groups), 45 | BasicConv(2*inter_planes, 2*inter_planes, kernel_size=3, stride=1, padding=vision + 2, dilation=vision + 2, relu=False, groups=groups) 46 | ) 47 | self.branch2 = nn.Sequential( 48 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 49 | BasicConv(inter_planes, (inter_planes//2)*3, kernel_size=3, stride=1, padding=1, groups=groups), 50 | BasicConv((inter_planes//2)*3, 2*inter_planes, kernel_size=3, stride=stride, padding=1, groups=groups), 51 | BasicConv(2*inter_planes, 2*inter_planes, kernel_size=3, stride=1, padding=vision + 4, dilation=vision + 4, relu=False, groups=groups) 52 | ) 53 | 54 | self.ConvLinear = BasicConv(6*inter_planes, out_planes, kernel_size=1, stride=1, relu=False) 55 | self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False) 56 | self.relu = nn.ReLU(inplace=False) 57 | 58 | def forward(self,x): 59 | x0 = self.branch0(x) 60 | x1 = self.branch1(x) 61 | x2 = self.branch2(x) 62 | 63 | out = torch.cat((x0,x1,x2),1) 64 | out = self.ConvLinear(out) 65 | short = self.shortcut(x) 66 | out = out*self.scale + short 67 | out = self.relu(out) 68 | 69 | return out 70 | 71 | class Backbone(nn.Module): 72 | def __init__(self, bn=True): 73 | super(Backbone, self).__init__() 74 | 75 | self.conv1_1 = BasicConv(3, 32, kernel_size=3, padding=1, bn=bn) 76 | self.conv1_2 = BasicConv(32, 32, kernel_size=3, padding=1, bn=bn) 77 | self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2) # 2 78 | 79 | self.conv2_1 = BasicConv(32, 64, kernel_size=3, padding=1, bn=bn) 80 | self.conv2_2 = BasicConv(64, 64, kernel_size=3, padding=1, bn=bn) 81 | self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2) # 4 82 | 83 | self.conv3_1 = BasicConv(64, 128, kernel_size=1, bn=bn) 84 | self.conv3_2 = BasicConv(128, 128, kernel_size=3, padding=1, bn=bn) 85 | self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=bn) # 8 86 | 87 | self.conv4_1 = BasicConv(128, 128, kernel_size=1, bn=bn) 88 | self.conv4_2 = BasicConv(128, 128, kernel_size=3, padding=1, bn=bn) #### f1 #### 89 | self.conv4_3 = BasicRFB(128,128,stride = 1,scale=1.0) 90 | self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2) # 16 91 | 92 | self.conv5_1 = BasicConv(128, 64, kernel_size=1, relu=False, bn=bn) 93 | self.conv5_2 = BasicConv(64, 128, kernel_size=3, padding=1, stride=1, bn=bn) #### f2 #### 94 | 95 | self.conv6_1 = BasicConv(128, 64, kernel_size=1, relu=False) 96 | self.conv6_2 = BasicConv(64, 128, kernel_size=3, padding=1, stride=2) #### f3 #### 97 | 98 | self.conv7_1 = BasicConv(128, 64, kernel_size=1, relu=False) 99 | self.conv7_2 = BasicConv(64, 128, kernel_size=3, padding=1, stride=2) #### f4 #### 100 | 101 | self.conv8_1 = BasicConv(128,64,kernel_size=1, relu=False) 102 | self.conv8_2 = BasicConv(64,128,kernel_size=3) #### f5 #### 103 | 104 | self.conv9_1 = BasicConv(128,64,kernel_size=1, relu=False) 105 | self.conv9_2 = BasicConv(64,64,kernel_size=3) #### f6 #### 106 | 107 | 108 | def forward(self, x): 109 | x = self.conv1_1(x) 110 | x = self.conv1_2(x) 111 | x = self.pool1(x) 112 | 
113 | x = self.conv2_1(x) 114 | x = self.conv2_2(x) 115 | x = self.pool2(x) 116 | 117 | x = self.conv3_1(x) 118 | x = self.conv3_2(x) 119 | x = self.pool3(x) 120 | 121 | x = self.conv4_1(x) 122 | x = self.conv4_2(x) 123 | x = self.conv4_3(x) 124 | f1 = x # stride = 8 125 | x = self.pool4(x) 126 | 127 | x = self.conv5_1(x) 128 | x = self.conv5_2(x) 129 | f2 = x # stride = 16 130 | 131 | x = self.conv6_1(x) 132 | x = self.conv6_2(x) 133 | f3 = x # stride = 32 134 | 135 | x = self.conv7_1(x) 136 | x = self.conv7_2(x) 137 | f4 = x # stride = 64 138 | 139 | x = self.conv8_1(x) 140 | x = self.conv8_2(x) 141 | f5 = x # -2 142 | 143 | x = self.conv9_1(x) 144 | x = self.conv9_2(x) 145 | f6 = x # -2 146 | 147 | return f1, f2, f3, f4, f5, f6 148 | 149 | 150 | if __name__ == '__main__': 151 | x = torch.randn(2,3,300,300) 152 | model = Backbone() 153 | features = model(x) 154 | -------------------------------------------------------------------------------- /models/module.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import os 5 | 6 | class BasicConv(nn.Module): 7 | 8 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True): 9 | super(BasicConv, self).__init__() 10 | self.out_channels = out_planes 11 | if bn: 12 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False) 13 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) 14 | self.relu = nn.ReLU(inplace=True) if relu else None 15 | else: 16 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=True) 17 | self.bn = None 18 | self.relu = nn.ReLU(inplace=True) if relu else None 19 | 20 | def forward(self, x): 21 | x = self.conv(x) 22 | if self.bn is not None: 23 | x = self.bn(x) 24 | if self.relu is not None: 25 | x = self.relu(x) 26 | return x 27 | 28 | 29 | class BasicRFB(nn.Module): 30 | 31 | def __init__(self, in_planes, out_planes, stride=1, scale = 0.1, map_reduce=8, vision=1, groups=1): 32 | super(BasicRFB, self).__init__() 33 | self.scale = scale 34 | self.out_channels = out_planes 35 | inter_planes = in_planes // map_reduce 36 | 37 | self.branch0 = nn.Sequential( 38 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 39 | BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups), 40 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision+1, dilation=vision+1, relu=False, groups=groups) 41 | ) 42 | self.branch1 = nn.Sequential( 43 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 44 | BasicConv(inter_planes, 2*inter_planes, kernel_size=(3,3), stride=stride, padding=(1,1), groups=groups), 45 | BasicConv(2*inter_planes, 2*inter_planes, kernel_size=3, stride=1, padding=vision + 2, dilation=vision + 2, relu=False, groups=groups) 46 | ) 47 | self.branch2 = nn.Sequential( 48 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 49 | BasicConv(inter_planes, (inter_planes//2)*3, kernel_size=3, stride=1, padding=1, groups=groups), 50 | BasicConv((inter_planes//2)*3, 2*inter_planes, kernel_size=3, stride=stride, padding=1, groups=groups), 51 | BasicConv(2*inter_planes, 
2*inter_planes, kernel_size=3, stride=1, padding=vision + 4, dilation=vision + 4, relu=False, groups=groups) 52 | ) 53 | 54 | self.ConvLinear = BasicConv(6*inter_planes, out_planes, kernel_size=1, stride=1, relu=False) 55 | self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False) 56 | self.relu = nn.ReLU(inplace=False) 57 | 58 | def forward(self,x): 59 | x0 = self.branch0(x) 60 | x1 = self.branch1(x) 61 | x2 = self.branch2(x) 62 | 63 | out = torch.cat((x0,x1,x2),1) 64 | out = self.ConvLinear(out) 65 | short = self.shortcut(x) 66 | out = out*self.scale + short 67 | out = self.relu(out) 68 | 69 | return out 70 | 71 | class Backbone(nn.Module): 72 | 73 | def __init__(self, bn=True): 74 | super(Backbone, self).__init__() 75 | #self.conv1_1 = BasicConv(3, 32, kernel_size=3, padding=1, bn=bn) 76 | self.conv1_1 = nn.Sequential( 77 | nn.Conv2d(3, 32, 3, stride=1, padding=1, bias=False), 78 | nn.BatchNorm2d(32), 79 | nn.ReLU(inplace=True), 80 | ) 81 | 82 | #self.conv1_2 = BasicConv(32, 32, kernel_size=3, padding=1, bn=bn) 83 | self.conv1_2 = nn.Sequential( 84 | nn.Conv2d(32,32, kernel_size=3, stride=1, padding=1,groups = 32, bias=False), 85 | nn.BatchNorm2d(32), 86 | nn.ReLU(inplace=True), 87 | 88 | nn.Conv2d(32, 32, 1, 1, 0, bias=False), 89 | nn.BatchNorm2d(32), 90 | nn.ReLU(inplace=True), 91 | ) 92 | self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2) # 2 93 | 94 | #self.conv2_1 = BasicConv(32, 64, kernel_size=3, padding=1, bn=bn) 95 | self.conv2_1 = nn.Sequential( 96 | nn.Conv2d(32,32, kernel_size=3, stride=1, padding=1,groups = 32, bias=False), 97 | nn.BatchNorm2d(32), 98 | nn.ReLU(inplace=True), 99 | 100 | nn.Conv2d(32, 64, 1, 1, 0, bias=False), 101 | nn.BatchNorm2d(64), 102 | nn.ReLU(inplace=True), 103 | ) 104 | #self.conv2_2 = BasicConv(64, 64, kernel_size=3, padding=1, bn=bn) 105 | self.conv2_2 = nn.Sequential( 106 | nn.Conv2d(64,64, kernel_size=3, stride=1, padding=1,groups = 64, bias=False), 107 | nn.BatchNorm2d(64), 108 | nn.ReLU(inplace=True), 109 | 110 | nn.Conv2d(64, 64, 1, 1, 0, bias=False), 111 | nn.BatchNorm2d(64), 112 | nn.ReLU(inplace=True), 113 | ) 114 | self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2) # 4 115 | 116 | self.conv3_1 = BasicConv(64, 128, kernel_size=1, bn=bn) 117 | #self.conv3_2 = BasicConv(128, 128, kernel_size=3, padding=1, bn=bn) 118 | self.conv3_2 = nn.Sequential( 119 | nn.Conv2d(128,128, kernel_size=3, stride=1, padding=1,groups = 128, bias=False), 120 | nn.BatchNorm2d(128), 121 | nn.ReLU(inplace=True), 122 | 123 | nn.Conv2d(128, 128, 1, 1, 0, bias=False), 124 | nn.BatchNorm2d(128), 125 | nn.ReLU(inplace=True), 126 | ) 127 | self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=bn) # 8 128 | 129 | self.conv4_1 = BasicConv(128, 128, kernel_size=1, bn=bn) 130 | #conv4_2 = BasicConv(128, 128, kernel_size=3, padding=1, bn=bn) #### f1 #### 131 | self.conv4_2 = nn.Sequential( 132 | nn.Conv2d(128,128, kernel_size=3, stride=1, padding=1,groups = 128, bias=False), 133 | nn.BatchNorm2d(128), 134 | nn.ReLU(inplace=True), 135 | 136 | nn.Conv2d(128, 128, 1, 1, 0, bias=False), 137 | nn.BatchNorm2d(128), 138 | nn.ReLU(inplace=True), 139 | ) 140 | self.conv4_3 = BasicRFB(128,128,stride = 1,scale=1.0) 141 | self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2) # 16 142 | 143 | self.conv5_1 = BasicConv(128, 64, kernel_size=1, relu=False, bn=bn) 144 | #self.conv5_2 = BasicConv(64, 128, kernel_size=3, padding=1, stride=1, bn=bn) #### f2 #### 145 | self.conv5_2 = nn.Sequential( 146 | nn.Conv2d(64,64, kernel_size=3, stride=1, padding=1,groups 
= 64, bias=False), 147 | nn.BatchNorm2d(64), 148 | nn.ReLU(inplace=True), 149 | 150 | nn.Conv2d(64, 128, 1, 1, 0, bias=False), 151 | nn.BatchNorm2d(128), 152 | nn.ReLU(inplace=True), 153 | ) 154 | 155 | self.conv6_1 = BasicConv(128, 64, kernel_size=1, relu=False) 156 | #self.conv6_2 = BasicConv(64, 128, kernel_size=3, padding=1, stride=2) #### f3 #### 157 | self.conv6_2 = nn.Sequential( 158 | nn.Conv2d(64,64, kernel_size=3, stride=2, padding=1,groups = 64, bias=False), 159 | nn.BatchNorm2d(64), 160 | nn.ReLU(inplace=True), 161 | 162 | nn.Conv2d(64, 128, 1, 1, 0, bias=False), 163 | nn.BatchNorm2d(128), 164 | nn.ReLU(inplace=True), 165 | ) 166 | 167 | self.conv7_1 = BasicConv(128, 64, kernel_size=1, relu=False) 168 | #self.conv7_2 = BasicConv(64, 128, kernel_size=3, padding=1, stride=2) #### f4 #### 169 | self.conv7_2 = nn.Sequential( 170 | nn.Conv2d(64,64, kernel_size=3, stride=2, padding=1,groups = 64, bias=False), 171 | nn.BatchNorm2d(64), 172 | nn.ReLU(inplace=True), 173 | 174 | nn.Conv2d(64, 128, 1, 1, 0, bias=False), 175 | nn.BatchNorm2d(128), 176 | nn.ReLU(inplace=True), 177 | ) 178 | 179 | self.conv8_1 = BasicConv(128,64,kernel_size=1, relu=False) 180 | #self.conv8_2 = BasicConv(64,128,kernel_size=3) #### f5 #### 181 | self.conv8_2 = nn.Sequential( 182 | nn.Conv2d(64,64, kernel_size=3, stride=1, padding=0,groups = 64, bias=False), 183 | nn.BatchNorm2d(64), 184 | nn.ReLU(inplace=True), 185 | 186 | nn.Conv2d(64, 128, 1, 1, 0, bias=False), 187 | nn.BatchNorm2d(128), 188 | nn.ReLU(inplace=True), 189 | ) 190 | 191 | self.conv9_1 = BasicConv(128,64,kernel_size=1, relu=False) 192 | #self.conv9_2 = BasicConv(64,64,kernel_size=3) #### f6 #### 193 | self.conv9_2 = nn.Sequential( 194 | nn.Conv2d(64,64, kernel_size=3, stride=1, padding=0,groups = 64, bias=False), 195 | nn.BatchNorm2d(64), 196 | nn.ReLU(inplace=True), 197 | 198 | nn.Conv2d(64, 64, 1, 1, 0, bias=False), 199 | nn.BatchNorm2d(64), 200 | nn.ReLU(inplace=True), 201 | ) 202 | 203 | 204 | 205 | 206 | def forward(self, x): 207 | x = self.conv1_1(x) 208 | x = self.conv1_2(x) 209 | x = self.pool1(x) 210 | 211 | x = self.conv2_1(x) 212 | x = self.conv2_2(x) 213 | x = self.pool2(x) 214 | 215 | x = self.conv3_1(x) 216 | x = self.conv3_2(x) 217 | x = self.pool3(x) 218 | 219 | x = self.conv4_1(x) 220 | x = self.conv4_2(x) 221 | x = self.conv4_3(x) 222 | f1 = x # stride = 8 223 | x = self.pool4(x) 224 | 225 | x = self.conv5_1(x) 226 | x = self.conv5_2(x) 227 | f2 = x # stride = 16 228 | 229 | x = self.conv6_1(x) 230 | x = self.conv6_2(x) 231 | f3 = x # stride = 32 232 | 233 | x = self.conv7_1(x) 234 | x = self.conv7_2(x) 235 | f4 = x # stride = 64 236 | 237 | x = self.conv8_1(x) 238 | x = self.conv8_2(x) 239 | f5 = x # -2 240 | 241 | x = self.conv9_1(x) 242 | x = self.conv9_2(x) 243 | f6 = x # -2 244 | 245 | return f1, f2, f3, f4, f5, f6 246 | 247 | 248 | if __name__ == '__main__': 249 | x = torch.randn(2,3,300,300) 250 | model = Backbone() 251 | features = model(x) 252 | -------------------------------------------------------------------------------- /train_RFB.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import os 4 | import torch 5 | import torch.nn as nn 6 | import torch.optim as optim 7 | import torch.backends.cudnn as cudnn 8 | import torchvision.transforms as transforms 9 | import torch.nn.init as init 10 | import argparse 11 | import numpy as np 12 | from torch.autograd import Variable 13 | import torch.utils.data as data 14 | from data 
import VOCroot, VOC_Config, AnnotationTransform, VOCDetection, detection_collate, BaseTransform, preproc 15 | from models.RFB_Net_vgg import build_net 16 | from layers.modules import MultiBoxLoss 17 | from layers.functions import PriorBox 18 | import time 19 | from datetime import datetime 20 | from utils.visualize import * 21 | from tensorboardX import SummaryWriter 22 | 23 | parser = argparse.ArgumentParser( 24 | description='Receptive Field Block Net Training') 25 | parser.add_argument('-max','--max_epoch', default=600, 26 | type=int, help='max epoch for retraining') 27 | parser.add_argument('-b', '--batch_size', default=32, 28 | type=int, help='Batch size for training') 29 | parser.add_argument('--ngpu', default=2, type=int, help='gpus') 30 | parser.add_argument('--lr', '--learning-rate', 31 | default=0.08, type=float, help='initial learning rate') 32 | parser.add_argument('--save_folder', default='./weights/', 33 | help='Location to save checkpoint models') 34 | args = parser.parse_args() 35 | 36 | if not os.path.exists(args.save_folder): 37 | os.mkdir(args.save_folder) 38 | 39 | img_dim = 300 40 | p = 0.5 41 | train_sets = [('2007', 'person_trainval')] 42 | cfg = VOC_Config 43 | rgb_means = (104, 117, 123) 44 | batch_size = args.batch_size 45 | 46 | # tensorboard log directory 47 | # LOG_DIR = 'runs' 48 | log_path = os.path.join('runs', datetime.now().isoformat()) 49 | if not os.path.exists(log_path): 50 | os.makedirs(log_path) 51 | writer = SummaryWriter(log_dir=log_path) 52 | 53 | 54 | net = build_net('train', img_dim, num_classes=3) 55 | 56 | if args.ngpu > 1: 57 | net = torch.nn.DataParallel(net) 58 | 59 | net.cuda() 60 | cudnn.benchmark = True 61 | 62 | optimizer = optim.SGD(net.parameters(), lr=args.lr, 63 | momentum=0.9, weight_decay=1e-4) 64 | 65 | criterion = MultiBoxLoss(num_classes=3, 66 | overlap_thresh=0.4, 67 | prior_for_matching=True, 68 | bkg_label=0, 69 | neg_mining=True, 70 | neg_pos=3, 71 | neg_overlap=0.3, 72 | encode_target=False) 73 | 74 | priorbox = PriorBox(cfg) 75 | with torch.no_grad(): 76 | priors = priorbox.forward() 77 | priors = priors.cuda() 78 | 79 | 80 | def train(): 81 | net.train() 82 | # loss counters 83 | loc_loss = 0 # epoch 84 | conf_loss = 0 85 | epoch = 0 86 | print('Loading Dataset...') 87 | 88 | dataset = VOCDetection(VOCroot, train_sets, preproc(img_dim, rgb_means, p), AnnotationTransform()) 89 | 90 | epoch_size = len(dataset) // args.batch_size 91 | max_iter = args.max_epoch * epoch_size 92 | 93 | stepvalues = (250 * epoch_size, 350 * epoch_size, 500 * epoch_size) 94 | step_index = 0 95 | start_iter = 0 96 | # wangsong sing a song! 
97 | lr = args.lr 98 | for iteration in range(start_iter, max_iter): 99 | if iteration % epoch_size == 0: 100 | if (epoch > 10 and epoch % 10 == 0) or (epoch > 105 and epoch % 2 == 0): 101 | torch.save(net.state_dict(), args.save_folder + 'epoches_' + 102 | repr(epoch).zfill(3) + '.pth') 103 | # create batch iterator 104 | batch_iterator = iter(data.DataLoader(dataset, batch_size, 105 | shuffle=True, num_workers=8, collate_fn=detection_collate)) 106 | loc_loss = 0 107 | conf_loss = 0 108 | epoch += 1 109 | 110 | load_t0 = time.time() 111 | if iteration in stepvalues: 112 | step_index += 1 113 | lr = adjust_learning_rate(optimizer, 0.2, epoch, step_index, iteration, epoch_size) 114 | 115 | 116 | images, targets = next(batch_iterator) 117 | 118 | images = Variable(images.cuda()) 119 | targets = [Variable(anno.cuda()) for anno in targets] 120 | 121 | # forward 122 | t0 = time.time() 123 | out = net(images) 124 | # backprop 125 | optimizer.zero_grad() 126 | loss_l, loss_c = criterion(out, priors, targets) 127 | loss = loss_l + loss_c 128 | loss.backward() 129 | optimizer.step() 130 | t1 = time.time() 131 | loc_loss += loss_l.item() 132 | conf_loss += loss_c.item() 133 | load_t1 = time.time() 134 | 135 | # visualization 136 | visualize_total_loss(writer, loss.item(), iteration) 137 | visualize_loc_loss(writer, loss_l.item(), iteration) 138 | visualize_conf_loss(writer, loss_c.item(), iteration) 139 | 140 | if iteration % 10 == 0: 141 | print('Epoch:' + repr(epoch) + ' || epochiter: ' + repr(iteration % epoch_size) + '/' + repr(epoch_size) 142 | + '|| Totel iter ' + 143 | repr(iteration) + ' || L: %.4f C: %.4f||' % ( 144 | loss_l.item(),loss_c.item()) + 145 | 'Batch time: %.4f sec. ||' % (load_t1 - load_t0) + 'LR: %.8f' % (lr)) 146 | 147 | torch.save(net.state_dict(), args.save_folder + 'epoches_' + 148 | repr(epoch).zfill(3) + '.pth') 149 | 150 | 151 | def adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size): 152 | """Sets the learning rate 153 | # Adapted from PyTorch Imagenet example: 154 | # https://github.com/pytorch/examples/blob/master/imagenet/main.py 155 | """ 156 | if epoch < 11: 157 | lr = 1e-8 + (args.lr-1e-8) * iteration / (epoch_size * 10) 158 | else: 159 | lr = args.lr * (gamma ** (step_index)) 160 | for param_group in optimizer.param_groups: 161 | param_group['lr'] = lr 162 | return lr 163 | 164 | 165 | if __name__ == '__main__': 166 | train() 167 | -------------------------------------------------------------------------------- /utils/box_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | import numpy as np 5 | if torch.cuda.is_available(): 6 | import torch.backends.cudnn as cudnn 7 | 8 | 9 | def point_form(boxes): 10 | """ Convert prior_boxes to (xmin, ymin, xmax, ymax) 11 | representation for comparison to point form ground truth data. 12 | Args: 13 | boxes: (tensor) center-size default boxes from priorbox layers. 14 | Return: 15 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 16 | """ 17 | return torch.cat((boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin 18 | boxes[:, :2] + boxes[:, 2:]/2), 1) # xmax, ymax 19 | 20 | 21 | def center_size(boxes): 22 | """ Convert prior_boxes to (cx, cy, w, h) 23 | representation for comparison to center-size form ground truth data. 24 | Args: 25 | boxes: (tensor) point_form boxes 26 | Return: 27 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 
28 | """ 29 | return torch.cat((boxes[:, 2:] + boxes[:, :2])/2, # cx, cy 30 | boxes[:, 2:] - boxes[:, :2], 1) # w, h 31 | 32 | 33 | def intersect(box_a, box_b): 34 | """ We resize both tensors to [A,B,2] without new malloc: 35 | [A,2] -> [A,1,2] -> [A,B,2] 36 | [B,2] -> [1,B,2] -> [A,B,2] 37 | Then we compute the area of intersect between box_a and box_b. 38 | Args: 39 | box_a: (tensor) bounding boxes, Shape: [A,4]. 40 | box_b: (tensor) bounding boxes, Shape: [B,4]. 41 | Return: 42 | (tensor) intersection area, Shape: [A,B]. 43 | """ 44 | A = box_a.size(0) 45 | B = box_b.size(0) 46 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), 47 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) 48 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), 49 | box_b[:, :2].unsqueeze(0).expand(A, B, 2)) 50 | inter = torch.clamp((max_xy - min_xy), min=0) 51 | return inter[:, :, 0] * inter[:, :, 1] 52 | 53 | 54 | def jaccard(box_a, box_b): 55 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 56 | is simply the intersection over union of two boxes. Here we operate on 57 | ground truth boxes and default boxes. 58 | E.g.: 59 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 60 | Args: 61 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] 62 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] 63 | Return: 64 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] 65 | """ 66 | inter = intersect(box_a, box_b) 67 | area_a = ((box_a[:, 2]-box_a[:, 0]) * 68 | (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] 69 | area_b = ((box_b[:, 2]-box_b[:, 0]) * 70 | (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] 71 | union = area_a + area_b - inter 72 | return inter / union # [A,B] 73 | 74 | def matrix_iou(a,b): 75 | """ 76 | return iou of a and b, numpy version for data augenmentation 77 | """ 78 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) 79 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) 80 | 81 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 82 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 83 | area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) 84 | return area_i / (area_a[:, np.newaxis] + area_b - area_i) 85 | 86 | 87 | def matrix_iof(a, b): 88 | """ 89 | return iof of a and b, numpy version for data augenmentation 90 | """ 91 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) 92 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) 93 | 94 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 95 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 96 | return area_i / np.maximum(area_a[:, np.newaxis], 1) 97 | 98 | def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx): 99 | """Match each prior box with the ground truth box of the highest jaccard 100 | overlap, encode the bounding boxes, then return the matched indices 101 | corresponding to both confidence and location preds. 102 | Args: 103 | threshold: (float) The overlap threshold used when mathing boxes. 104 | truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors]. 105 | priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. 106 | variances: (tensor) Variances corresponding to each prior coord, 107 | Shape: [num_priors, 4]. 108 | labels: (tensor) All the class labels for the image, Shape: [num_obj]. 109 | loc_t: (tensor) Tensor to be filled w/ endcoded location targets. 110 | conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. 
111 | idx: (int) current batch index 112 | Return: 113 | The matched indices corresponding to 1)location and 2)confidence preds. 114 | """ 115 | # jaccard index 116 | overlaps = jaccard( 117 | truths, 118 | point_form(priors) 119 | ) 120 | # (Bipartite Matching) 121 | # [1,num_objects] best prior for each ground truth 122 | best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) 123 | # [1,num_priors] best ground truth for each prior 124 | best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True) 125 | best_truth_idx.squeeze_(0) 126 | best_truth_overlap.squeeze_(0) 127 | best_prior_idx.squeeze_(1) 128 | best_prior_overlap.squeeze_(1) 129 | best_truth_overlap.index_fill_(0, best_prior_idx, 2) # ensure best prior 130 | # TODO refactor: index best_prior_idx with long tensor 131 | # ensure every gt matches with its prior of max overlap 132 | for j in range(best_prior_idx.size(0)): 133 | best_truth_idx[best_prior_idx[j]] = j 134 | matches = truths[best_truth_idx] # Shape: [num_priors,4] 135 | conf = labels[best_truth_idx] # Shape: [num_priors] 136 | conf[best_truth_overlap < threshold] = 0 # label as background 137 | loc = encode(matches, priors, variances) 138 | loc_t[idx] = loc # [num_priors,4] encoded offsets to learn 139 | conf_t[idx] = conf # [num_priors] top class label for each prior 140 | 141 | def encode(matched, priors, variances): 142 | """Encode the variances from the priorbox layers into the ground truth boxes 143 | we have matched (based on jaccard overlap) with the prior boxes. 144 | Args: 145 | matched: (tensor) Coords of ground truth for each prior in point-form 146 | Shape: [num_priors, 4]. 147 | priors: (tensor) Prior boxes in center-offset form 148 | Shape: [num_priors,4]. 149 | variances: (list[float]) Variances of priorboxes 150 | Return: 151 | encoded boxes (tensor), Shape: [num_priors, 4] 152 | """ 153 | 154 | # dist b/t match center and prior's center 155 | g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2] 156 | # encode variance 157 | g_cxcy /= (variances[0] * priors[:, 2:]) 158 | # match wh / prior wh 159 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] 160 | g_wh = torch.log(g_wh) / variances[1] 161 | # return target for smooth_l1_loss 162 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] 163 | 164 | 165 | def encode_multi(matched, priors, offsets, variances): 166 | """Encode the variances from the priorbox layers into the ground truth boxes 167 | we have matched (based on jaccard overlap) with the prior boxes. 168 | Args: 169 | matched: (tensor) Coords of ground truth for each prior in point-form 170 | Shape: [num_priors, 4]. 171 | priors: (tensor) Prior boxes in center-offset form 172 | Shape: [num_priors,4]. 
173 | variances: (list[float]) Variances of priorboxes 174 | Return: 175 | encoded boxes (tensor), Shape: [num_priors, 4] 176 | """ 177 | 178 | # dist b/t match center and prior's center 179 | g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2] - offsets[:,:2] 180 | # encode variance 181 | #g_cxcy /= (variances[0] * priors[:, 2:]) 182 | g_cxcy.div_(variances[0] * offsets[:, 2:]) 183 | # match wh / prior wh 184 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] 185 | g_wh = torch.log(g_wh) / variances[1] 186 | # return target for smooth_l1_loss 187 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] 188 | 189 | # Adapted from https://github.com/Hakuyume/chainer-ssd 190 | def decode(loc, priors, variances): 191 | """Decode locations from predictions using priors to undo 192 | the encoding we did for offset regression at train time. 193 | Args: 194 | loc (tensor): location predictions for loc layers, 195 | Shape: [num_priors,4] 196 | priors (tensor): Prior boxes in center-offset form. 197 | Shape: [num_priors,4]. 198 | variances: (list[float]) Variances of priorboxes 199 | Return: 200 | decoded bounding box predictions 201 | """ 202 | 203 | boxes = torch.cat(( 204 | priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], 205 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) 206 | boxes[:, :2] -= boxes[:, 2:] / 2 207 | boxes[:, 2:] += boxes[:, :2] 208 | return boxes 209 | 210 | def decode_multi(loc, priors, offsets, variances): 211 | """Decode locations from predictions using priors to undo 212 | the encoding we did for offset regression at train time. 213 | Args: 214 | loc (tensor): location predictions for loc layers, 215 | Shape: [num_priors,4] 216 | priors (tensor): Prior boxes in center-offset form. 217 | Shape: [num_priors,4]. 218 | variances: (list[float]) Variances of priorboxes 219 | Return: 220 | decoded bounding box predictions 221 | """ 222 | 223 | boxes = torch.cat(( 224 | priors[:, :2] + offsets[:,:2]+ loc[:, :2] * variances[0] * offsets[:, 2:], 225 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) 226 | boxes[:, :2] -= boxes[:, 2:] / 2 227 | boxes[:, 2:] += boxes[:, :2] 228 | return boxes 229 | 230 | def log_sum_exp(x): 231 | """Utility function for computing log_sum_exp while determining 232 | This will be used to determine unaveraged confidence loss across 233 | all examples in a batch. 234 | Args: 235 | x (Variable(tensor)): conf_preds from conf layers 236 | """ 237 | x_max = x.data.max() 238 | return torch.log(torch.sum(torch.exp(x-x_max), 1, keepdim=True)) + x_max 239 | 240 | 241 | # Original author: Francisco Massa: 242 | # https://github.com/fmassa/object-detection.torch 243 | # Ported to PyTorch by Max deGroot (02/01/2017) 244 | def nms(boxes, scores, overlap=0.5, top_k=200): 245 | """Apply non-maximum suppression at test time to avoid detecting too many 246 | overlapping bounding boxes for a given object. 247 | Args: 248 | boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. 249 | scores: (tensor) The class predscores for the img, Shape:[num_priors]. 250 | overlap: (float) The overlap thresh for suppressing unnecessary boxes. 251 | top_k: (int) The Maximum number of box preds to consider. 252 | Return: 253 | The indices of the kept boxes with respect to num_priors. 
254 | """ 255 | 256 | keep = torch.Tensor(scores.size(0)).fill_(0).long() 257 | if boxes.numel() == 0: 258 | return keep 259 | x1 = boxes[:, 0] 260 | y1 = boxes[:, 1] 261 | x2 = boxes[:, 2] 262 | y2 = boxes[:, 3] 263 | area = torch.mul(x2 - x1, y2 - y1) 264 | v, idx = scores.sort(0) # sort in ascending order 265 | # I = I[v >= 0.01] 266 | idx = idx[-top_k:] # indices of the top-k largest vals 267 | xx1 = boxes.new() 268 | yy1 = boxes.new() 269 | xx2 = boxes.new() 270 | yy2 = boxes.new() 271 | w = boxes.new() 272 | h = boxes.new() 273 | 274 | # keep = torch.Tensor() 275 | count = 0 276 | while idx.numel() > 0: 277 | i = idx[-1] # index of current largest val 278 | # keep.append(i) 279 | keep[count] = i 280 | count += 1 281 | if idx.size(0) == 1: 282 | break 283 | idx = idx[:-1] # remove kept element from view 284 | # load bboxes of next highest vals 285 | torch.index_select(x1, 0, idx, out=xx1) 286 | torch.index_select(y1, 0, idx, out=yy1) 287 | torch.index_select(x2, 0, idx, out=xx2) 288 | torch.index_select(y2, 0, idx, out=yy2) 289 | # store element-wise max with next highest score 290 | xx1 = torch.clamp(xx1, min=x1[i]) 291 | yy1 = torch.clamp(yy1, min=y1[i]) 292 | xx2 = torch.clamp(xx2, max=x2[i]) 293 | yy2 = torch.clamp(yy2, max=y2[i]) 294 | w.resize_as_(xx2) 295 | h.resize_as_(yy2) 296 | w = xx2 - xx1 297 | h = yy2 - yy1 298 | # check sizes of xx1 and xx2.. after each iteration 299 | w = torch.clamp(w, min=0.0) 300 | h = torch.clamp(h, min=0.0) 301 | inter = w*h 302 | # IoU = i / (area(a) + area(b) - i) 303 | rem_areas = torch.index_select(area, 0, idx) # load remaining areas) 304 | union = (rem_areas - inter) + area[i] 305 | IoU = inter/union # store result in iou 306 | # keep only elements with an IoU <= overlap 307 | idx = idx[IoU.le(overlap)] 308 | return keep, count 309 | 310 | 311 | -------------------------------------------------------------------------------- /utils/build.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | import numpy as np 11 | from distutils.core import setup 12 | from distutils.extension import Extension 13 | from Cython.Distutils import build_ext 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 19 | for dir in path.split(os.pathsep): 20 | binpath = pjoin(dir, name) 21 | if os.path.exists(binpath): 22 | return os.path.abspath(binpath) 23 | return None 24 | 25 | 26 | def locate_cuda(): 27 | """Locate the CUDA environment on the system 28 | 29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 30 | and values giving the absolute path to each directory. 31 | 32 | Starts by looking for the CUDAHOME env variable. If not found, everything 33 | is based on finding 'nvcc' in the PATH. 
34 | """ 35 | 36 | # first check if the CUDAHOME env variable is in use 37 | if 'CUDAHOME' in os.environ: 38 | home = os.environ['CUDAHOME'] 39 | nvcc = pjoin(home, 'bin', 'nvcc') 40 | else: 41 | # otherwise, search the PATH for NVCC 42 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 43 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 44 | if nvcc is None: 45 | raise EnvironmentError('The nvcc binary could not be ' 46 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 47 | home = os.path.dirname(os.path.dirname(nvcc)) 48 | 49 | cudaconfig = {'home': home, 'nvcc': nvcc, 50 | 'include': pjoin(home, 'include'), 51 | 'lib64': pjoin(home, 'lib64')} 52 | for k, v in cudaconfig.items(): 53 | if not os.path.exists(v): 54 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 55 | 56 | return cudaconfig 57 | 58 | 59 | CUDA = locate_cuda() 60 | 61 | # Obtain the numpy include directory. This logic works across numpy versions. 62 | try: 63 | numpy_include = np.get_include() 64 | except AttributeError: 65 | numpy_include = np.get_numpy_include() 66 | 67 | 68 | def customize_compiler_for_nvcc(self): 69 | """inject deep into distutils to customize how the dispatch 70 | to gcc/nvcc works. 71 | 72 | If you subclass UnixCCompiler, it's not trivial to get your subclass 73 | injected in, and still have the right customizations (i.e. 74 | distutils.sysconfig.customize_compiler) run on it. So instead of going 75 | the OO route, I have this. Note, it's kindof like a wierd functional 76 | subclassing going on.""" 77 | 78 | # tell the compiler it can processes .cu 79 | self.src_extensions.append('.cu') 80 | 81 | # save references to the default compiler_so and _comple methods 82 | default_compiler_so = self.compiler_so 83 | super = self._compile 84 | 85 | # now redefine the _compile method. This gets executed for each 86 | # object but distutils doesn't have the ability to change compilers 87 | # based on source extension: we add it. 
88 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 89 | print(extra_postargs) 90 | if os.path.splitext(src)[1] == '.cu': 91 | # use the cuda for .cu files 92 | self.set_executable('compiler_so', CUDA['nvcc']) 93 | # use only a subset of the extra_postargs, which are 1-1 translated 94 | # from the extra_compile_args in the Extension class 95 | postargs = extra_postargs['nvcc'] 96 | else: 97 | postargs = extra_postargs['gcc'] 98 | 99 | super(obj, src, ext, cc_args, postargs, pp_opts) 100 | # reset the default compiler_so, which we might have changed for cuda 101 | self.compiler_so = default_compiler_so 102 | 103 | # inject our redefined _compile method into the class 104 | self._compile = _compile 105 | 106 | 107 | # run the customize_compiler 108 | class custom_build_ext(build_ext): 109 | def build_extensions(self): 110 | customize_compiler_for_nvcc(self.compiler) 111 | build_ext.build_extensions(self) 112 | 113 | 114 | ext_modules = [ 115 | Extension( 116 | "nms.cpu_nms", 117 | ["nms/cpu_nms.pyx"], 118 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 119 | include_dirs=[numpy_include] 120 | ), 121 | Extension('nms.gpu_nms', 122 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 123 | library_dirs=[CUDA['lib64']], 124 | libraries=['cudart'], 125 | language='c++', 126 | runtime_library_dirs=[CUDA['lib64']], 127 | # this syntax is specific to this build system 128 | # we're only going to use certain compiler args with nvcc and not with gcc 129 | # the implementation of this trick is in customize_compiler() below 130 | extra_compile_args={'gcc': ["-Wno-unused-function"], 131 | 'nvcc': ['-arch=sm_52', 132 | '--ptxas-options=-v', 133 | '-c', 134 | '--compiler-options', 135 | "'-fPIC'"]}, 136 | include_dirs=[numpy_include, CUDA['include']] 137 | ), 138 | ] 139 | 140 | setup( 141 | name='mot_utils', 142 | ext_modules=ext_modules, 143 | # inject our custom trigger 144 | cmdclass={'build_ext': custom_build_ext}, 145 | ) 146 | -------------------------------------------------------------------------------- /utils/build/temp.linux-aarch64-3.6/nms/cpu_nms.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/build/temp.linux-aarch64-3.6/nms/cpu_nms.o -------------------------------------------------------------------------------- /utils/build/temp.linux-aarch64-3.6/nms/gpu_nms.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/build/temp.linux-aarch64-3.6/nms/gpu_nms.o -------------------------------------------------------------------------------- /utils/build/temp.linux-aarch64-3.6/nms/nms_kernel.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/build/temp.linux-aarch64-3.6/nms/nms_kernel.o -------------------------------------------------------------------------------- /utils/build/temp.linux-x86_64-3.6/nms/cpu_nms.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/build/temp.linux-x86_64-3.6/nms/cpu_nms.o 
-------------------------------------------------------------------------------- /utils/build/temp.linux-x86_64-3.6/nms/gpu_nms.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/build/temp.linux-x86_64-3.6/nms/gpu_nms.o -------------------------------------------------------------------------------- /utils/build/temp.linux-x86_64-3.6/nms/nms_kernel.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/build/temp.linux-x86_64-3.6/nms/nms_kernel.o -------------------------------------------------------------------------------- /utils/nms/cpu_nms.cpython-36m-aarch64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/nms/cpu_nms.cpython-36m-aarch64-linux-gnu.so -------------------------------------------------------------------------------- /utils/nms/cpu_nms.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/nms/cpu_nms.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /utils/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | 
if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def cpu_soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0): 71 | cdef unsigned int N = boxes.shape[0] 72 | cdef float iw, ih, box_area 73 | cdef float ua 74 | cdef int pos = 0 75 | cdef float maxscore = 0 76 | cdef int maxpos = 0 77 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov 78 | 79 | for i in range(N): 80 | maxscore = boxes[i, 4] 81 | maxpos = i 82 | 83 | tx1 = boxes[i,0] 84 | ty1 = boxes[i,1] 85 | tx2 = boxes[i,2] 86 | ty2 = boxes[i,3] 87 | ts = boxes[i,4] 88 | 89 | pos = i + 1 90 | # get max box 91 | while pos < N: 92 | if maxscore < boxes[pos, 4]: 93 | maxscore = boxes[pos, 4] 94 | maxpos = pos 95 | pos = pos + 1 96 | 97 | # add max box as a detection 98 | boxes[i,0] = boxes[maxpos,0] 99 | boxes[i,1] = boxes[maxpos,1] 100 | boxes[i,2] = boxes[maxpos,2] 101 | boxes[i,3] = boxes[maxpos,3] 102 | boxes[i,4] = boxes[maxpos,4] 103 | 104 | # swap ith box with position of max box 105 | boxes[maxpos,0] = tx1 106 | boxes[maxpos,1] = ty1 107 | boxes[maxpos,2] = tx2 108 | boxes[maxpos,3] = ty2 109 | boxes[maxpos,4] = ts 110 | 111 | tx1 = boxes[i,0] 112 | ty1 = boxes[i,1] 113 | tx2 = boxes[i,2] 114 | ty2 = boxes[i,3] 115 | ts = boxes[i,4] 116 | 117 | pos = i + 1 118 | # NMS iterations, note that N changes if detection boxes fall below threshold 119 | while pos < N: 120 | x1 = boxes[pos, 0] 121 | y1 = boxes[pos, 1] 122 | x2 = boxes[pos, 2] 123 | y2 = boxes[pos, 3] 124 | s = boxes[pos, 4] 125 | 126 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 127 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 128 | if iw > 0: 129 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 130 | if ih > 0: 131 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 132 | ov = iw * ih / ua #iou between max box and detection box 133 | 134 | if method == 1: # linear 135 | if ov > Nt: 136 | weight = 1 - ov 137 | else: 138 | weight = 1 139 | elif method == 2: # gaussian 140 | weight = np.exp(-(ov * ov)/sigma) 141 | else: # original NMS 142 | if ov > Nt: 143 | weight = 0 144 | else: 145 | weight = 1 146 | 147 | boxes[pos, 4] = weight*boxes[pos, 4] 148 | 149 | # if box score falls below threshold, discard the box by swapping with last box 150 | # update N 151 | if boxes[pos, 4] < threshold: 152 | boxes[pos,0] = boxes[N-1, 0] 153 | boxes[pos,1] = boxes[N-1, 1] 154 | boxes[pos,2] = boxes[N-1, 2] 155 | boxes[pos,3] = boxes[N-1, 3] 156 | boxes[pos,4] = boxes[N-1, 4] 157 | N = N - 1 158 | pos = pos - 1 159 | 160 | pos = pos + 1 161 | 162 | keep = [i for i in range(N)] 163 | return keep 164 | -------------------------------------------------------------------------------- /utils/nms/gpu_nms.cpython-36m-aarch64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/nms/gpu_nms.cpython-36m-aarch64-linux-gnu.so -------------------------------------------------------------------------------- /utils/nms/gpu_nms.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/nms/gpu_nms.cpython-36m-x86_64-linux-gnu.so
-------------------------------------------------------------------------------- /utils/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 |
-------------------------------------------------------------------------------- /utils/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 |
-------------------------------------------------------------------------------- /utils/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(&current_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 |
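The CUDA kernel above builds a per-block suppression bitmask on the GPU, while the pure-Python baseline in utils/nms/py_cpu_nms.py that follows runs the same greedy NMS on the CPU and is convenient for sanity checks. A minimal usage sketch, with made-up detection values and a hypothetical 0.45 IoU threshold; the import assumes the repository root is on sys.path (the real callers build this array from the decoded network output):

import numpy as np
from utils.nms.py_cpu_nms import py_cpu_nms

# hypothetical detections, one row per box: [x1, y1, x2, y2, score]
dets = np.array([[10., 10., 60., 60., 0.95],      # highest score, kept
                 [12., 12., 62., 62., 0.80],      # heavy overlap with box 0, suppressed
                 [100., 100., 150., 150., 0.70]], # no overlap, kept
                dtype=np.float32)

keep = py_cpu_nms(dets, thresh=0.45)  # indices of surviving boxes, here [0, 2]
print(dets[keep])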
--------------------------------------------------------------------------------
/utils/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def py_cpu_nms(dets, thresh):
11 |     """Pure Python NMS baseline."""
12 |     x1 = dets[:, 0]
13 |     y1 = dets[:, 1]
14 |     x2 = dets[:, 2]
15 |     y2 = dets[:, 3]
16 |     scores = dets[:, 4]
17 |
18 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 |     order = scores.argsort()[::-1]
20 |
21 |     keep = []
22 |     while order.size > 0:
23 |         i = order[0]
24 |         keep.append(i)
25 |         xx1 = np.maximum(x1[i], x1[order[1:]])
26 |         yy1 = np.maximum(y1[i], y1[order[1:]])
27 |         xx2 = np.minimum(x2[i], x2[order[1:]])
28 |         yy2 = np.minimum(y2[i], y2[order[1:]])
29 |
30 |         w = np.maximum(0.0, xx2 - xx1 + 1)
31 |         h = np.maximum(0.0, yy2 - yy1 + 1)
32 |         inter = w * h
33 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 |
35 |         inds = np.where(ovr <= thresh)[0]
36 |         order = order[inds + 1]
37 |
38 |     return keep
39 |
--------------------------------------------------------------------------------
/utils/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | from .nms.cpu_nms import cpu_nms, cpu_soft_nms
9 | from .nms.gpu_nms import gpu_nms
10 |
11 |
12 | # def nms(dets, thresh, force_cpu=False):
13 | #     """Dispatch to either CPU or GPU NMS implementations."""
14 | #
15 | #     if dets.shape[0] == 0:
16 | #         return []
17 | #     if cfg.USE_GPU_NMS and not force_cpu:
18 | #         return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
19 | #     else:
20 | #         return cpu_nms(dets, thresh)
21 |
22 |
23 | def nms(dets, thresh, force_cpu=False):
24 |     """Dispatch to either CPU or GPU NMS implementations."""
25 |
26 |     if dets.shape[0] == 0:
27 |         return []
28 |     if force_cpu:
29 |         #return cpu_soft_nms(dets, thresh, method = 0)
30 |         return cpu_nms(dets, thresh)
31 |     return gpu_nms(dets, thresh)
32 |
--------------------------------------------------------------------------------
/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
10 |
11 | class Timer(object):
12 |     """A simple timer."""
13 |     def __init__(self):
14 |         self.total_time = 0.
15 |         self.calls = 0
16 |         self.start_time = 0.
17 |         self.diff = 0.
18 |         self.average_time = 0.
19 |
20 |     def tic(self):
21 |         # using time.time instead of time.clock because time.clock
22 |         # does not normalize for multithreading
23 |         self.start_time = time.time()
24 |
25 |     def toc(self, average=True):
26 |         self.diff = time.time() - self.start_time
27 |         self.total_time += self.diff
28 |         self.calls += 1
29 |         self.average_time = self.total_time / self.calls
30 |         if average:
31 |             return self.average_time
32 |         else:
33 |             return self.diff
34 |
35 |     def clear(self):
36 |         self.total_time = 0.
37 |         self.calls = 0
38 |         self.start_time = 0.
39 |         self.diff = 0.
40 |         self.average_time = 0.
41 |
--------------------------------------------------------------------------------
/utils/visualize.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 |
5 | import torch
6 | import torch.nn as nn
7 |
8 | from torch.utils.data import DataLoader
9 | from torch.autograd import Variable
10 | from termcolor import cprint
11 |
12 | def print_info(info, _type=None):
13 |     if _type is not None:
14 |         if isinstance(info, str):
15 |             cprint(info, _type[0], attrs=[_type[1]])
16 |         elif isinstance(info, list):
17 |             for i in info:
18 |                 cprint(i, _type[0], attrs=[_type[1]])
19 |     else:
20 |         print(info)
21 |
22 | def get_lastlayer_params(net):
23 |     """get last trainable layer of a net
24 |     Args:
25 |         network architecture
26 |
27 |     Returns:
28 |         last layer weights and last layer bias
29 |     """
30 |     last_layer_weights = None
31 |     last_layer_bias = None
32 |     for name, para in net.named_parameters():
33 |         if 'weight' in name:
34 |             last_layer_weights = para
35 |         if 'bias' in name:
36 |             last_layer_bias = para
37 |
38 |     return last_layer_weights, last_layer_bias
39 |
40 |
41 | def visualize_network(writer, net):
42 |     """visualize network architecture"""
43 |     input_tensor = torch.Tensor(3, 3, 512, 512)
44 |     input_tensor = input_tensor.to(next(net.parameters()))
45 |     writer.add_graph(net, Variable(input_tensor, requires_grad=True))
46 |
47 |
48 | def visualize_lastlayer(writer, net, n_iter):
49 |     """visualize last layer grads"""
50 |     weights, bias = get_lastlayer_params(net)
51 |     writer.add_scalar('LastLayerGradients/grad_norm2_weights', weights.grad.norm(), n_iter)
52 |     writer.add_scalar('LastLayerGradients/grad_norm2_bias', bias.grad.norm(), n_iter)
53 |
54 |
55 | def visualize_total_loss(writer, loss, n_iter):
56 |     """visualize training loss"""
57 |     writer.add_scalar('Train/total_loss', loss, n_iter)
58 |
59 | def visualize_loc_loss(writer, loss, n_iter):
60 |     """visualize training loss"""
61 |     writer.add_scalar('Train/loc_loss', loss, n_iter)
62 |
63 | def visualize_conf_loss(writer, loss, n_iter):
64 |     """visualize training loss"""
65 |     writer.add_scalar('Train/conf_loss', loss, n_iter)
66 |
67 | def visualize_param_hist(writer, net, epoch):
68 |     """visualize histogram of params"""
69 |     for name, param in net.named_parameters():
70 |         layer, attr = os.path.splitext(name)
71 |         attr = attr[1:]
72 |         writer.add_histogram("{}/{}".format(layer, attr), param, epoch)
73 |
74 |
75 | def visualize_test_acc(writer, acc, epoch):
76 |     """visualize test acc"""
77 |     writer.add_scalar('Test/AP', acc, epoch)
78 |
--------------------------------------------------------------------------------
/weights/epoches_100.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/weights/epoches_100.pth
--------------------------------------------------------------------------------
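For completeness, a small sketch of how the Timer and the nms dispatcher above are typically combined when profiling post-processing. This assumes the Cython/CUDA extensions have been built with make.sh and that the utils package is importable from the repository root; the detections are random placeholder values:

import numpy as np
from utils.nms_wrapper import nms
from utils.timer import Timer

# 200 hypothetical detections in a 300x300 image, rows of [x1, y1, x2, y2, score]
xy = np.random.rand(200, 2).astype(np.float32) * 250
wh = np.random.rand(200, 2).astype(np.float32) * 50
scores = np.random.rand(200, 1).astype(np.float32)
dets = np.hstack([xy, xy + wh, scores]).astype(np.float32)

timer = Timer()
timer.tic()
keep = nms(dets, 0.45, force_cpu=True)  # force_cpu=True exercises the Cython CPU path
print(len(keep), 'boxes kept; average NMS time:', timer.toc())

Dropping force_cpu=True routes the same call through gpu_nms instead, which is the path demo.py relies on when running on the Nano's GPU.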