├── README.md ├── data ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── config.cpython-36.pyc │ ├── data_augment.cpython-36.pyc │ ├── voc0712.cpython-36.pyc │ └── voc_eval.cpython-36.pyc ├── config.py ├── data_augment.py ├── voc0712.py └── voc_eval.py ├── demo.py ├── layers ├── __init__.py ├── functions │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── detection.cpython-36.pyc │ │ └── prior_box.cpython-36.pyc │ ├── detection.py │ └── prior_box.py └── modules │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ └── multibox_loss.cpython-36.pyc │ └── multibox_loss.py ├── make.sh ├── models ├── RFB_Net_vgg-1.py ├── RFB_Net_vgg.py ├── __pycache__ │ ├── RFB_Net_vgg.cpython-36.pyc │ ├── __init__.cpython-36.pyc │ └── module.cpython-36.pyc ├── module-1.py ├── module-3.py └── module.py ├── train_RFB.py ├── utils ├── box_utils.py ├── build.py ├── build │ ├── temp.linux-aarch64-3.6 │ │ └── nms │ │ │ ├── cpu_nms.o │ │ │ ├── gpu_nms.o │ │ │ └── nms_kernel.o │ └── temp.linux-x86_64-3.6 │ │ └── nms │ │ ├── cpu_nms.o │ │ ├── gpu_nms.o │ │ └── nms_kernel.o ├── nms │ ├── cpu_nms.c │ ├── cpu_nms.cpython-36m-aarch64-linux-gnu.so │ ├── cpu_nms.cpython-36m-x86_64-linux-gnu.so │ ├── cpu_nms.pyx │ ├── gpu_nms.cpp │ ├── gpu_nms.cpython-36m-aarch64-linux-gnu.so │ ├── gpu_nms.cpython-36m-x86_64-linux-gnu.so │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── py_cpu_nms.py ├── nms_wrapper.py ├── timer.py └── visualize.py └── weights └── epoches_100.pth /README.md: -------------------------------------------------------------------------------- 1 | # fast-object-detection-nano 2 | # Installation guide for this program on the Nano 3 | ## 1. First, set up PyTorch on the Nano 4 | https://blog.csdn.net/donkey_1993/article/details/102794617 5 | ## 2. Then build torch2trt for PyTorch to enable TensorRT acceleration 6 | https://github.com/NVIDIA-AI-IOT/torch2trt 7 | ## 3. Download this project, then run make.sh to build it 8 | sudo bash make.sh 9 | ## 4. 
Modify the video path in demo.py. 10 | sudo python3 demo.py and it will run 11 | 12 | # Training code 13 | ## The voc2007 dataset is used directly. 14 | sudo python3 train_RFB.py 15 | 16 | # Thanks to the two authors below. 17 | https://github.com/ruinmessi/RFBNet 18 | https://github.com/songwsx/RFB-Person 19 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | # from .voc import VOCDetection, AnnotationTransform, detection_collate, VOC_CLASSES 2 | from .voc0712 import VOCDetection, AnnotationTransform, detection_collate, VOC_CLASSES 3 | from .data_augment import * 4 | from .config import * 5 | -------------------------------------------------------------------------------- /data/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/data/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /data/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/data/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /data/__pycache__/data_augment.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/data/__pycache__/data_augment.cpython-36.pyc -------------------------------------------------------------------------------- /data/__pycache__/voc0712.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/data/__pycache__/voc0712.cpython-36.pyc -------------------------------------------------------------------------------- /data/__pycache__/voc_eval.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/data/__pycache__/voc_eval.cpython-36.pyc -------------------------------------------------------------------------------- /data/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | import os.path 3 | 4 | 5 | VOCroot = '/home/common/wangsong/VOC/VOCdevkit' 6 | 7 | #RFB CONFIGS 8 | VOC_Config = { 9 | 'feature_maps' : [38, 19, 10, 5, 3, 1], 10 | 11 | 'min_dim' : 300, 12 | 13 | 'steps' : [8, 16, 32, 64, 100, 300], 14 | 15 | 'min_sizes' : [26, 60, 111, 162, 213, 264], 16 | 17 | 'max_sizes' : [60, 111, 162, 213, 264, 315], 18 | 19 | 'aspect_ratios' : [[0.4, 1.0, 1.5], 20 | [0.4, 1.0, 1.6], 21 | [0.5, 1.1, 1.6], 22 | [0.5, 1.1, 1.6], 23 | [0.5, 1.1, 1.6], 24 | [0.7, 1.4]], 25 | 26 | 'max_ratios' : [0.8, 0.8, 0.8, 0.9, 1, 1], 27 | 28 | 'variance' : [0.1, 0.2], 29 | 30 | 'clip' : True, 31 | } 32 | 33 | 34 | -------------------------------------------------------------------------------- /data/data_augment.py: -------------------------------------------------------------------------------- 1 | """Data augmentation functionality. 
Passed as callable transformations to 2 | Dataset classes. 3 | 4 | The data augmentation procedures were interpreted from @weiliu89's SSD paper 5 | http://arxiv.org/abs/1512.02325 6 | """ 7 | 8 | import torch 9 | from torchvision import transforms 10 | import cv2 11 | import numpy as np 12 | import random 13 | import math 14 | from utils.box_utils import matrix_iou 15 | # import torch_transforms 16 | 17 | def _crop(image, boxes, labels): 18 | height, width, _ = image.shape 19 | 20 | if len(boxes)== 0: 21 | return image, boxes, labels 22 | 23 | while True: 24 | mode = random.choice(( 25 | None, 26 | (0.1, None), 27 | (0.3, None), 28 | (0.5, None), 29 | (0.7, None), 30 | (0.9, None), 31 | (None, None), 32 | )) 33 | 34 | if mode is None: 35 | return image, boxes, labels 36 | 37 | min_iou, max_iou = mode 38 | if min_iou is None: 39 | min_iou = float('-inf') 40 | if max_iou is None: 41 | max_iou = float('inf') 42 | 43 | for _ in range(50): 44 | scale = random.uniform(0.3,1.) 45 | min_ratio = max(0.5, scale*scale) 46 | max_ratio = min(2, 1. / scale / scale) 47 | ratio = math.sqrt(random.uniform(min_ratio, max_ratio)) 48 | w = int(scale * ratio * width) 49 | h = int((scale / ratio) * height) 50 | 51 | 52 | l = random.randrange(width - w) 53 | t = random.randrange(height - h) 54 | roi = np.array((l, t, l + w, t + h)) 55 | 56 | iou = matrix_iou(boxes, roi[np.newaxis]) 57 | 58 | if not (min_iou <= iou.min() and iou.max() <= max_iou): 59 | continue 60 | 61 | image_t = image[roi[1]:roi[3], roi[0]:roi[2]] 62 | 63 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2 64 | mask = np.logical_and(roi[:2] < centers, centers < roi[2:]) \ 65 | .all(axis=1) 66 | boxes_t = boxes[mask].copy() 67 | labels_t = labels[mask].copy() 68 | if len(boxes_t) == 0: 69 | continue 70 | 71 | boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2]) 72 | boxes_t[:, :2] -= roi[:2] 73 | boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:]) 74 | boxes_t[:, 2:] -= roi[:2] 75 | 76 | return image_t, boxes_t,labels_t 77 | 78 | 79 | def _distort(image): 80 | def _convert(image, alpha=1, beta=0): 81 | tmp = image.astype(float) * alpha + beta 82 | tmp[tmp < 0] = 0 83 | tmp[tmp > 255] = 255 84 | image[:] = tmp 85 | 86 | image = image.copy() 87 | 88 | if random.randrange(2): 89 | _convert(image, beta=random.uniform(-32, 32)) 90 | 91 | if random.randrange(2): 92 | _convert(image, alpha=random.uniform(0.5, 1.5)) 93 | 94 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 95 | 96 | if random.randrange(2): 97 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18) 98 | tmp %= 180 99 | image[:, :, 0] = tmp 100 | 101 | if random.randrange(2): 102 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5)) 103 | 104 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 105 | 106 | return image 107 | 108 | 109 | def _expand(image, boxes,fill, p): 110 | if random.random() > p: 111 | return image, boxes 112 | 113 | height, width, depth = image.shape 114 | for _ in range(50): 115 | scale = random.uniform(1,4) 116 | 117 | min_ratio = max(0.5, 1./scale/scale) 118 | max_ratio = min(2, scale*scale) 119 | ratio = math.sqrt(random.uniform(min_ratio, max_ratio)) 120 | ws = scale*ratio 121 | hs = scale/ratio 122 | if ws < 1 or hs < 1: 123 | continue 124 | w = int(ws * width) 125 | h = int(hs * height) 126 | 127 | left = random.randint(0, w - width) 128 | top = random.randint(0, h - height) 129 | 130 | boxes_t = boxes.copy() 131 | boxes_t[:, :2] += (left, top) 132 | boxes_t[:, 2:] += (left, top) 133 | 134 | 135 | expand_image = np.empty( 136 | (h, w, depth), 137 | 
dtype=image.dtype) 138 | expand_image[:, :] = fill 139 | expand_image[top:top + height, left:left + width] = image 140 | image = expand_image 141 | 142 | return image, boxes_t 143 | 144 | 145 | def _mirror(image, boxes): 146 | _, width, _ = image.shape 147 | if random.randrange(2): 148 | image = image[:, ::-1] 149 | boxes = boxes.copy() 150 | boxes[:, 0::2] = width - boxes[:, 2::-2] 151 | return image, boxes 152 | 153 | 154 | def preproc_for_test(image, insize, mean): 155 | interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4] 156 | interp_method = interp_methods[random.randrange(5)] 157 | image = cv2.resize(image, (insize, insize),interpolation=interp_method) 158 | image = image.astype(np.float32) 159 | image -= mean 160 | return image.transpose(2, 0, 1) 161 | 162 | 163 | class preproc(object): 164 | 165 | def __init__(self, resize, rgb_means, p): 166 | self.means = rgb_means 167 | self.resize = resize 168 | self.p = p 169 | 170 | def __call__(self, image, targets): 171 | boxes = targets[:,:-1].copy() 172 | labels = targets[:,-1].copy() 173 | if len(boxes) == 0: 174 | #boxes = np.empty((0, 4)) 175 | targets = np.zeros((1,5)) 176 | image = preproc_for_test(image, self.resize, self.means) 177 | return torch.from_numpy(image), targets 178 | 179 | image_o = image.copy() 180 | targets_o = targets.copy() 181 | height_o, width_o, _ = image_o.shape 182 | boxes_o = targets_o[:,:-1] 183 | labels_o = targets_o[:,-1] 184 | boxes_o[:, 0::2] /= width_o 185 | boxes_o[:, 1::2] /= height_o 186 | labels_o = np.expand_dims(labels_o,1) 187 | targets_o = np.hstack((boxes_o,labels_o)) 188 | 189 | image_t, boxes, labels = _crop(image, boxes, labels) 190 | image_t = _distort(image_t) 191 | image_t, boxes = _expand(image_t, boxes, self.means, self.p) 192 | image_t, boxes = _mirror(image_t, boxes) 193 | #image_t, boxes = _mirror(image, boxes) 194 | 195 | height, width, _ = image_t.shape 196 | image_t = preproc_for_test(image_t, self.resize, self.means) 197 | boxes = boxes.copy() 198 | boxes[:, 0::2] /= width 199 | boxes[:, 1::2] /= height 200 | b_w = (boxes[:, 2] - boxes[:, 0])*1. 201 | b_h = (boxes[:, 3] - boxes[:, 1])*1. 
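        # b_w and b_h are in normalized image coordinates at this point, so the mask below drops boxes whose shorter side is smaller than 1% of the image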
202 | mask_b= np.minimum(b_w, b_h) > 0.01 203 | boxes_t = boxes[mask_b] 204 | labels_t = labels[mask_b].copy() 205 | 206 | if len(boxes_t)==0: 207 | image = preproc_for_test(image_o, self.resize, self.means) 208 | return torch.from_numpy(image),targets_o 209 | 210 | labels_t = np.expand_dims(labels_t,1) 211 | targets_t = np.hstack((boxes_t,labels_t)) 212 | 213 | return torch.from_numpy(image_t), targets_t 214 | 215 | 216 | 217 | class BaseTransform(object): 218 | """Defines the transformations that should be applied to test PIL image 219 | for input into the network 220 | 221 | dimension -> tensorize -> color adj 222 | 223 | Arguments: 224 | resize (int): input dimension to SSD 225 | rgb_means ((int,int,int)): average RGB of the dataset 226 | (104,117,123) 227 | swap ((int,int,int)): final order of channels 228 | Returns: 229 | transform (transform) : callable transform to be applied to test/val 230 | data 231 | """ 232 | def __init__(self, resize, rgb_means, swap=(2, 0, 1)): 233 | self.means = rgb_means 234 | self.resize = resize 235 | self.swap = swap 236 | 237 | # assume input is cv2 img for now 238 | def __call__(self, img): 239 | 240 | interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4] 241 | interp_method = interp_methods[0] 242 | img = cv2.resize(np.array(img), (self.resize, 243 | self.resize),interpolation = interp_method).astype(np.float32) 244 | img -= self.means 245 | img = img.transpose(self.swap) 246 | return torch.from_numpy(img) 247 | -------------------------------------------------------------------------------- /data/voc0712.py: -------------------------------------------------------------------------------- 1 | """VOC Dataset Classes 2 | 3 | Original author: Francisco Massa 4 | https://github.com/fmassa/vision/blob/voc_dataset/torchvision/datasets/voc.py 5 | 6 | Updated by: Ellis Brown, Max deGroot 7 | """ 8 | 9 | import os 10 | import pickle 11 | import os.path 12 | import sys 13 | import torch 14 | import torch.utils.data as data 15 | import torchvision.transforms as transforms 16 | from PIL import Image, ImageDraw, ImageFont 17 | import cv2 18 | import numpy as np 19 | from .voc_eval import voc_eval 20 | if sys.version_info[0] == 2: 21 | import xml.etree.cElementTree as ET 22 | else: 23 | import xml.etree.ElementTree as ET 24 | 25 | 26 | VOC_CLASSES = ( '__background__', # always index 0 27 | 'person') 28 | 29 | # for making bounding boxes pretty 30 | COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128), 31 | (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128)) 32 | 33 | 34 | 35 | 36 | class AnnotationTransform(object): 37 | 38 | """Transforms a VOC annotation into a Tensor of bbox coords and label index 39 | Initilized with a dictionary lookup of classnames to indexes 40 | 41 | Arguments: 42 | class_to_ind (dict, optional): dictionary lookup of classnames -> indexes 43 | (default: alphabetic indexing of VOC's 20 classes) 44 | keep_difficult (bool, optional): keep difficult instances or not 45 | (default: False) 46 | height (int): height 47 | width (int): width 48 | """ 49 | 50 | def __init__(self, class_to_ind=None, keep_difficult=True): 51 | self.class_to_ind = class_to_ind or dict( 52 | zip(VOC_CLASSES, range(len(VOC_CLASSES)))) 53 | self.keep_difficult = keep_difficult 54 | 55 | def __call__(self, target): 56 | """ 57 | Arguments: 58 | target (annotation) : the target annotation to be made usable 59 | will be an ET.Element 60 | Returns: 61 | a list containing lists of bounding boxes [bbox 
coords, class name] 62 | """ 63 | res = np.empty((0,5)) 64 | for obj in target.iter('object'): 65 | difficult = int(obj.find('difficult').text) == 1 66 | if not self.keep_difficult and difficult: 67 | continue 68 | name = obj.find('name').text.lower().strip() 69 | if name != 'person': 70 | continue 71 | bbox = obj.find('bndbox') 72 | 73 | pts = ['xmin', 'ymin', 'xmax', 'ymax'] 74 | bndbox = [] 75 | for i, pt in enumerate(pts): 76 | cur_pt = int(bbox.find(pt).text) - 1 77 | # scale height or width 78 | #cur_pt = cur_pt / width if i % 2 == 0 else cur_pt / height 79 | bndbox.append(cur_pt) 80 | label_idx = self.class_to_ind[name] 81 | bndbox.append(label_idx) 82 | res = np.vstack((res,bndbox)) # [xmin, ymin, xmax, ymax, label_ind] 83 | # img_id = target.find('filename').text[:-4] 84 | 85 | return res # [[xmin, ymin, xmax, ymax, label_ind], ... ] 86 | 87 | 88 | class VOCDetection(data.Dataset): 89 | 90 | """VOC Detection Dataset Object 91 | 92 | input is image, target is annotation 93 | 94 | Arguments: 95 | root (string): filepath to VOCdevkit folder. 96 | image_set (string): imageset to use (eg. 'train', 'val', 'test') 97 | transform (callable, optional): transformation to perform on the 98 | input image 99 | target_transform (callable, optional): transformation to perform on the 100 | target `annotation` 101 | (eg: take in caption string, return tensor of word indices) 102 | dataset_name (string, optional): which dataset to load 103 | (default: 'VOC2007') 104 | """ 105 | 106 | def __init__(self, root, image_sets, preproc=None, target_transform=None, 107 | dataset_name='VOC0712'): 108 | self.root = root 109 | self.image_set = image_sets 110 | self.preproc = preproc 111 | self.target_transform = target_transform 112 | self.name = dataset_name 113 | self._annopath = os.path.join('%s', 'Annotations', '%s.xml') 114 | self._imgpath = os.path.join('%s', 'JPEGImages', '%s.jpg') 115 | self.ids = list() 116 | for (year, name) in image_sets: 117 | self._year = year 118 | rootpath = os.path.join(self.root, 'VOC' + year) 119 | for line in open(os.path.join(rootpath, 'ImageSets', 'Main', name + '.txt')): 120 | img_id, value = line.split() 121 | if value != '1': 122 | continue 123 | self.ids.append((rootpath, img_id)) 124 | 125 | def __getitem__(self, index): 126 | img_id = self.ids[index] 127 | target = ET.parse(self._annopath % img_id).getroot() 128 | img = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR) 129 | height, width, _ = img.shape 130 | 131 | if self.target_transform is not None: 132 | target = self.target_transform(target) 133 | 134 | 135 | if self.preproc is not None: 136 | img, target = self.preproc(img, target) 137 | 138 | 139 | return img, target 140 | 141 | def __len__(self): 142 | return len(self.ids) 143 | 144 | def pull_image(self, index): 145 | '''Returns the original image object at index in PIL form 146 | 147 | Note: not using self.__getitem__(), as any transformations passed in 148 | could mess up this functionality. 149 | 150 | Argument: 151 | index (int): index of img to show 152 | Return: 153 | PIL img 154 | ''' 155 | img_id = self.ids[index] 156 | return cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR) 157 | 158 | def pull_anno(self, index): 159 | '''Returns the original annotation of image at index 160 | 161 | Note: not using self.__getitem__(), as any transformations passed in 162 | could mess up this functionality. 
163 | 164 | Argument: 165 | index (int): index of img to get annotation of 166 | Return: 167 | list: [img_id, [(label, bbox coords),...]] 168 | eg: ('001718', [('dog', (96, 13, 438, 332))]) 169 | ''' 170 | img_id = self.ids[index] 171 | anno = ET.parse(self._annopath % img_id).getroot() 172 | gt = self.target_transform(anno, 1, 1) 173 | return img_id[1], gt 174 | 175 | def pull_tensor(self, index): 176 | '''Returns the original image at an index in tensor form 177 | 178 | Note: not using self.__getitem__(), as any transformations passed in 179 | could mess up this functionality. 180 | 181 | Argument: 182 | index (int): index of img to show 183 | Return: 184 | tensorized version of img, squeezed 185 | ''' 186 | to_tensor = transforms.ToTensor() 187 | return torch.Tensor(self.pull_image(index)).unsqueeze_(0) 188 | 189 | def evaluate_detections(self, all_boxes, output_dir=None): 190 | """ 191 | all_boxes is a list of length number-of-classes. 192 | Each list element is a list of length number-of-images. 193 | Each of those list elements is either an empty list [] 194 | or a numpy array of detection. 195 | 196 | all_boxes[class][image] = [] or np.array of shape #dets x 5 197 | """ 198 | self._write_voc_results_file(all_boxes) 199 | self._do_python_eval(output_dir) 200 | 201 | def _get_voc_results_file_template(self): 202 | filename = 'comp4_det_test' + '_{:s}.txt' 203 | filedir = os.path.join( 204 | self.root, 'results', 'VOC' + self._year, 'Main') 205 | if not os.path.exists(filedir): 206 | os.makedirs(filedir) 207 | path = os.path.join(filedir, filename) 208 | return path 209 | 210 | def _write_voc_results_file(self, all_boxes): 211 | for cls_ind, cls in enumerate(VOC_CLASSES): 212 | cls_ind = cls_ind 213 | if cls == '__background__': 214 | continue 215 | print('Writing {} VOC results file'.format(cls)) 216 | filename = self._get_voc_results_file_template().format(cls) 217 | with open(filename, 'wt') as f: 218 | for im_ind, index in enumerate(self.ids): 219 | index = index[1] 220 | dets = all_boxes[cls_ind][im_ind] 221 | if dets == []: 222 | continue 223 | for k in range(dets.shape[0]): 224 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. 225 | format(index, dets[k, -1], 226 | dets[k, 0] + 1, dets[k, 1] + 1, 227 | dets[k, 2] + 1, dets[k, 3] + 1)) 228 | 229 | def _do_python_eval(self, output_dir='output'): 230 | rootpath = os.path.join(self.root, 'VOC' + self._year) 231 | name = self.image_set[0][1] 232 | annopath = os.path.join( 233 | rootpath, 234 | 'Annotations', 235 | '{:s}.xml') 236 | imagesetfile = os.path.join( 237 | rootpath, 238 | 'ImageSets', 239 | 'Main', 240 | name+'.txt') 241 | cachedir = os.path.join(self.root, 'annotations_cache') 242 | aps = [] 243 | # The PASCAL VOC metric changed in 2010 244 | use_07_metric = True if int(self._year) < 2010 else False 245 | print('VOC07 metric? 
' + ('Yes' if use_07_metric else 'No')) 246 | if output_dir is not None and not os.path.isdir(output_dir): 247 | os.mkdir(output_dir) 248 | for i, cls in enumerate(VOC_CLASSES): 249 | 250 | if cls == '__background__': 251 | continue 252 | 253 | filename = self._get_voc_results_file_template().format(cls) 254 | rec, prec, ap = voc_eval( 255 | filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5, 256 | use_07_metric=use_07_metric) 257 | aps += [ap] 258 | print('AP for {} = {:.4f}'.format(cls, ap)) 259 | if output_dir is not None: 260 | with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f: 261 | pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f) 262 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 263 | print('~~~~~~~~') 264 | print('Results:') 265 | for ap in aps: 266 | print('{:.3f}'.format(ap)) 267 | print('{:.3f}'.format(np.mean(aps))) 268 | print('~~~~~~~~') 269 | 270 | 271 | def detection_collate(batch): 272 | """Custom collate fn for dealing with batches of images that have a different 273 | number of associated object annotations (bounding boxes). 274 | 275 | Arguments: 276 | batch: (tuple) A tuple of tensor images and lists of annotations 277 | 278 | Return: 279 | A tuple containing: 280 | 1) (tensor) batch of images stacked on their 0 dim 281 | 2) (list of tensors) annotations for a given image are stacked on 0 dim 282 | """ 283 | targets = [] 284 | imgs = [] 285 | for _, sample in enumerate(batch): 286 | for _, tup in enumerate(sample): 287 | if torch.is_tensor(tup): 288 | imgs.append(tup) 289 | elif isinstance(tup, type(np.empty(0))): 290 | annos = torch.from_numpy(tup).float() 291 | targets.append(annos) 292 | 293 | return (torch.stack(imgs, 0), targets) 294 | -------------------------------------------------------------------------------- /data/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | 7 | import xml.etree.ElementTree as ET 8 | import os 9 | import pickle 10 | import numpy as np 11 | import pdb 12 | 13 | 14 | def parse_rec(filename): 15 | """ Parse a PASCAL VOC xml file """ 16 | tree = ET.parse(filename) 17 | objects = [] 18 | for obj in tree.findall('object'): 19 | obj_struct = {} 20 | obj_struct['name'] = obj.find('name').text 21 | obj_struct['pose'] = obj.find('pose').text 22 | obj_struct['truncated'] = int(obj.find('truncated').text) 23 | obj_struct['difficult'] = int(obj.find('difficult').text) 24 | bbox = obj.find('bndbox') 25 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 26 | int(bbox.find('ymin').text), 27 | int(bbox.find('xmax').text), 28 | int(bbox.find('ymax').text)] 29 | objects.append(obj_struct) 30 | 31 | return objects 32 | 33 | 34 | 35 | def voc_ap(rec, prec, use_07_metric=False): 36 | """ ap = voc_ap(rec, prec, [use_07_metric]) 37 | Compute VOC AP given precision and recall. 38 | If use_07_metric is true, uses the 39 | VOC 07 11 point method (default:False). 40 | """ 41 | if use_07_metric: 42 | # 11 point metric 43 | ap = 0. 44 | for t in np.arange(0., 1.1, 0.1): 45 | if np.sum(rec >= t) == 0: 46 | p = 0 47 | else: 48 | p = np.max(prec[rec >= t]) 49 | ap = ap + p / 11. 
50 | else: 51 | # correct AP calculation 52 | # first append sentinel values at the end 53 | mrec = np.concatenate(([0.], rec, [1.])) 54 | mpre = np.concatenate(([0.], prec, [0.])) 55 | 56 | # compute the precision envelope 57 | for i in range(mpre.size - 1, 0, -1): 58 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 59 | 60 | # to calculate area under PR curve, look for points 61 | # where X axis (recall) changes value 62 | i = np.where(mrec[1:] != mrec[:-1])[0] 63 | 64 | # and sum (\Delta recall) * prec 65 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 66 | return ap 67 | 68 | def voc_eval(detpath, 69 | annopath, 70 | imagesetfile, 71 | classname, 72 | cachedir, 73 | ovthresh=0.5, 74 | use_07_metric=False): 75 | """rec, prec, ap = voc_eval(detpath, 76 | annopath, 77 | imagesetfile, 78 | classname, 79 | [ovthresh], 80 | [use_07_metric]) 81 | 82 | Top level function that does the PASCAL VOC evaluation. 83 | 84 | detpath: Path to detections 85 | detpath.format(classname) should produce the detection results file. 86 | annopath: Path to annotations 87 | annopath.format(imagename) should be the xml annotations file. 88 | imagesetfile: Text file containing the list of images, one image per line. 89 | classname: Category name (duh) 90 | cachedir: Directory for caching the annotations 91 | [ovthresh]: Overlap threshold (default = 0.5) 92 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 93 | (default False) 94 | """ 95 | # assumes detections are in detpath.format(classname) 96 | # assumes annotations are in annopath.format(imagename) 97 | # assumes imagesetfile is a text file with each line an image name 98 | # cachedir caches the annotations in a pickle file 99 | 100 | # first load gt 101 | if not os.path.isdir(cachedir): 102 | os.mkdir(cachedir) 103 | cachefile = os.path.join(cachedir, 'annots.pkl') 104 | # read list of images 105 | with open(imagesetfile, 'r') as f: 106 | lines = f.readlines() 107 | #imagenames = [x.strip() for x in lines] 108 | imagenames = [] 109 | for line in lines: 110 | img_id, value = line.split() 111 | if value != '1': 112 | continue 113 | imagenames.append(img_id) 114 | 115 | if not os.path.isfile(cachefile): 116 | # load annots 117 | recs = {} 118 | for i, imagename in enumerate(imagenames): 119 | recs[imagename] = parse_rec(annopath.format(imagename)) 120 | if i % 100 == 0: 121 | print('Reading annotation for {:d}/{:d}'.format( 122 | i + 1, len(imagenames))) 123 | # save 124 | print('Saving cached annotations to {:s}'.format(cachefile)) 125 | with open(cachefile, 'wb') as f: 126 | pickle.dump(recs, f) 127 | else: 128 | # load 129 | with open(cachefile, 'rb') as f: 130 | recs = pickle.load(f) 131 | 132 | # extract gt objects for this class 133 | class_recs = {} 134 | npos = 0 135 | for imagename in imagenames: 136 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 137 | bbox = np.array([x['bbox'] for x in R]) 138 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 139 | det = [False] * len(R) 140 | npos = npos + sum(~difficult) 141 | class_recs[imagename] = {'bbox': bbox, 142 | 'difficult': difficult, 143 | 'det': det} 144 | 145 | # read dets 146 | detfile = detpath.format(classname) 147 | with open(detfile, 'r') as f: 148 | lines = f.readlines() 149 | 150 | splitlines = [x.strip().split(' ') for x in lines] 151 | image_ids = [x[0] for x in splitlines] 152 | confidence = np.array([float(x[1]) for x in splitlines]) 153 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 154 | 155 | # sort by confidence 
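    # (descending order, so each ground-truth box is claimed by its highest-scoring detection when TPs/FPs are marked below)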
156 | sorted_ind = np.argsort(-confidence) 157 | sorted_scores = np.sort(-confidence) 158 | BB = BB[sorted_ind, :] 159 | image_ids = [image_ids[x] for x in sorted_ind] 160 | 161 | # go down dets and mark TPs and FPs 162 | nd = len(image_ids) 163 | tp = np.zeros(nd) 164 | fp = np.zeros(nd) 165 | for d in range(nd): 166 | R = class_recs[image_ids[d]] 167 | bb = BB[d, :].astype(float) 168 | ovmax = -np.inf 169 | BBGT = R['bbox'].astype(float) 170 | 171 | if BBGT.size > 0: 172 | # compute overlaps 173 | # intersection 174 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 175 | iymin = np.maximum(BBGT[:, 1], bb[1]) 176 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 177 | iymax = np.minimum(BBGT[:, 3], bb[3]) 178 | iw = np.maximum(ixmax - ixmin + 1., 0.) 179 | ih = np.maximum(iymax - iymin + 1., 0.) 180 | inters = iw * ih 181 | 182 | # union 183 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 184 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 185 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 186 | 187 | overlaps = inters / uni 188 | ovmax = np.max(overlaps) 189 | jmax = np.argmax(overlaps) 190 | 191 | if ovmax > ovthresh: 192 | if not R['difficult'][jmax]: 193 | if not R['det'][jmax]: 194 | tp[d] = 1. 195 | R['det'][jmax] = 1 196 | else: 197 | fp[d] = 1. 198 | else: 199 | fp[d] = 1. 200 | 201 | # compute precision recall 202 | fp = np.cumsum(fp) 203 | tp = np.cumsum(tp) 204 | rec = tp / float(npos) 205 | # avoid divide by zero in case the first detection matches a difficult 206 | # ground truth 207 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 208 | ap = voc_ap(rec, prec, use_07_metric) 209 | 210 | return rec, prec, ap 211 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import os 4 | import pickle 5 | import argparse 6 | import torch 7 | import torch.nn as nn 8 | import torch.backends.cudnn as cudnn 9 | import torchvision.transforms as transforms 10 | import numpy as np 11 | from torch.autograd import Variable 12 | from data import VOCroot 13 | from data import AnnotationTransform,VOCDetection, BaseTransform, VOC_Config 14 | from models.RFB_Net_vgg import build_net 15 | import torch.utils.data as data 16 | from layers.functions import Detect,PriorBox 17 | from utils.nms_wrapper import nms 18 | from utils.timer import Timer 19 | import cv2 20 | import time 21 | from collections import deque 22 | from torch2trt import torch2trt 23 | parser = argparse.ArgumentParser(description='Receptive Field Block Net') 24 | parser.add_argument('--img_dir', default='images', type=str, 25 | help='Dir to save results') 26 | parser.add_argument('-m', '--trained_model', default='weights/epoches_100.pth', 27 | type=str, help='Trained state_dict file path to open') 28 | parser.add_argument('--cuda', default=True, type=bool, 29 | help='Use cuda to train model') 30 | parser.add_argument('--cpu', default=False, type=bool, 31 | help='Use cpu nms') 32 | args = parser.parse_args() 33 | 34 | cfg = VOC_Config 35 | img_dim = 300 36 | num_classes = 2 37 | rgb_means = (104, 117, 123) 38 | 39 | priorbox = PriorBox(cfg) 40 | with torch.no_grad(): 41 | priors = priorbox.forward() 42 | if args.cuda: 43 | priors = priors.cuda() 44 | 45 | class ObjectDetector: 46 | def __init__(self, net, detection, transform, num_classes=21, thresh=0.2, cuda=True): 47 | self.net = net 48 | self.detection = detection 49 | self.transform = transform 50 | self.num_classes = 
num_classes 51 | self.thresh = thresh 52 | self.cuda = cuda 53 | 54 | def predict(self, img): 55 | _t = {'im_detect': Timer(), 'misc': Timer()} 56 | scale = torch.Tensor([img.shape[1], img.shape[0], 57 | img.shape[1], img.shape[0]]) 58 | with torch.no_grad(): 59 | x = self.transform(img).unsqueeze(0) 60 | if self.cuda: 61 | x = x.cuda() 62 | scale = scale.cuda() 63 | _t['im_detect'].tic() 64 | out = model_trt(x) # forward pass 65 | #print(out) 66 | boxes, scores = self.detection.forward(out, priors) 67 | detect_time = _t['im_detect'].toc() 68 | boxes = boxes[0] 69 | scores = scores[0] 70 | # scale each detection back up to the image 71 | boxes *= scale 72 | boxes = boxes.cpu().numpy() 73 | scores = scores.cpu().numpy() 74 | _t['misc'].tic() 75 | all_boxes = [[] for _ in range(num_classes)] 76 | for j in range(1, num_classes): 77 | inds = np.where(scores[:, j] > self.thresh)[0] 78 | if len(inds) == 0: 79 | all_boxes[j] = np.zeros([0, 5], dtype=np.float32) 80 | continue 81 | c_bboxes = boxes[inds] 82 | c_scores = scores[inds, j] 83 | #print(scores[:, j]) 84 | c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype( 85 | np.float32, copy=False) 86 | # keep = nms(c_bboxes,c_scores) 87 | keep = nms(c_dets, 0.2, force_cpu=args.cpu) 88 | c_dets = c_dets[keep, :] 89 | all_boxes[j] = c_dets 90 | nms_time = _t['misc'].toc() 91 | total_time = detect_time+nms_time 92 | #print('total time: ', total_time) 93 | return all_boxes, total_time 94 | 95 | if __name__ == '__main__': 96 | # load net 97 | net = build_net('test', img_dim, num_classes) # initialize detector 98 | state_dict = torch.load(args.trained_model) 99 | # create new OrderedDict that does not contain `module.` 100 | from collections import OrderedDict 101 | new_state_dict = OrderedDict() 102 | for k, v in state_dict.items(): 103 | head = k[:7] 104 | if head == 'module.': 105 | name = k[7:] # remove `module.` 106 | else: 107 | name = k 108 | new_state_dict[name] = v 109 | net.load_state_dict(new_state_dict) 110 | net.eval() 111 | print('Finished loading model!') 112 | if args.cuda: 113 | net = net.cuda() 114 | cudnn.benchmark = True 115 | else: 116 | net = net.cpu() 117 | detector = Detect(num_classes,0,cfg) 118 | transform = BaseTransform(img_dim, rgb_means, (2, 0, 1)) 119 | cap = cv2.VideoCapture('11.mp4') 120 | #cap1 = cv2.VideoCapture('rtsp://admin:uc123456@101.205.119.109:554/Streaming/Channels/301') 121 | ret,image = cap.read() 122 | x = transform(image).unsqueeze(0) 123 | x = x.cuda() 124 | model_trt = torch2trt(net,[x]) 125 | object_detector = ObjectDetector(model_trt, detector, transform) 126 | img_list = os.listdir(args.img_dir) 127 | frame_no = 0 128 | fourcc = cv2.VideoWriter_fourcc(*'MJPG') 129 | output = cv2.VideoWriter("demo1.avi", fourcc, 20, (1280, 720)) 130 | while True: 131 | start = time.time() 132 | frame_no +=1 133 | #print(frame_no) 134 | #try: 135 | ret,image = cap.read() 136 | #ret1,image1 = cap1.read() 137 | detect_bboxes, tim = object_detector.predict(image) 138 | for i in range(len(detect_bboxes[1])): 139 | pt = detect_bboxes[1][i] 140 | cv2.rectangle(image,(pt[0],pt[1]),(pt[2],pt[3]),(0,255,0),2) 141 | print(detect_bboxes) 142 | #detect_bboxes1, tim1 = object_detector.predict(image1) 143 | end = time.time() 144 | frame_time = end - start 145 | print(frame_time) 146 | cv2.imshow('result',image) 147 | # cv2.imshow('result1',image1) 148 | cv2.waitKey(1) 149 | output.write(image) 150 | #except Exception: 151 | # cap = cv2.VideoCapture('rtsp://admin:uc123456@101.205.119.109:554/Streaming/Channels/301') 152 | # 
cap1 = cv2.VideoCapture('rtsp://admin:uc123456@101.205.119.109:554/Streaming/Channels/301') 153 | # continue 154 | -------------------------------------------------------------------------------- /layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | from .modules import * 3 | -------------------------------------------------------------------------------- /layers/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .detection import Detect 2 | from .prior_box import PriorBox 3 | 4 | 5 | __all__ = ['Detect', 'PriorBox'] 6 | -------------------------------------------------------------------------------- /layers/functions/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/layers/functions/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /layers/functions/__pycache__/detection.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/layers/functions/__pycache__/detection.cpython-36.pyc -------------------------------------------------------------------------------- /layers/functions/__pycache__/prior_box.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/layers/functions/__pycache__/prior_box.cpython-36.pyc -------------------------------------------------------------------------------- /layers/functions/detection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.backends.cudnn as cudnn 4 | from torch.autograd import Function 5 | from torch.autograd import Variable 6 | from utils.box_utils import decode 7 | 8 | 9 | class Detect(Function): 10 | """At test time, Detect is the final layer of SSD. Decode location preds, 11 | apply non-maximum suppression to location predictions based on conf 12 | scores and threshold to a top_k number of output predictions for both 13 | confidence score and locations. 
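    Note: in this repository the class only decodes the loc predictions against the priors and returns all boxes with their confidence scores; score thresholding and NMS are applied by the caller (see demo.py and utils/nms_wrapper.py).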
14 | """ 15 | def __init__(self, num_classes, bkg_label, cfg): 16 | self.num_classes = num_classes 17 | self.background_label = bkg_label 18 | 19 | self.variance = cfg['variance'] 20 | 21 | def forward(self, predictions, prior): 22 | """ 23 | Args: 24 | loc_data: (tensor) Loc preds from loc layers 25 | Shape: [batch,num_priors*4] 26 | conf_data: (tensor) Shape: Conf preds from conf layers 27 | Shape: [batch*num_priors,num_classes] 28 | prior_data: (tensor) Prior boxes and variances from priorbox layers 29 | Shape: [1,num_priors,4] 30 | """ 31 | 32 | loc, conf = predictions 33 | 34 | loc_data = loc.data 35 | conf_data = conf.data 36 | prior_data = prior.data 37 | num = loc_data.size(0) # batch size 38 | self.num_priors = prior_data.size(0) 39 | self.boxes = torch.zeros(1, self.num_priors, 4) 40 | self.scores = torch.zeros(1, self.num_priors, self.num_classes) 41 | if loc_data.is_cuda: 42 | self.boxes = self.boxes.cuda() 43 | self.scores = self.scores.cuda() 44 | 45 | if num == 1: 46 | # size batch x num_classes x num_priors 47 | conf_preds = conf_data.unsqueeze(0) 48 | 49 | else: 50 | conf_preds = conf_data.view(num, num_priors, 51 | self.num_classes) 52 | self.boxes.expand_(num, self.num_priors, 4) 53 | self.scores.expand_(num, self.num_priors, self.num_classes) 54 | 55 | # Decode predictions into bboxes. 56 | for i in range(num): 57 | decoded_boxes = decode(loc_data[i], prior_data, self.variance) 58 | conf_scores = conf_preds[i].clone() 59 | 60 | self.boxes[i] = decoded_boxes 61 | self.scores[i] = conf_scores 62 | 63 | return self.boxes, self.scores 64 | -------------------------------------------------------------------------------- /layers/functions/prior_box.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.backends.cudnn as cudnn 4 | from math import sqrt as sqrt 5 | from itertools import product as product 6 | 7 | 8 | class PriorBox(object): 9 | """Compute priorbox coordinates in center-offset form for each source 10 | feature map. 11 | Note: 12 | This 'layer' has changed between versions of the original SSD 13 | paper, so we include both versions, but note v2 is the most tested and most 14 | recent version of the paper. 
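    With the VOC_Config used in this repo (feature maps 38/19/10/5/3/1 and 4, 4, 4, 4, 4, 3 priors per location), forward() returns the priors as a (7759, 4) tensor of [cx, cy, w, h] in relative coordinates. For example (this mirrors the __main__ check at the bottom of this file):
        priors = PriorBox(VOC_Config).forward()  # torch.Size([7759, 4])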
15 | 16 | """ 17 | def __init__(self, cfg): 18 | super(PriorBox, self).__init__() 19 | self.image_size = cfg['min_dim'] 20 | self.variance = cfg['variance'] or [0.1] 21 | self.feature_maps = cfg['feature_maps'] 22 | self.min_sizes = cfg['min_sizes'] 23 | self.max_sizes = cfg['max_sizes'] 24 | self.steps = cfg['steps'] 25 | self.aspect_ratios = cfg['aspect_ratios'] 26 | self.max_ratios = cfg['max_ratios'] 27 | 28 | self.clip = cfg['clip'] 29 | for v in self.variance: 30 | if v <= 0: 31 | raise ValueError('Variances must be greater than 0') 32 | 33 | def forward(self): 34 | mean = [] 35 | for k, f in enumerate(self.feature_maps): 36 | for i, j in product(range(f), repeat=2): 37 | f_k = self.image_size / self.steps[k] 38 | cx = (j + 0.5) / f_k 39 | cy = (i + 0.5) / f_k 40 | 41 | s_k = self.min_sizes[k]/self.image_size 42 | 43 | # aspect_ratio: 1 44 | # rel size: sqrt(s_k * s_(k+1)) 45 | s_k_prime = sqrt(s_k * (self.max_sizes[k]/self.image_size)) 46 | mean += [cx, cy, s_k_prime*sqrt(self.max_ratios[k]), s_k_prime/sqrt(self.max_ratios[k])] 47 | 48 | # rest of aspect ratios 49 | for ar in self.aspect_ratios[k]: 50 | mean += [cx, cy, s_k*sqrt(ar), s_k/sqrt(ar)] 51 | 52 | 53 | # back to torch land 54 | output = torch.Tensor(mean).view(-1, 4) 55 | if self.clip: 56 | output.clamp_(max=1, min=0) 57 | return output 58 | 59 | if __name__ == '__main__': 60 | # RFB CONFIGS 61 | VOC_Config = { 62 | 'feature_maps': [38, 19, 10, 5, 3, 1], 63 | 64 | 'min_dim': 300, 65 | 66 | 'steps': [8, 16, 32, 64, 100, 300], 67 | 68 | 'min_sizes': [26, 60, 111, 162, 213, 264], 69 | 70 | 'max_sizes': [60, 111, 162, 213, 264, 315], 71 | 72 | 'aspect_ratios': [[0.4, 1.0, 1.5], 73 | [0.4, 1.0, 1.6], 74 | [0.5, 1.1, 1.6], 75 | [0.5, 1.1, 1.6], 76 | [0.5, 1.1, 1.6], 77 | [0.7, 1.4]], 78 | 79 | 'max_ratios': [0.8, 0.8, 0.8, 0.9, 1, 1], 80 | 81 | 'variance': [0.1, 0.2], 82 | 83 | 'clip': True, 84 | } 85 | 86 | priorbox = PriorBox(VOC_Config) 87 | 88 | with torch.no_grad(): 89 | priors = priorbox.forward() 90 | print(priors.shape) -------------------------------------------------------------------------------- /layers/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .multibox_loss import MultiBoxLoss 2 | 3 | __all__ = ['MultiBoxLoss'] 4 | -------------------------------------------------------------------------------- /layers/modules/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/layers/modules/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /layers/modules/__pycache__/multibox_loss.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/layers/modules/__pycache__/multibox_loss.cpython-36.pyc -------------------------------------------------------------------------------- /layers/modules/multibox_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from utils.box_utils import match, log_sum_exp 6 | GPU = False 7 | if torch.cuda.is_available(): 8 | GPU = True 9 | 10 | 11 | class MultiBoxLoss(nn.Module): 12 | """SSD 
Weighted Loss Function 13 | Compute Targets: 14 | 1) Produce Confidence Target Indices by matching ground truth boxes 15 | with (default) 'priorboxes' that have jaccard index > threshold parameter 16 | (default threshold: 0.5). 17 | 2) Produce localization target by 'encoding' variance into offsets of ground 18 | truth boxes and their matched 'priorboxes'. 19 | 3) Hard negative mining to filter the excessive number of negative examples 20 | that comes with using a large number of default bounding boxes. 21 | (default negative:positive ratio 3:1) 22 | Objective Loss: 23 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 24 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss 25 | weighted by α which is set to 1 by cross val. 26 | Args: 27 | c: class confidences, 28 | l: predicted boxes, 29 | g: ground truth boxes 30 | N: number of matched default boxes 31 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 32 | """ 33 | 34 | 35 | def __init__(self, num_classes,overlap_thresh,prior_for_matching,bkg_label,neg_mining,neg_pos,neg_overlap,encode_target): 36 | super(MultiBoxLoss, self).__init__() 37 | self.num_classes = num_classes 38 | self.threshold = overlap_thresh 39 | self.background_label = bkg_label 40 | self.encode_target = encode_target 41 | self.use_prior_for_matching = prior_for_matching 42 | self.do_neg_mining = neg_mining 43 | self.negpos_ratio = neg_pos 44 | self.neg_overlap = neg_overlap 45 | self.variance = [0.1,0.2] 46 | 47 | def forward(self, predictions, priors, targets): 48 | """Multibox Loss 49 | Args: 50 | predictions (tuple): A tuple containing loc preds, conf preds, 51 | and prior boxes from SSD net. 52 | conf shape: torch.size(batch_size,num_priors,num_classes) 53 | loc shape: torch.size(batch_size,num_priors,4) 54 | priors shape: torch.size(num_priors,4) 55 | 56 | ground_truth (tensor): Ground truth boxes and labels for a batch, 57 | shape: [batch_size,num_objs,5] (last idx is the label). 
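            Note: here `targets` is the list of per-image (num_objs, 5) tensors produced by detection_collate, so num_objs can differ from image to image.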
58 | """ 59 | 60 | loc_data, conf_data = predictions 61 | priors = priors 62 | num = loc_data.size(0) 63 | num_priors = (priors.size(0)) 64 | num_classes = self.num_classes 65 | 66 | # match priors (default boxes) and ground truth boxes 67 | loc_t = torch.Tensor(num, num_priors, 4) 68 | conf_t = torch.LongTensor(num, num_priors) 69 | for idx in range(num): 70 | truths = targets[idx][:,:-1].data 71 | labels = targets[idx][:,-1].data 72 | defaults = priors.data 73 | match(self.threshold,truths,defaults,self.variance,labels,loc_t,conf_t,idx) 74 | if GPU: 75 | loc_t = loc_t.cuda() 76 | conf_t = conf_t.cuda() 77 | # wrap targets 78 | loc_t = Variable(loc_t, requires_grad=False) 79 | conf_t = Variable(conf_t,requires_grad=False) 80 | 81 | pos = conf_t > 0 82 | 83 | # Localization Loss (Smooth L1) 84 | # Shape: [batch,num_priors,4] 85 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) 86 | loc_p = loc_data[pos_idx].view(-1,4) 87 | loc_t = loc_t[pos_idx].view(-1,4) 88 | loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum') 89 | 90 | # Compute max conf across batch for hard negative mining 91 | batch_conf = conf_data.view(-1,self.num_classes) 92 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1,1)) 93 | 94 | # Hard Negative Mining 95 | loss_c[pos.view(-1,1)] = 0 # filter out pos boxes for now 96 | loss_c = loss_c.view(num, -1) 97 | _,loss_idx = loss_c.sort(1, descending=True) 98 | _,idx_rank = loss_idx.sort(1) 99 | num_pos = pos.long().sum(1,keepdim=True) 100 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) 101 | neg = idx_rank < num_neg.expand_as(idx_rank) 102 | 103 | # Confidence Loss Including Positive and Negative Examples 104 | pos_idx = pos.unsqueeze(2).expand_as(conf_data) 105 | neg_idx = neg.unsqueeze(2).expand_as(conf_data) 106 | conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes) 107 | targets_weighted = conf_t[(pos+neg).gt(0)] 108 | loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum') 109 | 110 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 111 | 112 | N = max(num_pos.data.sum().float(), 1) 113 | loss_l/=N 114 | loss_c/=N 115 | return loss_l,loss_c 116 | -------------------------------------------------------------------------------- /make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd ./utils/ 3 | 4 | CUDA_PATH=/usr/local/cuda/ 5 | 6 | python3 build.py build_ext --inplace 7 | 8 | cd .. 
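# Note: build.py is expected to compile the Cython/CUDA NMS extensions under utils/nms
# (cpu_nms.pyx, gpu_nms.pyx, nms_kernel.cu) into the .so modules loaded by utils/nms_wrapper.py,
# using the CUDA toolkit found at CUDA_PATH above. An optional smoke test after building,
# run from the repository root (the same import demo.py performs):
#   python3 -c "from utils.nms_wrapper import nms; print('nms extension OK')"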
9 | -------------------------------------------------------------------------------- /models/RFB_Net_vgg-1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from layers import * 6 | import torchvision.transforms as transforms 7 | import torchvision.models as models 8 | import torch.backends.cudnn as cudnn 9 | import os 10 | from models.module import BasicRFB, Backbone 11 | 12 | 13 | 14 | class RFBNet(nn.Module): 15 | 16 | def __init__(self, phase, size, head, num_classes): 17 | super(RFBNet, self).__init__() 18 | self.phase = phase 19 | self.num_classes = num_classes 20 | self.size = size 21 | 22 | self.base = Backbone() 23 | 24 | self.loc = nn.ModuleList(head[0]) 25 | self.conf = nn.ModuleList(head[1]) 26 | if self.phase == 'test': 27 | self.softmax = nn.Softmax(dim=-1) 28 | 29 | def forward(self, x): 30 | sources = list() 31 | loc = list() 32 | conf = list() 33 | 34 | f1, f2, f3, f4, f5, f6 = self.base(x) 35 | 36 | sources = [f1, f2, f3, f4, f5, f6] 37 | 38 | # apply multibox head to source layers 39 | for (x, l, c) in zip(sources, self.loc, self.conf): 40 | loc.append(l(x).permute(0, 2, 3, 1).contiguous()) 41 | conf.append(c(x).permute(0, 2, 3, 1).contiguous()) 42 | 43 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) 44 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) 45 | 46 | if self.phase == "test": 47 | output = ( 48 | loc.view(loc.size(0), -1, 4), # loc preds 49 | self.softmax(conf.view(-1, self.num_classes)), # conf preds 50 | ) 51 | else: 52 | output = ( 53 | loc.view(loc.size(0), -1, 4), 54 | conf.view(conf.size(0), -1, self.num_classes), 55 | ) 56 | return output 57 | 58 | def load_weights(self, base_file): 59 | other, ext = os.path.splitext(base_file) 60 | if ext == '.pkl' or '.pth': 61 | print('Loading weights into state dict...') 62 | self.load_state_dict(torch.load(base_file)) 63 | print('Finished!') 64 | else: 65 | print('Sorry only .pth and .pkl files supported.') 66 | 67 | def multibox(num_classes=2): 68 | # 需要注意,这里要跟 prior_box.py 对应上 69 | # number of boxes per feature map location,就是各个feature map上预定义的anchor数,可结合prior_box.py;理解 70 | anchor_num = [4, 4, 4, 4, 4, 3] # number of boxes per feature map location 71 | loc_layers = [] 72 | conf_layers = [] 73 | 74 | ############################ 第1个检测层 ############################ 75 | loc_layers += [nn.Conv2d(256, anchor_num[0] * 4, kernel_size=3, padding=1)] 76 | conf_layers += [nn.Conv2d(256, anchor_num[0] * num_classes, kernel_size=3, padding=1)] 77 | ############################ 第2个检测层 ############################ 78 | loc_layers += [nn.Conv2d(256, anchor_num[1] * 4, kernel_size=3, padding=1)] 79 | conf_layers += [nn.Conv2d(256, anchor_num[1] * num_classes, kernel_size=3, padding=1)] 80 | ############################ 第3个检测层 ############################ 81 | loc_layers += [nn.Conv2d(256, anchor_num[2] * 4, kernel_size=3, padding=1)] 82 | conf_layers += [nn.Conv2d(256, anchor_num[2] * num_classes, kernel_size=3, padding=1)] 83 | ############################ 第4个检测层 ############################ 84 | loc_layers += [nn.Conv2d(256, anchor_num[3] * 4, kernel_size=3, padding=1)] 85 | conf_layers += [nn.Conv2d(256, anchor_num[3] * num_classes, kernel_size=3, padding=1)] 86 | ############################ 第5个检测层 ############################ 87 | loc_layers += [nn.Conv2d(256, anchor_num[4] * 4, kernel_size=3, padding=1)] 88 | conf_layers += [nn.Conv2d(256, 
anchor_num[4] * num_classes, kernel_size=3, padding=1)] 89 | ############################ 第6个检测层 ############################ 90 | loc_layers += [nn.Conv2d(256, anchor_num[5] * 4, kernel_size=3, padding=1)] 91 | conf_layers += [nn.Conv2d(256, anchor_num[5] * num_classes, kernel_size=3, padding=1)] 92 | 93 | return (loc_layers, conf_layers) 94 | 95 | 96 | def build_net(phase, size=300, num_classes=2): 97 | if phase != "test" and phase != "train": 98 | print("Error: Phase not recognized") 99 | return 100 | if size != 300: 101 | print("Error: Sorry only RFBNet300 are supported!") 102 | return 103 | 104 | return RFBNet(phase, size, multibox(num_classes), num_classes) 105 | 106 | if __name__ == '__main__': 107 | # 0.966 MB 108 | x = torch.randn(2, 3, 300, 300) 109 | net = build_net('test') 110 | from torchsummary import summary 111 | summary(net, (3, 300, 300)) -------------------------------------------------------------------------------- /models/RFB_Net_vgg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from layers import * 6 | import torchvision.transforms as transforms 7 | import torchvision.models as models 8 | import torch.backends.cudnn as cudnn 9 | import os 10 | from models.module import BasicRFB, Backbone 11 | 12 | 13 | 14 | class RFBNet(nn.Module): 15 | 16 | def __init__(self, phase, size, head, num_classes): 17 | super(RFBNet, self).__init__() 18 | self.phase = phase 19 | self.num_classes = num_classes 20 | self.size = size 21 | 22 | self.base = Backbone() 23 | 24 | self.loc = nn.ModuleList(head[0]) 25 | self.conf = nn.ModuleList(head[1]) 26 | if self.phase == 'test': 27 | self.softmax = nn.Softmax(dim=1) 28 | 29 | def forward(self, x): 30 | sources = list() 31 | loc = list() 32 | conf = list() 33 | 34 | f1, f2, f3, f4, f5, f6 = self.base(x) 35 | 36 | sources = [f1, f2, f3, f4, f5, f6] 37 | 38 | # apply multibox head to source layers 39 | for (x, l, c) in zip(sources, self.loc, self.conf): 40 | loc.append(l(x).permute(0, 2, 3, 1).contiguous()) 41 | conf.append(c(x).permute(0, 2, 3, 1).contiguous()) 42 | 43 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) 44 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) 45 | 46 | if self.phase == "test": 47 | output = ( 48 | loc.view(loc.size(0), -1, 4), # loc preds 49 | conf.view(conf.size(0), -1, self.num_classes), 50 | #self.softmax(conf.view(-1, self.num_classes)), # conf preds 51 | ) 52 | else: 53 | output = ( 54 | loc.view(loc.size(0), -1, 4), 55 | conf.view(conf.size(0), -1, self.num_classes), 56 | ) 57 | return output 58 | 59 | def load_weights(self, base_file): 60 | other, ext = os.path.splitext(base_file) 61 | if ext == '.pkl' or '.pth': 62 | print('Loading weights into state dict...') 63 | self.load_state_dict(torch.load(base_file)) 64 | print('Finished!') 65 | else: 66 | print('Sorry only .pth and .pkl files supported.') 67 | 68 | def multibox(num_classes=2): 69 | # 需要注意,这里要跟 prior_box.py 对应上 70 | # number of boxes per feature map location,就是各个feature map上预定义的anchor数,可结合prior_box.py;理解 71 | anchor_num = [4, 4, 4, 4, 4, 3] # number of boxes per feature map location 72 | loc_layers = [] 73 | conf_layers = [] 74 | 75 | ############################ 第1个检测层 ############################ 76 | loc_layers += [nn.Conv2d(128, anchor_num[0] * 4, kernel_size=3, padding=1)] 77 | conf_layers += [nn.Conv2d(128, anchor_num[0] * num_classes, kernel_size=3, padding=1)] 78 | 
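    # anchor_num above has to stay in sync with prior_box.py: each feature-map cell gets one prior
    # built from max_ratios[k] plus one prior per entry in aspect_ratios[k]
    # (1 + 3 = 4 for the first five maps, 1 + 2 = 3 for the last),
    # i.e. 4*(38**2 + 19**2 + 10**2 + 5**2 + 3**2) + 3*1**2 = 7759 priors in total.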
############################ 第2个检测层 ############################ 79 | loc_layers += [nn.Conv2d(128, anchor_num[1] * 4, kernel_size=3, padding=1)] 80 | conf_layers += [nn.Conv2d(128, anchor_num[1] * num_classes, kernel_size=3, padding=1)] 81 | ############################ 第3个检测层 ############################ 82 | loc_layers += [nn.Conv2d(128, anchor_num[2] * 4, kernel_size=3, padding=1)] 83 | conf_layers += [nn.Conv2d(128, anchor_num[2] * num_classes, kernel_size=3, padding=1)] 84 | ############################ 第4个检测层 ############################ 85 | loc_layers += [nn.Conv2d(128, anchor_num[3] * 4, kernel_size=3, padding=1)] 86 | conf_layers += [nn.Conv2d(128, anchor_num[3] * num_classes, kernel_size=3, padding=1)] 87 | ############################ 第5个检测层 ############################ 88 | loc_layers += [nn.Conv2d(128, anchor_num[4] * 4, kernel_size=3, padding=1)] 89 | conf_layers += [nn.Conv2d(128, anchor_num[4] * num_classes, kernel_size=3, padding=1)] 90 | ############################ 第6个检测层 ############################ 91 | loc_layers += [nn.Conv2d(64, anchor_num[5] * 4, kernel_size=3, padding=1)] 92 | conf_layers += [nn.Conv2d(64, anchor_num[5] * num_classes, kernel_size=3, padding=1)] 93 | 94 | return (loc_layers, conf_layers) 95 | 96 | 97 | def build_net(phase, size=300, num_classes=2): 98 | if phase != "test" and phase != "train": 99 | print("Error: Phase not recognized") 100 | return 101 | if size != 300: 102 | print("Error: Sorry only RFBNet300 are supported!") 103 | return 104 | 105 | return RFBNet(phase, size, multibox(num_classes), num_classes) 106 | 107 | if __name__ == '__main__': 108 | # 0.99 MB 109 | x = torch.randn(2, 3, 300, 300) 110 | net = build_net('test') 111 | from torchsummary import summary 112 | summary(net, (3, 300, 300)) 113 | -------------------------------------------------------------------------------- /models/__pycache__/RFB_Net_vgg.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/models/__pycache__/RFB_Net_vgg.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/module.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/models/__pycache__/module.cpython-36.pyc -------------------------------------------------------------------------------- /models/module-1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import os 5 | 6 | class BasicConv(nn.Module): 7 | 8 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True): 9 | super(BasicConv, self).__init__() 10 | self.out_channels = out_planes 11 | if bn: 12 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False) 13 | 
self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) 14 | self.relu = nn.ReLU(inplace=True) if relu else None 15 | else: 16 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=True) 17 | self.bn = None 18 | self.relu = nn.ReLU(inplace=True) if relu else None 19 | 20 | def forward(self, x): 21 | x = self.conv(x) 22 | if self.bn is not None: 23 | x = self.bn(x) 24 | if self.relu is not None: 25 | x = self.relu(x) 26 | return x 27 | 28 | 29 | class BasicRFB(nn.Module): 30 | 31 | def __init__(self, in_planes, out_planes, stride=1, scale = 0.1, map_reduce=8, vision=1, groups=1): 32 | super(BasicRFB, self).__init__() 33 | self.scale = scale 34 | self.out_channels = out_planes 35 | inter_planes = in_planes // map_reduce 36 | 37 | self.branch0 = nn.Sequential( 38 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 39 | BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups), 40 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision+1, dilation=vision+1, relu=False, groups=groups) 41 | ) 42 | self.branch1 = nn.Sequential( 43 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 44 | BasicConv(inter_planes, 2*inter_planes, kernel_size=(3,3), stride=stride, padding=(1,1), groups=groups), 45 | BasicConv(2*inter_planes, 2*inter_planes, kernel_size=3, stride=1, padding=vision + 2, dilation=vision + 2, relu=False, groups=groups) 46 | ) 47 | self.branch2 = nn.Sequential( 48 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 49 | BasicConv(inter_planes, (inter_planes//2)*3, kernel_size=3, stride=1, padding=1, groups=groups), 50 | BasicConv((inter_planes//2)*3, 2*inter_planes, kernel_size=3, stride=stride, padding=1, groups=groups), 51 | BasicConv(2*inter_planes, 2*inter_planes, kernel_size=3, stride=1, padding=vision + 4, dilation=vision + 4, relu=False, groups=groups) 52 | ) 53 | 54 | self.ConvLinear = BasicConv(6*inter_planes, out_planes, kernel_size=1, stride=1, relu=False) 55 | self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False) 56 | self.relu = nn.ReLU(inplace=False) 57 | 58 | def forward(self,x): 59 | x0 = self.branch0(x) 60 | x1 = self.branch1(x) 61 | x2 = self.branch2(x) 62 | 63 | out = torch.cat((x0,x1,x2),1) 64 | out = self.ConvLinear(out) 65 | short = self.shortcut(x) 66 | out = out*self.scale + short 67 | out = self.relu(out) 68 | 69 | return out 70 | 71 | class Backbone(nn.Module): 72 | def __init__(self, bn=True): 73 | super(Backbone, self).__init__() 74 | 75 | self.conv1_1 = BasicConv(3, 32, kernel_size=3, padding=1, bn=bn) 76 | self.conv1_2 = BasicConv(32, 32, kernel_size=3, padding=1, bn=bn) 77 | self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2) # 2 78 | 79 | self.conv2_1 = BasicConv(32, 64, kernel_size=3, padding=1, bn=bn) 80 | self.conv2_2 = BasicConv(64, 64, kernel_size=3, padding=1, bn=bn) 81 | self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2) # 4 82 | 83 | self.conv3_1 = BasicConv(64, 128, kernel_size=1, bn=bn) 84 | self.conv3_2 = BasicConv(128, 128, kernel_size=3, padding=1, bn=bn) 85 | self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=bn) # 8 86 | 87 | self.conv4_1 = BasicConv(128, 256, kernel_size=1, bn=bn) 88 | self.conv4_2 = BasicConv(256, 256, kernel_size=3, padding=1, bn=bn) #### f1 #### 89 | self.conv4_3 = 
BasicRFB(256,256,stride = 1,scale=1.0) 90 | self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2) # 16 91 | 92 | self.conv5_1 = BasicConv(256, 128, kernel_size=1, relu=False, bn=bn) 93 | self.conv5_2 = BasicConv(128, 256, kernel_size=3, padding=1, stride=1, bn=bn) #### f2 #### 94 | 95 | self.conv6_1 = BasicConv(256, 128, kernel_size=1, relu=False) 96 | self.conv6_2 = BasicConv(128, 256, kernel_size=3, padding=1, stride=2) #### f3 #### 97 | 98 | self.conv7_1 = BasicConv(256, 128, kernel_size=1, relu=False) 99 | self.conv7_2 = BasicConv(128, 256, kernel_size=3, padding=1, stride=2) #### f4 #### 100 | 101 | self.conv8_1 = BasicConv(256,128,kernel_size=1, relu=False) 102 | self.conv8_2 = BasicConv(128,256,kernel_size=3) #### f5 #### 103 | 104 | self.conv9_1 = BasicConv(256,128,kernel_size=1, relu=False) 105 | self.conv9_2 = BasicConv(128,256,kernel_size=3) #### f6 #### 106 | 107 | 108 | def forward(self, x): 109 | x = self.conv1_1(x) 110 | x = self.conv1_2(x) 111 | x = self.pool1(x) 112 | 113 | x = self.conv2_1(x) 114 | x = self.conv2_2(x) 115 | x = self.pool2(x) 116 | 117 | x = self.conv3_1(x) 118 | x = self.conv3_2(x) 119 | x = self.pool3(x) 120 | 121 | x = self.conv4_1(x) 122 | x = self.conv4_2(x) 123 | x = self.conv4_3(x) 124 | f1 = x # stride = 8 125 | x = self.pool4(x) 126 | 127 | x = self.conv5_1(x) 128 | x = self.conv5_2(x) 129 | f2 = x # stride = 16 130 | 131 | x = self.conv6_1(x) 132 | x = self.conv6_2(x) 133 | f3 = x # stride = 32 134 | 135 | x = self.conv7_1(x) 136 | x = self.conv7_2(x) 137 | f4 = x # stride = 64 138 | 139 | x = self.conv8_1(x) 140 | x = self.conv8_2(x) 141 | f5 = x # -2 142 | 143 | x = self.conv9_1(x) 144 | x = self.conv9_2(x) 145 | f6 = x # -2 146 | 147 | return f1, f2, f3, f4, f5, f6 148 | 149 | 150 | if __name__ == '__main__': 151 | x = torch.randn(2,3,300,300) 152 | model = Backbone() 153 | features = model(x) 154 | -------------------------------------------------------------------------------- /models/module-3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import os 5 | 6 | class BasicConv(nn.Module): 7 | 8 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True): 9 | super(BasicConv, self).__init__() 10 | self.out_channels = out_planes 11 | if bn: 12 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False) 13 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) 14 | self.relu = nn.ReLU(inplace=True) if relu else None 15 | else: 16 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=True) 17 | self.bn = None 18 | self.relu = nn.ReLU(inplace=True) if relu else None 19 | 20 | def forward(self, x): 21 | x = self.conv(x) 22 | if self.bn is not None: 23 | x = self.bn(x) 24 | if self.relu is not None: 25 | x = self.relu(x) 26 | return x 27 | 28 | 29 | class BasicRFB(nn.Module): 30 | 31 | def __init__(self, in_planes, out_planes, stride=1, scale = 0.1, map_reduce=8, vision=1, groups=1): 32 | super(BasicRFB, self).__init__() 33 | self.scale = scale 34 | self.out_channels = out_planes 35 | inter_planes = in_planes // map_reduce 36 | 37 | self.branch0 = nn.Sequential( 38 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 39 | 
BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups), 40 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision+1, dilation=vision+1, relu=False, groups=groups) 41 | ) 42 | self.branch1 = nn.Sequential( 43 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 44 | BasicConv(inter_planes, 2*inter_planes, kernel_size=(3,3), stride=stride, padding=(1,1), groups=groups), 45 | BasicConv(2*inter_planes, 2*inter_planes, kernel_size=3, stride=1, padding=vision + 2, dilation=vision + 2, relu=False, groups=groups) 46 | ) 47 | self.branch2 = nn.Sequential( 48 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 49 | BasicConv(inter_planes, (inter_planes//2)*3, kernel_size=3, stride=1, padding=1, groups=groups), 50 | BasicConv((inter_planes//2)*3, 2*inter_planes, kernel_size=3, stride=stride, padding=1, groups=groups), 51 | BasicConv(2*inter_planes, 2*inter_planes, kernel_size=3, stride=1, padding=vision + 4, dilation=vision + 4, relu=False, groups=groups) 52 | ) 53 | 54 | self.ConvLinear = BasicConv(6*inter_planes, out_planes, kernel_size=1, stride=1, relu=False) 55 | self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False) 56 | self.relu = nn.ReLU(inplace=False) 57 | 58 | def forward(self,x): 59 | x0 = self.branch0(x) 60 | x1 = self.branch1(x) 61 | x2 = self.branch2(x) 62 | 63 | out = torch.cat((x0,x1,x2),1) 64 | out = self.ConvLinear(out) 65 | short = self.shortcut(x) 66 | out = out*self.scale + short 67 | out = self.relu(out) 68 | 69 | return out 70 | 71 | class Backbone(nn.Module): 72 | def __init__(self, bn=True): 73 | super(Backbone, self).__init__() 74 | 75 | self.conv1_1 = BasicConv(3, 32, kernel_size=3, padding=1, bn=bn) 76 | self.conv1_2 = BasicConv(32, 32, kernel_size=3, padding=1, bn=bn) 77 | self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2) # 2 78 | 79 | self.conv2_1 = BasicConv(32, 64, kernel_size=3, padding=1, bn=bn) 80 | self.conv2_2 = BasicConv(64, 64, kernel_size=3, padding=1, bn=bn) 81 | self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2) # 4 82 | 83 | self.conv3_1 = BasicConv(64, 128, kernel_size=1, bn=bn) 84 | self.conv3_2 = BasicConv(128, 128, kernel_size=3, padding=1, bn=bn) 85 | self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=bn) # 8 86 | 87 | self.conv4_1 = BasicConv(128, 128, kernel_size=1, bn=bn) 88 | self.conv4_2 = BasicConv(128, 128, kernel_size=3, padding=1, bn=bn) #### f1 #### 89 | self.conv4_3 = BasicRFB(128,128,stride = 1,scale=1.0) 90 | self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2) # 16 91 | 92 | self.conv5_1 = BasicConv(128, 64, kernel_size=1, relu=False, bn=bn) 93 | self.conv5_2 = BasicConv(64, 128, kernel_size=3, padding=1, stride=1, bn=bn) #### f2 #### 94 | 95 | self.conv6_1 = BasicConv(128, 64, kernel_size=1, relu=False) 96 | self.conv6_2 = BasicConv(64, 128, kernel_size=3, padding=1, stride=2) #### f3 #### 97 | 98 | self.conv7_1 = BasicConv(128, 64, kernel_size=1, relu=False) 99 | self.conv7_2 = BasicConv(64, 128, kernel_size=3, padding=1, stride=2) #### f4 #### 100 | 101 | self.conv8_1 = BasicConv(128,64,kernel_size=1, relu=False) 102 | self.conv8_2 = BasicConv(64,128,kernel_size=3) #### f5 #### 103 | 104 | self.conv9_1 = BasicConv(128,64,kernel_size=1, relu=False) 105 | self.conv9_2 = BasicConv(64,64,kernel_size=3) #### f6 #### 106 | 107 | 108 | def forward(self, x): 109 | x = self.conv1_1(x) 110 | x = self.conv1_2(x) 111 | x = self.pool1(x) 112 | 
113 | x = self.conv2_1(x) 114 | x = self.conv2_2(x) 115 | x = self.pool2(x) 116 | 117 | x = self.conv3_1(x) 118 | x = self.conv3_2(x) 119 | x = self.pool3(x) 120 | 121 | x = self.conv4_1(x) 122 | x = self.conv4_2(x) 123 | x = self.conv4_3(x) 124 | f1 = x # stride = 8 125 | x = self.pool4(x) 126 | 127 | x = self.conv5_1(x) 128 | x = self.conv5_2(x) 129 | f2 = x # stride = 16 130 | 131 | x = self.conv6_1(x) 132 | x = self.conv6_2(x) 133 | f3 = x # stride = 32 134 | 135 | x = self.conv7_1(x) 136 | x = self.conv7_2(x) 137 | f4 = x # stride = 64 138 | 139 | x = self.conv8_1(x) 140 | x = self.conv8_2(x) 141 | f5 = x # -2 142 | 143 | x = self.conv9_1(x) 144 | x = self.conv9_2(x) 145 | f6 = x # -2 146 | 147 | return f1, f2, f3, f4, f5, f6 148 | 149 | 150 | if __name__ == '__main__': 151 | x = torch.randn(2,3,300,300) 152 | model = Backbone() 153 | features = model(x) 154 | -------------------------------------------------------------------------------- /models/module.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import os 5 | 6 | class BasicConv(nn.Module): 7 | 8 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True): 9 | super(BasicConv, self).__init__() 10 | self.out_channels = out_planes 11 | if bn: 12 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False) 13 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) 14 | self.relu = nn.ReLU(inplace=True) if relu else None 15 | else: 16 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=True) 17 | self.bn = None 18 | self.relu = nn.ReLU(inplace=True) if relu else None 19 | 20 | def forward(self, x): 21 | x = self.conv(x) 22 | if self.bn is not None: 23 | x = self.bn(x) 24 | if self.relu is not None: 25 | x = self.relu(x) 26 | return x 27 | 28 | 29 | class BasicRFB(nn.Module): 30 | 31 | def __init__(self, in_planes, out_planes, stride=1, scale = 0.1, map_reduce=8, vision=1, groups=1): 32 | super(BasicRFB, self).__init__() 33 | self.scale = scale 34 | self.out_channels = out_planes 35 | inter_planes = in_planes // map_reduce 36 | 37 | self.branch0 = nn.Sequential( 38 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 39 | BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups), 40 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision+1, dilation=vision+1, relu=False, groups=groups) 41 | ) 42 | self.branch1 = nn.Sequential( 43 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 44 | BasicConv(inter_planes, 2*inter_planes, kernel_size=(3,3), stride=stride, padding=(1,1), groups=groups), 45 | BasicConv(2*inter_planes, 2*inter_planes, kernel_size=3, stride=1, padding=vision + 2, dilation=vision + 2, relu=False, groups=groups) 46 | ) 47 | self.branch2 = nn.Sequential( 48 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False), 49 | BasicConv(inter_planes, (inter_planes//2)*3, kernel_size=3, stride=1, padding=1, groups=groups), 50 | BasicConv((inter_planes//2)*3, 2*inter_planes, kernel_size=3, stride=stride, padding=1, groups=groups), 51 | BasicConv(2*inter_planes, 
2*inter_planes, kernel_size=3, stride=1, padding=vision + 4, dilation=vision + 4, relu=False, groups=groups) 52 | ) 53 | 54 | self.ConvLinear = BasicConv(6*inter_planes, out_planes, kernel_size=1, stride=1, relu=False) 55 | self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False) 56 | self.relu = nn.ReLU(inplace=False) 57 | 58 | def forward(self,x): 59 | x0 = self.branch0(x) 60 | x1 = self.branch1(x) 61 | x2 = self.branch2(x) 62 | 63 | out = torch.cat((x0,x1,x2),1) 64 | out = self.ConvLinear(out) 65 | short = self.shortcut(x) 66 | out = out*self.scale + short 67 | out = self.relu(out) 68 | 69 | return out 70 | 71 | class Backbone(nn.Module): 72 | 73 | def __init__(self, bn=True): 74 | super(Backbone, self).__init__() 75 | #self.conv1_1 = BasicConv(3, 32, kernel_size=3, padding=1, bn=bn) 76 | self.conv1_1 = nn.Sequential( 77 | nn.Conv2d(3, 32, 3, stride=1, padding=1, bias=False), 78 | nn.BatchNorm2d(32), 79 | nn.ReLU(inplace=True), 80 | ) 81 | 82 | #self.conv1_2 = BasicConv(32, 32, kernel_size=3, padding=1, bn=bn) 83 | self.conv1_2 = nn.Sequential( 84 | nn.Conv2d(32,32, kernel_size=3, stride=1, padding=1,groups = 32, bias=False), 85 | nn.BatchNorm2d(32), 86 | nn.ReLU(inplace=True), 87 | 88 | nn.Conv2d(32, 32, 1, 1, 0, bias=False), 89 | nn.BatchNorm2d(32), 90 | nn.ReLU(inplace=True), 91 | ) 92 | self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2) # 2 93 | 94 | #self.conv2_1 = BasicConv(32, 64, kernel_size=3, padding=1, bn=bn) 95 | self.conv2_1 = nn.Sequential( 96 | nn.Conv2d(32,32, kernel_size=3, stride=1, padding=1,groups = 32, bias=False), 97 | nn.BatchNorm2d(32), 98 | nn.ReLU(inplace=True), 99 | 100 | nn.Conv2d(32, 64, 1, 1, 0, bias=False), 101 | nn.BatchNorm2d(64), 102 | nn.ReLU(inplace=True), 103 | ) 104 | #self.conv2_2 = BasicConv(64, 64, kernel_size=3, padding=1, bn=bn) 105 | self.conv2_2 = nn.Sequential( 106 | nn.Conv2d(64,64, kernel_size=3, stride=1, padding=1,groups = 64, bias=False), 107 | nn.BatchNorm2d(64), 108 | nn.ReLU(inplace=True), 109 | 110 | nn.Conv2d(64, 64, 1, 1, 0, bias=False), 111 | nn.BatchNorm2d(64), 112 | nn.ReLU(inplace=True), 113 | ) 114 | self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2) # 4 115 | 116 | self.conv3_1 = BasicConv(64, 128, kernel_size=1, bn=bn) 117 | #self.conv3_2 = BasicConv(128, 128, kernel_size=3, padding=1, bn=bn) 118 | self.conv3_2 = nn.Sequential( 119 | nn.Conv2d(128,128, kernel_size=3, stride=1, padding=1,groups = 128, bias=False), 120 | nn.BatchNorm2d(128), 121 | nn.ReLU(inplace=True), 122 | 123 | nn.Conv2d(128, 128, 1, 1, 0, bias=False), 124 | nn.BatchNorm2d(128), 125 | nn.ReLU(inplace=True), 126 | ) 127 | self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=bn) # 8 128 | 129 | self.conv4_1 = BasicConv(128, 128, kernel_size=1, bn=bn) 130 | #conv4_2 = BasicConv(128, 128, kernel_size=3, padding=1, bn=bn) #### f1 #### 131 | self.conv4_2 = nn.Sequential( 132 | nn.Conv2d(128,128, kernel_size=3, stride=1, padding=1,groups = 128, bias=False), 133 | nn.BatchNorm2d(128), 134 | nn.ReLU(inplace=True), 135 | 136 | nn.Conv2d(128, 128, 1, 1, 0, bias=False), 137 | nn.BatchNorm2d(128), 138 | nn.ReLU(inplace=True), 139 | ) 140 | self.conv4_3 = BasicRFB(128,128,stride = 1,scale=1.0) 141 | self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2) # 16 142 | 143 | self.conv5_1 = BasicConv(128, 64, kernel_size=1, relu=False, bn=bn) 144 | #self.conv5_2 = BasicConv(64, 128, kernel_size=3, padding=1, stride=1, bn=bn) #### f2 #### 145 | self.conv5_2 = nn.Sequential( 146 | nn.Conv2d(64,64, kernel_size=3, stride=1, padding=1,groups 
= 64, bias=False), 147 | nn.BatchNorm2d(64), 148 | nn.ReLU(inplace=True), 149 | 150 | nn.Conv2d(64, 128, 1, 1, 0, bias=False), 151 | nn.BatchNorm2d(128), 152 | nn.ReLU(inplace=True), 153 | ) 154 | 155 | self.conv6_1 = BasicConv(128, 64, kernel_size=1, relu=False) 156 | #self.conv6_2 = BasicConv(64, 128, kernel_size=3, padding=1, stride=2) #### f3 #### 157 | self.conv6_2 = nn.Sequential( 158 | nn.Conv2d(64,64, kernel_size=3, stride=2, padding=1,groups = 64, bias=False), 159 | nn.BatchNorm2d(64), 160 | nn.ReLU(inplace=True), 161 | 162 | nn.Conv2d(64, 128, 1, 1, 0, bias=False), 163 | nn.BatchNorm2d(128), 164 | nn.ReLU(inplace=True), 165 | ) 166 | 167 | self.conv7_1 = BasicConv(128, 64, kernel_size=1, relu=False) 168 | #self.conv7_2 = BasicConv(64, 128, kernel_size=3, padding=1, stride=2) #### f4 #### 169 | self.conv7_2 = nn.Sequential( 170 | nn.Conv2d(64,64, kernel_size=3, stride=2, padding=1,groups = 64, bias=False), 171 | nn.BatchNorm2d(64), 172 | nn.ReLU(inplace=True), 173 | 174 | nn.Conv2d(64, 128, 1, 1, 0, bias=False), 175 | nn.BatchNorm2d(128), 176 | nn.ReLU(inplace=True), 177 | ) 178 | 179 | self.conv8_1 = BasicConv(128,64,kernel_size=1, relu=False) 180 | #self.conv8_2 = BasicConv(64,128,kernel_size=3) #### f5 #### 181 | self.conv8_2 = nn.Sequential( 182 | nn.Conv2d(64,64, kernel_size=3, stride=1, padding=0,groups = 64, bias=False), 183 | nn.BatchNorm2d(64), 184 | nn.ReLU(inplace=True), 185 | 186 | nn.Conv2d(64, 128, 1, 1, 0, bias=False), 187 | nn.BatchNorm2d(128), 188 | nn.ReLU(inplace=True), 189 | ) 190 | 191 | self.conv9_1 = BasicConv(128,64,kernel_size=1, relu=False) 192 | #self.conv9_2 = BasicConv(64,64,kernel_size=3) #### f6 #### 193 | self.conv9_2 = nn.Sequential( 194 | nn.Conv2d(64,64, kernel_size=3, stride=1, padding=0,groups = 64, bias=False), 195 | nn.BatchNorm2d(64), 196 | nn.ReLU(inplace=True), 197 | 198 | nn.Conv2d(64, 64, 1, 1, 0, bias=False), 199 | nn.BatchNorm2d(64), 200 | nn.ReLU(inplace=True), 201 | ) 202 | 203 | 204 | 205 | 206 | def forward(self, x): 207 | x = self.conv1_1(x) 208 | x = self.conv1_2(x) 209 | x = self.pool1(x) 210 | 211 | x = self.conv2_1(x) 212 | x = self.conv2_2(x) 213 | x = self.pool2(x) 214 | 215 | x = self.conv3_1(x) 216 | x = self.conv3_2(x) 217 | x = self.pool3(x) 218 | 219 | x = self.conv4_1(x) 220 | x = self.conv4_2(x) 221 | x = self.conv4_3(x) 222 | f1 = x # stride = 8 223 | x = self.pool4(x) 224 | 225 | x = self.conv5_1(x) 226 | x = self.conv5_2(x) 227 | f2 = x # stride = 16 228 | 229 | x = self.conv6_1(x) 230 | x = self.conv6_2(x) 231 | f3 = x # stride = 32 232 | 233 | x = self.conv7_1(x) 234 | x = self.conv7_2(x) 235 | f4 = x # stride = 64 236 | 237 | x = self.conv8_1(x) 238 | x = self.conv8_2(x) 239 | f5 = x # -2 240 | 241 | x = self.conv9_1(x) 242 | x = self.conv9_2(x) 243 | f6 = x # -2 244 | 245 | return f1, f2, f3, f4, f5, f6 246 | 247 | 248 | if __name__ == '__main__': 249 | x = torch.randn(2,3,300,300) 250 | model = Backbone() 251 | features = model(x) 252 | -------------------------------------------------------------------------------- /train_RFB.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import os 4 | import torch 5 | import torch.nn as nn 6 | import torch.optim as optim 7 | import torch.backends.cudnn as cudnn 8 | import torchvision.transforms as transforms 9 | import torch.nn.init as init 10 | import argparse 11 | import numpy as np 12 | from torch.autograd import Variable 13 | import torch.utils.data as data 14 | from data 
import VOCroot, VOC_Config, AnnotationTransform, VOCDetection, detection_collate, BaseTransform, preproc 15 | from models.RFB_Net_vgg import build_net 16 | from layers.modules import MultiBoxLoss 17 | from layers.functions import PriorBox 18 | import time 19 | from datetime import datetime 20 | from utils.visualize import * 21 | from tensorboardX import SummaryWriter 22 | 23 | parser = argparse.ArgumentParser( 24 | description='Receptive Field Block Net Training') 25 | parser.add_argument('-max','--max_epoch', default=600, 26 | type=int, help='max epoch for retraining') 27 | parser.add_argument('-b', '--batch_size', default=32, 28 | type=int, help='Batch size for training') 29 | parser.add_argument('--ngpu', default=2, type=int, help='gpus') 30 | parser.add_argument('--lr', '--learning-rate', 31 | default=0.08, type=float, help='initial learning rate') 32 | parser.add_argument('--save_folder', default='./weights/', 33 | help='Location to save checkpoint models') 34 | args = parser.parse_args() 35 | 36 | if not os.path.exists(args.save_folder): 37 | os.mkdir(args.save_folder) 38 | 39 | img_dim = 300 40 | p = 0.5 41 | train_sets = [('2007', 'person_trainval')] 42 | cfg = VOC_Config 43 | rgb_means = (104, 117, 123) 44 | batch_size = args.batch_size 45 | 46 | # tensorboard log directory 47 | # LOG_DIR = 'runs' 48 | log_path = os.path.join('runs', datetime.now().isoformat()) 49 | if not os.path.exists(log_path): 50 | os.makedirs(log_path) 51 | writer = SummaryWriter(log_dir=log_path) 52 | 53 | 54 | net = build_net('train', img_dim, num_classes=3) 55 | 56 | if args.ngpu > 1: 57 | net = torch.nn.DataParallel(net) 58 | 59 | net.cuda() 60 | cudnn.benchmark = True 61 | 62 | optimizer = optim.SGD(net.parameters(), lr=args.lr, 63 | momentum=0.9, weight_decay=1e-4) 64 | 65 | criterion = MultiBoxLoss(num_classes=3, 66 | overlap_thresh=0.4, 67 | prior_for_matching=True, 68 | bkg_label=0, 69 | neg_mining=True, 70 | neg_pos=3, 71 | neg_overlap=0.3, 72 | encode_target=False) 73 | 74 | priorbox = PriorBox(cfg) 75 | with torch.no_grad(): 76 | priors = priorbox.forward() 77 | priors = priors.cuda() 78 | 79 | 80 | def train(): 81 | net.train() 82 | # loss counters 83 | loc_loss = 0 # epoch 84 | conf_loss = 0 85 | epoch = 0 86 | print('Loading Dataset...') 87 | 88 | dataset = VOCDetection(VOCroot, train_sets, preproc(img_dim, rgb_means, p), AnnotationTransform()) 89 | 90 | epoch_size = len(dataset) // args.batch_size 91 | max_iter = args.max_epoch * epoch_size 92 | 93 | stepvalues = (250 * epoch_size, 350 * epoch_size, 500 * epoch_size) 94 | step_index = 0 95 | start_iter = 0 96 | # wangsong sing a song! 
97 | lr = args.lr 98 | for iteration in range(start_iter, max_iter): 99 | if iteration % epoch_size == 0: 100 | if (epoch > 10 and epoch % 10 == 0) or (epoch > 105 and epoch % 2 == 0): 101 | torch.save(net.state_dict(), args.save_folder + 'epoches_' + 102 | repr(epoch).zfill(3) + '.pth') 103 | # create batch iterator 104 | batch_iterator = iter(data.DataLoader(dataset, batch_size, 105 | shuffle=True, num_workers=8, collate_fn=detection_collate)) 106 | loc_loss = 0 107 | conf_loss = 0 108 | epoch += 1 109 | 110 | load_t0 = time.time() 111 | if iteration in stepvalues: 112 | step_index += 1 113 | lr = adjust_learning_rate(optimizer, 0.2, epoch, step_index, iteration, epoch_size) 114 | 115 | 116 | images, targets = next(batch_iterator) 117 | 118 | images = Variable(images.cuda()) 119 | targets = [Variable(anno.cuda()) for anno in targets] 120 | 121 | # forward 122 | t0 = time.time() 123 | out = net(images) 124 | # backprop 125 | optimizer.zero_grad() 126 | loss_l, loss_c = criterion(out, priors, targets) 127 | loss = loss_l + loss_c 128 | loss.backward() 129 | optimizer.step() 130 | t1 = time.time() 131 | loc_loss += loss_l.item() 132 | conf_loss += loss_c.item() 133 | load_t1 = time.time() 134 | 135 | # visualization 136 | visualize_total_loss(writer, loss.item(), iteration) 137 | visualize_loc_loss(writer, loss_l.item(), iteration) 138 | visualize_conf_loss(writer, loss_c.item(), iteration) 139 | 140 | if iteration % 10 == 0: 141 | print('Epoch:' + repr(epoch) + ' || epochiter: ' + repr(iteration % epoch_size) + '/' + repr(epoch_size) 142 | + '|| Totel iter ' + 143 | repr(iteration) + ' || L: %.4f C: %.4f||' % ( 144 | loss_l.item(),loss_c.item()) + 145 | 'Batch time: %.4f sec. ||' % (load_t1 - load_t0) + 'LR: %.8f' % (lr)) 146 | 147 | torch.save(net.state_dict(), args.save_folder + 'epoches_' + 148 | repr(epoch).zfill(3) + '.pth') 149 | 150 | 151 | def adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size): 152 | """Sets the learning rate 153 | # Adapted from PyTorch Imagenet example: 154 | # https://github.com/pytorch/examples/blob/master/imagenet/main.py 155 | """ 156 | if epoch < 11: 157 | lr = 1e-8 + (args.lr-1e-8) * iteration / (epoch_size * 10) 158 | else: 159 | lr = args.lr * (gamma ** (step_index)) 160 | for param_group in optimizer.param_groups: 161 | param_group['lr'] = lr 162 | return lr 163 | 164 | 165 | if __name__ == '__main__': 166 | train() 167 | -------------------------------------------------------------------------------- /utils/box_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | import numpy as np 5 | if torch.cuda.is_available(): 6 | import torch.backends.cudnn as cudnn 7 | 8 | 9 | def point_form(boxes): 10 | """ Convert prior_boxes to (xmin, ymin, xmax, ymax) 11 | representation for comparison to point form ground truth data. 12 | Args: 13 | boxes: (tensor) center-size default boxes from priorbox layers. 14 | Return: 15 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 16 | """ 17 | return torch.cat((boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin 18 | boxes[:, :2] + boxes[:, 2:]/2), 1) # xmax, ymax 19 | 20 | 21 | def center_size(boxes): 22 | """ Convert prior_boxes to (cx, cy, w, h) 23 | representation for comparison to center-size form ground truth data. 24 | Args: 25 | boxes: (tensor) point_form boxes 26 | Return: 27 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 
28 | """ 29 | return torch.cat((boxes[:, 2:] + boxes[:, :2])/2, # cx, cy 30 | boxes[:, 2:] - boxes[:, :2], 1) # w, h 31 | 32 | 33 | def intersect(box_a, box_b): 34 | """ We resize both tensors to [A,B,2] without new malloc: 35 | [A,2] -> [A,1,2] -> [A,B,2] 36 | [B,2] -> [1,B,2] -> [A,B,2] 37 | Then we compute the area of intersect between box_a and box_b. 38 | Args: 39 | box_a: (tensor) bounding boxes, Shape: [A,4]. 40 | box_b: (tensor) bounding boxes, Shape: [B,4]. 41 | Return: 42 | (tensor) intersection area, Shape: [A,B]. 43 | """ 44 | A = box_a.size(0) 45 | B = box_b.size(0) 46 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), 47 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) 48 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), 49 | box_b[:, :2].unsqueeze(0).expand(A, B, 2)) 50 | inter = torch.clamp((max_xy - min_xy), min=0) 51 | return inter[:, :, 0] * inter[:, :, 1] 52 | 53 | 54 | def jaccard(box_a, box_b): 55 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 56 | is simply the intersection over union of two boxes. Here we operate on 57 | ground truth boxes and default boxes. 58 | E.g.: 59 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 60 | Args: 61 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] 62 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] 63 | Return: 64 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] 65 | """ 66 | inter = intersect(box_a, box_b) 67 | area_a = ((box_a[:, 2]-box_a[:, 0]) * 68 | (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] 69 | area_b = ((box_b[:, 2]-box_b[:, 0]) * 70 | (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] 71 | union = area_a + area_b - inter 72 | return inter / union # [A,B] 73 | 74 | def matrix_iou(a,b): 75 | """ 76 | return iou of a and b, numpy version for data augenmentation 77 | """ 78 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) 79 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) 80 | 81 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 82 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 83 | area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) 84 | return area_i / (area_a[:, np.newaxis] + area_b - area_i) 85 | 86 | 87 | def matrix_iof(a, b): 88 | """ 89 | return iof of a and b, numpy version for data augenmentation 90 | """ 91 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) 92 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) 93 | 94 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 95 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 96 | return area_i / np.maximum(area_a[:, np.newaxis], 1) 97 | 98 | def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx): 99 | """Match each prior box with the ground truth box of the highest jaccard 100 | overlap, encode the bounding boxes, then return the matched indices 101 | corresponding to both confidence and location preds. 102 | Args: 103 | threshold: (float) The overlap threshold used when mathing boxes. 104 | truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors]. 105 | priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. 106 | variances: (tensor) Variances corresponding to each prior coord, 107 | Shape: [num_priors, 4]. 108 | labels: (tensor) All the class labels for the image, Shape: [num_obj]. 109 | loc_t: (tensor) Tensor to be filled w/ endcoded location targets. 110 | conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. 
111 | idx: (int) current batch index 112 | Return: 113 | The matched indices corresponding to 1)location and 2)confidence preds. 114 | """ 115 | # jaccard index 116 | overlaps = jaccard( 117 | truths, 118 | point_form(priors) 119 | ) 120 | # (Bipartite Matching) 121 | # [1,num_objects] best prior for each ground truth 122 | best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) 123 | # [1,num_priors] best ground truth for each prior 124 | best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True) 125 | best_truth_idx.squeeze_(0) 126 | best_truth_overlap.squeeze_(0) 127 | best_prior_idx.squeeze_(1) 128 | best_prior_overlap.squeeze_(1) 129 | best_truth_overlap.index_fill_(0, best_prior_idx, 2) # ensure best prior 130 | # TODO refactor: index best_prior_idx with long tensor 131 | # ensure every gt matches with its prior of max overlap 132 | for j in range(best_prior_idx.size(0)): 133 | best_truth_idx[best_prior_idx[j]] = j 134 | matches = truths[best_truth_idx] # Shape: [num_priors,4] 135 | conf = labels[best_truth_idx] # Shape: [num_priors] 136 | conf[best_truth_overlap < threshold] = 0 # label as background 137 | loc = encode(matches, priors, variances) 138 | loc_t[idx] = loc # [num_priors,4] encoded offsets to learn 139 | conf_t[idx] = conf # [num_priors] top class label for each prior 140 | 141 | def encode(matched, priors, variances): 142 | """Encode the variances from the priorbox layers into the ground truth boxes 143 | we have matched (based on jaccard overlap) with the prior boxes. 144 | Args: 145 | matched: (tensor) Coords of ground truth for each prior in point-form 146 | Shape: [num_priors, 4]. 147 | priors: (tensor) Prior boxes in center-offset form 148 | Shape: [num_priors,4]. 149 | variances: (list[float]) Variances of priorboxes 150 | Return: 151 | encoded boxes (tensor), Shape: [num_priors, 4] 152 | """ 153 | 154 | # dist b/t match center and prior's center 155 | g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2] 156 | # encode variance 157 | g_cxcy /= (variances[0] * priors[:, 2:]) 158 | # match wh / prior wh 159 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] 160 | g_wh = torch.log(g_wh) / variances[1] 161 | # return target for smooth_l1_loss 162 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] 163 | 164 | 165 | def encode_multi(matched, priors, offsets, variances): 166 | """Encode the variances from the priorbox layers into the ground truth boxes 167 | we have matched (based on jaccard overlap) with the prior boxes. 168 | Args: 169 | matched: (tensor) Coords of ground truth for each prior in point-form 170 | Shape: [num_priors, 4]. 171 | priors: (tensor) Prior boxes in center-offset form 172 | Shape: [num_priors,4]. 
173 | variances: (list[float]) Variances of priorboxes 174 | Return: 175 | encoded boxes (tensor), Shape: [num_priors, 4] 176 | """ 177 | 178 | # dist b/t match center and prior's center 179 | g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2] - offsets[:,:2] 180 | # encode variance 181 | #g_cxcy /= (variances[0] * priors[:, 2:]) 182 | g_cxcy.div_(variances[0] * offsets[:, 2:]) 183 | # match wh / prior wh 184 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] 185 | g_wh = torch.log(g_wh) / variances[1] 186 | # return target for smooth_l1_loss 187 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] 188 | 189 | # Adapted from https://github.com/Hakuyume/chainer-ssd 190 | def decode(loc, priors, variances): 191 | """Decode locations from predictions using priors to undo 192 | the encoding we did for offset regression at train time. 193 | Args: 194 | loc (tensor): location predictions for loc layers, 195 | Shape: [num_priors,4] 196 | priors (tensor): Prior boxes in center-offset form. 197 | Shape: [num_priors,4]. 198 | variances: (list[float]) Variances of priorboxes 199 | Return: 200 | decoded bounding box predictions 201 | """ 202 | 203 | boxes = torch.cat(( 204 | priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], 205 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) 206 | boxes[:, :2] -= boxes[:, 2:] / 2 207 | boxes[:, 2:] += boxes[:, :2] 208 | return boxes 209 | 210 | def decode_multi(loc, priors, offsets, variances): 211 | """Decode locations from predictions using priors to undo 212 | the encoding we did for offset regression at train time. 213 | Args: 214 | loc (tensor): location predictions for loc layers, 215 | Shape: [num_priors,4] 216 | priors (tensor): Prior boxes in center-offset form. 217 | Shape: [num_priors,4]. 218 | variances: (list[float]) Variances of priorboxes 219 | Return: 220 | decoded bounding box predictions 221 | """ 222 | 223 | boxes = torch.cat(( 224 | priors[:, :2] + offsets[:,:2]+ loc[:, :2] * variances[0] * offsets[:, 2:], 225 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) 226 | boxes[:, :2] -= boxes[:, 2:] / 2 227 | boxes[:, 2:] += boxes[:, :2] 228 | return boxes 229 | 230 | def log_sum_exp(x): 231 | """Utility function for computing log_sum_exp while determining 232 | This will be used to determine unaveraged confidence loss across 233 | all examples in a batch. 234 | Args: 235 | x (Variable(tensor)): conf_preds from conf layers 236 | """ 237 | x_max = x.data.max() 238 | return torch.log(torch.sum(torch.exp(x-x_max), 1, keepdim=True)) + x_max 239 | 240 | 241 | # Original author: Francisco Massa: 242 | # https://github.com/fmassa/object-detection.torch 243 | # Ported to PyTorch by Max deGroot (02/01/2017) 244 | def nms(boxes, scores, overlap=0.5, top_k=200): 245 | """Apply non-maximum suppression at test time to avoid detecting too many 246 | overlapping bounding boxes for a given object. 247 | Args: 248 | boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. 249 | scores: (tensor) The class predscores for the img, Shape:[num_priors]. 250 | overlap: (float) The overlap thresh for suppressing unnecessary boxes. 251 | top_k: (int) The Maximum number of box preds to consider. 252 | Return: 253 | The indices of the kept boxes with respect to num_priors. 
254 | """ 255 | 256 | keep = torch.Tensor(scores.size(0)).fill_(0).long() 257 | if boxes.numel() == 0: 258 | return keep 259 | x1 = boxes[:, 0] 260 | y1 = boxes[:, 1] 261 | x2 = boxes[:, 2] 262 | y2 = boxes[:, 3] 263 | area = torch.mul(x2 - x1, y2 - y1) 264 | v, idx = scores.sort(0) # sort in ascending order 265 | # I = I[v >= 0.01] 266 | idx = idx[-top_k:] # indices of the top-k largest vals 267 | xx1 = boxes.new() 268 | yy1 = boxes.new() 269 | xx2 = boxes.new() 270 | yy2 = boxes.new() 271 | w = boxes.new() 272 | h = boxes.new() 273 | 274 | # keep = torch.Tensor() 275 | count = 0 276 | while idx.numel() > 0: 277 | i = idx[-1] # index of current largest val 278 | # keep.append(i) 279 | keep[count] = i 280 | count += 1 281 | if idx.size(0) == 1: 282 | break 283 | idx = idx[:-1] # remove kept element from view 284 | # load bboxes of next highest vals 285 | torch.index_select(x1, 0, idx, out=xx1) 286 | torch.index_select(y1, 0, idx, out=yy1) 287 | torch.index_select(x2, 0, idx, out=xx2) 288 | torch.index_select(y2, 0, idx, out=yy2) 289 | # store element-wise max with next highest score 290 | xx1 = torch.clamp(xx1, min=x1[i]) 291 | yy1 = torch.clamp(yy1, min=y1[i]) 292 | xx2 = torch.clamp(xx2, max=x2[i]) 293 | yy2 = torch.clamp(yy2, max=y2[i]) 294 | w.resize_as_(xx2) 295 | h.resize_as_(yy2) 296 | w = xx2 - xx1 297 | h = yy2 - yy1 298 | # check sizes of xx1 and xx2.. after each iteration 299 | w = torch.clamp(w, min=0.0) 300 | h = torch.clamp(h, min=0.0) 301 | inter = w*h 302 | # IoU = i / (area(a) + area(b) - i) 303 | rem_areas = torch.index_select(area, 0, idx) # load remaining areas) 304 | union = (rem_areas - inter) + area[i] 305 | IoU = inter/union # store result in iou 306 | # keep only elements with an IoU <= overlap 307 | idx = idx[IoU.le(overlap)] 308 | return keep, count 309 | 310 | 311 | -------------------------------------------------------------------------------- /utils/build.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | import numpy as np 11 | from distutils.core import setup 12 | from distutils.extension import Extension 13 | from Cython.Distutils import build_ext 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 19 | for dir in path.split(os.pathsep): 20 | binpath = pjoin(dir, name) 21 | if os.path.exists(binpath): 22 | return os.path.abspath(binpath) 23 | return None 24 | 25 | 26 | def locate_cuda(): 27 | """Locate the CUDA environment on the system 28 | 29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 30 | and values giving the absolute path to each directory. 31 | 32 | Starts by looking for the CUDAHOME env variable. If not found, everything 33 | is based on finding 'nvcc' in the PATH. 
34 | """ 35 | 36 | # first check if the CUDAHOME env variable is in use 37 | if 'CUDAHOME' in os.environ: 38 | home = os.environ['CUDAHOME'] 39 | nvcc = pjoin(home, 'bin', 'nvcc') 40 | else: 41 | # otherwise, search the PATH for NVCC 42 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 43 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 44 | if nvcc is None: 45 | raise EnvironmentError('The nvcc binary could not be ' 46 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 47 | home = os.path.dirname(os.path.dirname(nvcc)) 48 | 49 | cudaconfig = {'home': home, 'nvcc': nvcc, 50 | 'include': pjoin(home, 'include'), 51 | 'lib64': pjoin(home, 'lib64')} 52 | for k, v in cudaconfig.items(): 53 | if not os.path.exists(v): 54 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 55 | 56 | return cudaconfig 57 | 58 | 59 | CUDA = locate_cuda() 60 | 61 | # Obtain the numpy include directory. This logic works across numpy versions. 62 | try: 63 | numpy_include = np.get_include() 64 | except AttributeError: 65 | numpy_include = np.get_numpy_include() 66 | 67 | 68 | def customize_compiler_for_nvcc(self): 69 | """inject deep into distutils to customize how the dispatch 70 | to gcc/nvcc works. 71 | 72 | If you subclass UnixCCompiler, it's not trivial to get your subclass 73 | injected in, and still have the right customizations (i.e. 74 | distutils.sysconfig.customize_compiler) run on it. So instead of going 75 | the OO route, I have this. Note, it's kindof like a wierd functional 76 | subclassing going on.""" 77 | 78 | # tell the compiler it can processes .cu 79 | self.src_extensions.append('.cu') 80 | 81 | # save references to the default compiler_so and _comple methods 82 | default_compiler_so = self.compiler_so 83 | super = self._compile 84 | 85 | # now redefine the _compile method. This gets executed for each 86 | # object but distutils doesn't have the ability to change compilers 87 | # based on source extension: we add it. 
88 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 89 | print(extra_postargs) 90 | if os.path.splitext(src)[1] == '.cu': 91 | # use the cuda for .cu files 92 | self.set_executable('compiler_so', CUDA['nvcc']) 93 | # use only a subset of the extra_postargs, which are 1-1 translated 94 | # from the extra_compile_args in the Extension class 95 | postargs = extra_postargs['nvcc'] 96 | else: 97 | postargs = extra_postargs['gcc'] 98 | 99 | super(obj, src, ext, cc_args, postargs, pp_opts) 100 | # reset the default compiler_so, which we might have changed for cuda 101 | self.compiler_so = default_compiler_so 102 | 103 | # inject our redefined _compile method into the class 104 | self._compile = _compile 105 | 106 | 107 | # run the customize_compiler 108 | class custom_build_ext(build_ext): 109 | def build_extensions(self): 110 | customize_compiler_for_nvcc(self.compiler) 111 | build_ext.build_extensions(self) 112 | 113 | 114 | ext_modules = [ 115 | Extension( 116 | "nms.cpu_nms", 117 | ["nms/cpu_nms.pyx"], 118 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 119 | include_dirs=[numpy_include] 120 | ), 121 | Extension('nms.gpu_nms', 122 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 123 | library_dirs=[CUDA['lib64']], 124 | libraries=['cudart'], 125 | language='c++', 126 | runtime_library_dirs=[CUDA['lib64']], 127 | # this syntax is specific to this build system 128 | # we're only going to use certain compiler args with nvcc and not with gcc 129 | # the implementation of this trick is in customize_compiler() below 130 | extra_compile_args={'gcc': ["-Wno-unused-function"], 131 | 'nvcc': ['-arch=sm_52', 132 | '--ptxas-options=-v', 133 | '-c', 134 | '--compiler-options', 135 | "'-fPIC'"]}, 136 | include_dirs=[numpy_include, CUDA['include']] 137 | ), 138 | ] 139 | 140 | setup( 141 | name='mot_utils', 142 | ext_modules=ext_modules, 143 | # inject our custom trigger 144 | cmdclass={'build_ext': custom_build_ext}, 145 | ) 146 | -------------------------------------------------------------------------------- /utils/build/temp.linux-aarch64-3.6/nms/cpu_nms.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/build/temp.linux-aarch64-3.6/nms/cpu_nms.o -------------------------------------------------------------------------------- /utils/build/temp.linux-aarch64-3.6/nms/gpu_nms.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/build/temp.linux-aarch64-3.6/nms/gpu_nms.o -------------------------------------------------------------------------------- /utils/build/temp.linux-aarch64-3.6/nms/nms_kernel.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/build/temp.linux-aarch64-3.6/nms/nms_kernel.o -------------------------------------------------------------------------------- /utils/build/temp.linux-x86_64-3.6/nms/cpu_nms.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/build/temp.linux-x86_64-3.6/nms/cpu_nms.o 
-------------------------------------------------------------------------------- /utils/build/temp.linux-x86_64-3.6/nms/gpu_nms.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/build/temp.linux-x86_64-3.6/nms/gpu_nms.o -------------------------------------------------------------------------------- /utils/build/temp.linux-x86_64-3.6/nms/nms_kernel.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/build/temp.linux-x86_64-3.6/nms/nms_kernel.o -------------------------------------------------------------------------------- /utils/nms/cpu_nms.cpython-36m-aarch64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/nms/cpu_nms.cpython-36m-aarch64-linux-gnu.so -------------------------------------------------------------------------------- /utils/nms/cpu_nms.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/nms/cpu_nms.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /utils/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | 
if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def cpu_soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0): 71 | cdef unsigned int N = boxes.shape[0] 72 | cdef float iw, ih, box_area 73 | cdef float ua 74 | cdef int pos = 0 75 | cdef float maxscore = 0 76 | cdef int maxpos = 0 77 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov 78 | 79 | for i in range(N): 80 | maxscore = boxes[i, 4] 81 | maxpos = i 82 | 83 | tx1 = boxes[i,0] 84 | ty1 = boxes[i,1] 85 | tx2 = boxes[i,2] 86 | ty2 = boxes[i,3] 87 | ts = boxes[i,4] 88 | 89 | pos = i + 1 90 | # get max box 91 | while pos < N: 92 | if maxscore < boxes[pos, 4]: 93 | maxscore = boxes[pos, 4] 94 | maxpos = pos 95 | pos = pos + 1 96 | 97 | # add max box as a detection 98 | boxes[i,0] = boxes[maxpos,0] 99 | boxes[i,1] = boxes[maxpos,1] 100 | boxes[i,2] = boxes[maxpos,2] 101 | boxes[i,3] = boxes[maxpos,3] 102 | boxes[i,4] = boxes[maxpos,4] 103 | 104 | # swap ith box with position of max box 105 | boxes[maxpos,0] = tx1 106 | boxes[maxpos,1] = ty1 107 | boxes[maxpos,2] = tx2 108 | boxes[maxpos,3] = ty2 109 | boxes[maxpos,4] = ts 110 | 111 | tx1 = boxes[i,0] 112 | ty1 = boxes[i,1] 113 | tx2 = boxes[i,2] 114 | ty2 = boxes[i,3] 115 | ts = boxes[i,4] 116 | 117 | pos = i + 1 118 | # NMS iterations, note that N changes if detection boxes fall below threshold 119 | while pos < N: 120 | x1 = boxes[pos, 0] 121 | y1 = boxes[pos, 1] 122 | x2 = boxes[pos, 2] 123 | y2 = boxes[pos, 3] 124 | s = boxes[pos, 4] 125 | 126 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 127 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 128 | if iw > 0: 129 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 130 | if ih > 0: 131 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 132 | ov = iw * ih / ua #iou between max box and detection box 133 | 134 | if method == 1: # linear 135 | if ov > Nt: 136 | weight = 1 - ov 137 | else: 138 | weight = 1 139 | elif method == 2: # gaussian 140 | weight = np.exp(-(ov * ov)/sigma) 141 | else: # original NMS 142 | if ov > Nt: 143 | weight = 0 144 | else: 145 | weight = 1 146 | 147 | boxes[pos, 4] = weight*boxes[pos, 4] 148 | 149 | # if box score falls below threshold, discard the box by swapping with last box 150 | # update N 151 | if boxes[pos, 4] < threshold: 152 | boxes[pos,0] = boxes[N-1, 0] 153 | boxes[pos,1] = boxes[N-1, 1] 154 | boxes[pos,2] = boxes[N-1, 2] 155 | boxes[pos,3] = boxes[N-1, 3] 156 | boxes[pos,4] = boxes[N-1, 4] 157 | N = N - 1 158 | pos = pos - 1 159 | 160 | pos = pos + 1 161 | 162 | keep = [i for i in range(N)] 163 | return keep 164 | -------------------------------------------------------------------------------- /utils/nms/gpu_nms.cpython-36m-aarch64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/nms/gpu_nms.cpython-36m-aarch64-linux-gnu.so -------------------------------------------------------------------------------- /utils/nms/gpu_nms.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/utils/nms/gpu_nms.cpython-36m-x86_64-linux-gnu.so
-------------------------------------------------------------------------------- /utils/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 |
-------------------------------------------------------------------------------- /utils/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 |
-------------------------------------------------------------------------------- /utils/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(&current_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 |
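The CUDA kernel above builds a per-block suppression bitmask on the GPU, while the pure-Python baseline in utils/nms/py_cpu_nms.py that follows runs the same greedy NMS on the CPU and is convenient for sanity checks. A minimal usage sketch, with made-up detection values and a hypothetical 0.45 IoU threshold; the import assumes the repository root is on sys.path (the real callers build this array from the decoded network output):

import numpy as np
from utils.nms.py_cpu_nms import py_cpu_nms

# hypothetical detections, one row per box: [x1, y1, x2, y2, score]
dets = np.array([[10., 10., 60., 60., 0.95],      # highest score, kept
                 [12., 12., 62., 62., 0.80],      # heavy overlap with box 0, suppressed
                 [100., 100., 150., 150., 0.70]], # no overlap, kept
                dtype=np.float32)

keep = py_cpu_nms(dets, thresh=0.45)  # indices of surviving boxes, here [0, 2]
print(dets[keep])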
--------------------------------------------------------------------------------
/utils/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def py_cpu_nms(dets, thresh):
11 |     """Pure Python NMS baseline."""
12 |     x1 = dets[:, 0]
13 |     y1 = dets[:, 1]
14 |     x2 = dets[:, 2]
15 |     y2 = dets[:, 3]
16 |     scores = dets[:, 4]
17 |
18 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 |     order = scores.argsort()[::-1]
20 |
21 |     keep = []
22 |     while order.size > 0:
23 |         i = order[0]
24 |         keep.append(i)
25 |         xx1 = np.maximum(x1[i], x1[order[1:]])
26 |         yy1 = np.maximum(y1[i], y1[order[1:]])
27 |         xx2 = np.minimum(x2[i], x2[order[1:]])
28 |         yy2 = np.minimum(y2[i], y2[order[1:]])
29 |
30 |         w = np.maximum(0.0, xx2 - xx1 + 1)
31 |         h = np.maximum(0.0, yy2 - yy1 + 1)
32 |         inter = w * h
33 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 |
35 |         inds = np.where(ovr <= thresh)[0]
36 |         order = order[inds + 1]
37 |
38 |     return keep
39 |
--------------------------------------------------------------------------------
/utils/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | from .nms.cpu_nms import cpu_nms, cpu_soft_nms
9 | from .nms.gpu_nms import gpu_nms
10 |
11 |
12 | # def nms(dets, thresh, force_cpu=False):
13 | #     """Dispatch to either CPU or GPU NMS implementations."""
14 | #
15 | #     if dets.shape[0] == 0:
16 | #         return []
17 | #     if cfg.USE_GPU_NMS and not force_cpu:
18 | #         return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
19 | #     else:
20 | #         return cpu_nms(dets, thresh)
21 |
22 |
23 | def nms(dets, thresh, force_cpu=False):
24 |     """Dispatch to either CPU or GPU NMS implementations."""
25 |
26 |     if dets.shape[0] == 0:
27 |         return []
28 |     if force_cpu:
29 |         #return cpu_soft_nms(dets, thresh, method = 0)
30 |         return cpu_nms(dets, thresh)
31 |     return gpu_nms(dets, thresh)
32 |
--------------------------------------------------------------------------------
/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
10 |
11 | class Timer(object):
12 |     """A simple timer."""
13 |     def __init__(self):
14 |         self.total_time = 0.
15 |         self.calls = 0
16 |         self.start_time = 0.
17 |         self.diff = 0.
18 |         self.average_time = 0.
19 |
20 |     def tic(self):
21 |         # using time.time instead of time.clock because time.clock
22 |         # does not normalize for multithreading
23 |         self.start_time = time.time()
24 |
25 |     def toc(self, average=True):
26 |         self.diff = time.time() - self.start_time
27 |         self.total_time += self.diff
28 |         self.calls += 1
29 |         self.average_time = self.total_time / self.calls
30 |         if average:
31 |             return self.average_time
32 |         else:
33 |             return self.diff
34 |
35 |     def clear(self):
36 |         self.total_time = 0.
37 |         self.calls = 0
38 |         self.start_time = 0.
39 |         self.diff = 0.
40 |         self.average_time = 0.
41 |
--------------------------------------------------------------------------------
/utils/visualize.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 |
5 | import torch
6 | import torch.nn as nn
7 |
8 | from torch.utils.data import DataLoader
9 | from torch.autograd import Variable
10 | from termcolor import cprint
11 |
12 | def print_info(info, _type=None):
13 |     if _type is not None:
14 |         if isinstance(info, str):
15 |             cprint(info, _type[0], attrs=[_type[1]])
16 |         elif isinstance(info, list):
17 |             for i in info:
18 |                 cprint(i, _type[0], attrs=[_type[1]])
19 |     else:
20 |         print(info)
21 |
22 | def get_lastlayer_params(net):
23 |     """get last trainable layer of a net
24 |     Args:
25 |         network architecture
26 |
27 |     Returns:
28 |         last layer weights and last layer bias
29 |     """
30 |     last_layer_weights = None
31 |     last_layer_bias = None
32 |     for name, para in net.named_parameters():
33 |         if 'weight' in name:
34 |             last_layer_weights = para
35 |         if 'bias' in name:
36 |             last_layer_bias = para
37 |
38 |     return last_layer_weights, last_layer_bias
39 |
40 |
41 | def visualize_network(writer, net):
42 |     """visualize network architecture"""
43 |     input_tensor = torch.Tensor(3, 3, 512, 512)
44 |     input_tensor = input_tensor.to(next(net.parameters()))
45 |     writer.add_graph(net, Variable(input_tensor, requires_grad=True))
46 |
47 |
48 | def visualize_lastlayer(writer, net, n_iter):
49 |     """visualize last layer grads"""
50 |     weights, bias = get_lastlayer_params(net)
51 |     writer.add_scalar('LastLayerGradients/grad_norm2_weights', weights.grad.norm(), n_iter)
52 |     writer.add_scalar('LastLayerGradients/grad_norm2_bias', bias.grad.norm(), n_iter)
53 |
54 |
55 | def visualize_total_loss(writer, loss, n_iter):
56 |     """visualize training loss"""
57 |     writer.add_scalar('Train/total_loss', loss, n_iter)
58 |
59 | def visualize_loc_loss(writer, loss, n_iter):
60 |     """visualize training loss"""
61 |     writer.add_scalar('Train/loc_loss', loss, n_iter)
62 |
63 | def visualize_conf_loss(writer, loss, n_iter):
64 |     """visualize training loss"""
65 |     writer.add_scalar('Train/conf_loss', loss, n_iter)
66 |
67 | def visualize_param_hist(writer, net, epoch):
68 |     """visualize histogram of params"""
69 |     for name, param in net.named_parameters():
70 |         layer, attr = os.path.splitext(name)
71 |         attr = attr[1:]
72 |         writer.add_histogram("{}/{}".format(layer, attr), param, epoch)
73 |
74 |
75 | def visualize_test_acc(writer, acc, epoch):
76 |     """visualize test acc"""
77 |     writer.add_scalar('Test/AP', acc, epoch)
78 |
--------------------------------------------------------------------------------
/weights/epoches_100.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ziweizhan/fast-object-detection-nano/8bcb12ed0374318d3910e3aa242f4d1b8e1837f8/weights/epoches_100.pth
--------------------------------------------------------------------------------
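For completeness, a small sketch of how the Timer and the nms dispatcher above are typically combined when profiling post-processing. This assumes the Cython/CUDA extensions have been built with make.sh and that the utils package is importable from the repository root; the detections are random placeholder values:

import numpy as np
from utils.nms_wrapper import nms
from utils.timer import Timer

# 200 hypothetical detections in a 300x300 image, rows of [x1, y1, x2, y2, score]
xy = np.random.rand(200, 2).astype(np.float32) * 250
wh = np.random.rand(200, 2).astype(np.float32) * 50
scores = np.random.rand(200, 1).astype(np.float32)
dets = np.hstack([xy, xy + wh, scores]).astype(np.float32)

timer = Timer()
timer.tic()
keep = nms(dets, 0.45, force_cpu=True)  # force_cpu=True exercises the Cython CPU path
print(len(keep), 'boxes kept; average NMS time:', timer.toc())

Dropping force_cpu=True routes the same call through gpu_nms instead, which is the path demo.py relies on when running on the Nano's GPU.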