├── __init__.py
├── helper
│   ├── __init__.py
│   └── voc_provider.py
├── utils
│   ├── __init__.py
│   ├── image_processing.py
│   ├── vis_det.py
│   └── rand_sampler.py
├── dataset
│   ├── __init__.py
│   ├── imdb.py
│   └── pascal_voc.py
├── evaluator
│   ├── __init__.py
│   └── eval_voc.py
├── dataprovider
│   ├── __init__.py
│   └── det.py
├── .gitignore
├── README.md
└── .idea
    └── vcs.xml

--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/helper/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/dataset/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/evaluator/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/dataprovider/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc
*.idea
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# detetection_utils

A package for object detection with deep neural networks.
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>
--------------------------------------------------------------------------------
/dataset/imdb.py:
--------------------------------------------------------------------------------
import numpy as np

class Imdb(object):
    """
    Base class for image databases; subclasses load a concrete dataset
    and implement the two lookups below.
    """
    def __init__(self, name):
        self.name = name
        self.classes = []
        self.num_classes = 0
        self.image_set_index = []
        self.num_images = 0
        self.labels = None
        self.padding = 0

    def image_path_from_index(self, index):
        """
        load the full image path for a specified index

        Parameters:
        ----------
        index : int
            index of the requested image in the dataset

        Returns:
        ----------
        full path of the specified image
        """
        raise NotImplementedError

    def label_from_index(self, index):
        """
        load the ground-truth of the image at a specified index

        Parameters:
        ----------
        index : int
            index of the requested image in the dataset

        Returns:
        ----------
        object ground-truths, in format
        numpy.array([id, xmin, ymin, xmax, ymax]...)
        """
        raise NotImplementedError
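
# --- usage sketch, added for illustration; not part of the original module ---
# A minimal hedged example of how a subclass fills in the interface; the
# ToyImdb class and its paths are hypothetical.
if __name__ == '__main__':
    class ToyImdb(Imdb):
        def __init__(self):
            super(ToyImdb, self).__init__('toy')
            self.classes = ['cat']
            self.num_classes = 1
            self.image_set_index = ['000001']
            self.num_images = 1
            # one box per image: [cls_id, xmin, ymin, xmax, ymax], normalized
            self.labels = np.array([[[0, 0.1, 0.2, 0.8, 0.9]]])

        def image_path_from_index(self, index):
            return '/data/toy/%s.jpg' % self.image_set_index[index]

        def label_from_index(self, index):
            return self.labels[index]

    db = ToyImdb()
    print(db.image_path_from_index(0))
    print(db.label_from_index(0))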
41 | """ 42 | raise NotImplementedError 43 | -------------------------------------------------------------------------------- /helper/voc_provider.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from utils.rand_sampler import RandCropper, RandPadder 4 | from dataprovider.det import DetIter 5 | from dataset.pascal_voc import PascalVoc 6 | 7 | train_cfg = { 8 | "root_dir": os.path.join(os.path.dirname(__file__), '..'), 9 | "random_samplers": 10 | [ 11 | RandCropper(min_scale=1., max_trials=1, max_sample=1), 12 | RandCropper(min_scale=.3, min_aspect_ratio=.5, max_aspect_ratio=2., min_overlap=.1), 13 | RandCropper(min_scale=.3, min_aspect_ratio=.5, max_aspect_ratio=2., min_overlap=.3), 14 | RandCropper(min_scale=.3, min_aspect_ratio=.5, max_aspect_ratio=2., min_overlap=.5), 15 | RandCropper(min_scale=.3, min_aspect_ratio=.5, max_aspect_ratio=2., min_overlap=.7), 16 | RandPadder(max_scale=2., min_aspect_ratio=.5, max_aspect_ratio=2., min_gt_scale=.05), 17 | RandPadder(max_scale=3., min_aspect_ratio=.5, max_aspect_ratio=2., min_gt_scale=.05), 18 | RandPadder(max_scale=4., min_aspect_ratio=.5, max_aspect_ratio=2., min_gt_scale=.05) 19 | ], 20 | "random_flip": True, 21 | "shuffle": True, 22 | "random_seed": None 23 | } 24 | 25 | # # validation 26 | # cfg.VALID = edict() 27 | # cfg.VALID.RAND_SAMPLERS = [] 28 | # cfg.VALID.RAND_MIRROR = False 29 | # cfg.VALID.INIT_SHUFFLE = False 30 | # cfg.VALID.EPOCH_SHUFFLE = False 31 | # cfg.VALID.RAND_SEED = None 32 | 33 | ssd_prov = DetIter( 34 | imdb=PascalVoc("trainval", "2007", "/unsullied/sharefs/yugang/Dataset/VOC", is_train=True), 35 | batch_size=32, 36 | data_shape=(300, 300), 37 | mean_pixels=[104, 117, 123], 38 | rand_samplers=train_cfg['random_samplers'], 39 | rand_flip=train_cfg['random_flip'], 40 | shuffle=train_cfg['shuffle'], 41 | rand_seed=train_cfg['random_seed'] 42 | ) -------------------------------------------------------------------------------- /utils/image_processing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | def rescale(im, target_size, max_size): 5 | """ 6 | only resize input image to target size and return scale 7 | 8 | Parameters: 9 | ---------- 10 | im : numpy.array 11 | BGR image input by opencv 12 | target_size: int 13 | one dimensional size (the short side) 14 | max_size: int 15 | one dimensional max size (the long side) 16 | 17 | Returns: 18 | ---------- 19 | numpy.array, rescaled image 20 | """ 21 | im_shape = im.shape 22 | im_size_min = np.min(im_shape[0:2]) 23 | im_size_max = np.min(im_shape[0:2]) 24 | im_scale = float(target_size) / float(im_size_min) 25 | # prevent bigger axis from being more than max_size: 26 | if np.round(im_scale * im_size_max) > max_size: 27 | im_scale = float(max_size) / float(im_size_max) 28 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) 29 | return im, im_scale 30 | 31 | def resize(im, target_size, interp_method=cv2.INTER_LINEAR): 32 | """ 33 | resize image to target size regardless of aspect ratio 34 | 35 | Parameters: 36 | ---------- 37 | im : numpy.array 38 | BGR image input by opencv 39 | target_size : tuple (int, int) 40 | (h, w) two dimensional size 41 | Returns: 42 | ---------- 43 | numpy.array, resized image 44 | """ 45 | return cv2.resize(im, target_size, interpolation=interp_method) 46 | 47 | def transform(im, pixel_means): 48 | """ 49 | transform into mxnet tensor 50 | substract pixel size and 
--------------------------------------------------------------------------------
/utils/image_processing.py:
--------------------------------------------------------------------------------
import numpy as np
import cv2

def rescale(im, target_size, max_size):
    """
    resize the input image so its short side matches target_size and return the scale

    Parameters:
    ----------
    im : numpy.array
        BGR image as read by OpenCV
    target_size : int
        one-dimensional size (the short side)
    max_size : int
        one-dimensional max size (the long side)

    Returns:
    ----------
    tuple of (rescaled image as numpy.array, scale factor)
    """
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    # prevent the bigger axis from exceeding max_size:
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
    return im, im_scale

def resize(im, target_size, interp_method=cv2.INTER_LINEAR):
    """
    resize image to target size regardless of aspect ratio

    Parameters:
    ----------
    im : numpy.array
        BGR image as read by OpenCV
    target_size : tuple (int, int)
        (w, h) two-dimensional size, as expected by cv2.resize
    Returns:
    ----------
    numpy.array, resized image
    """
    return cv2.resize(im, target_size, interpolation=interp_method)

def transform(im, pixel_means):
    """
    transform into an mxnet tensor:
    subtract pixel means and transpose to the correct layout

    Parameters:
    ----------
    im : numpy.array
        [height, width, channel] in BGR
    pixel_means : list
        [[[R, G, B pixel means]]]

    Returns:
    ----------
    numpy.array in shape [channel, height, width]
    """
    im = im.copy()
    im[:, :, (0, 1, 2)] = im[:, :, (2, 1, 0)]
    im = im.astype(float)
    im -= pixel_means
    # put channel first
    channel_swap = (2, 0, 1)
    im_tensor = im.transpose(channel_swap)
    return im_tensor


def transform_inverse(im_tensor, pixel_means):
    """
    transform an mxnet im_tensor back to an ordinary RGB image
    im_tensor is limited to one image

    Parameters:
    ----------
    im_tensor : numpy.array
        in shape [batch, channel, height, width]
    pixel_means : list
        [[[R, G, B pixel means]]]

    Returns:
    ----------
    im [height, width, channel(RGB)]
    """
    assert im_tensor.shape[0] == 1
    im_tensor = im_tensor.copy()
    # put channel back
    channel_swap = (0, 2, 3, 1)
    im_tensor = im_tensor.transpose(channel_swap)
    im = im_tensor[0]
    assert im.shape[2] == 3
    im += pixel_means
    im = im.astype(np.uint8)
    return im
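
# --- round-trip sketch, added for illustration; not part of the original module ---
# A minimal hedged check that transform() is undone by transform_inverse();
# the dummy image and the integer means below are illustrative values.
if __name__ == '__main__':
    dummy = np.random.randint(0, 256, size=(4, 6, 3)).astype(np.uint8)  # BGR
    means = [[[123., 117., 104.]]]  # [[[R, G, B]]] means
    tensor = transform(dummy, means)  # (3, 4, 6), RGB, mean-subtracted
    restored = transform_inverse(tensor[np.newaxis, :], means)  # RGB uint8
    # flip the restored RGB image back to BGR to compare with the input
    print(np.array_equal(restored[:, :, ::-1], dummy))  # True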
--------------------------------------------------------------------------------
/utils/vis_det.py:
--------------------------------------------------------------------------------
# coding=utf-8
"""
@author: zeming li
@contact: zengarden2009@gmail.com
@file: vis_det.py
"""
import matplotlib.pyplot as plt
import random
import numpy as np
import cv2


def visualize_detection(img, dets, is_show_label=True, classes=None, thresh=0.5):
    """
    visualize detections in one image

    Parameters:
    ----------
    img : numpy.array image, in BGR format
    dets : numpy.array ssd detections, numpy.array([[x1, y1, x2, y2, cls_id, score]...])
    classes : tuple or list of str class names
    thresh : float, score threshold
    """
    # matplotlib expects RGB, so flip the BGR channels for display
    plt.imshow(img[:, :, ::-1])
    colors = dict()
    for det in dets:
        bb = det[:4].astype(int)

        if is_show_label:
            cls_id = int(det[4])
            score = det[5]
            if cls_id == 0:  # skip background
                continue
            if score > thresh:
                if cls_id not in colors:
                    colors[cls_id] = (random.random(), random.random(), random.random())
                rect = plt.Rectangle((bb[0], bb[1]), bb[2] - bb[0],
                                     bb[3] - bb[1], fill=False,
                                     edgecolor=colors[cls_id],
                                     linewidth=3.5)
                plt.gca().add_patch(rect)
                if classes and len(classes) > cls_id:
                    cls_name = classes[cls_id]
                else:
                    cls_name = str(cls_id)
                plt.gca().text(bb[0], bb[1] - 2,
                               '{:s} {:.3f}'.format(cls_name, score),
                               bbox=dict(facecolor=colors[cls_id], alpha=0.5),
                               fontsize=12, color='white')
        else:
            rect = plt.Rectangle((bb[0], bb[1]), bb[2] - bb[0],
                                 bb[3] - bb[1], fill=False,
                                 edgecolor=(1, 0, 0),
                                 linewidth=3.5)
            plt.gca().add_patch(rect)
    plt.show()


# visualize_detection_old: uses the OpenCV api
def visualize_detection_old(img, dets, is_show_label=True, classes=None, thresh=0.5, name='detection'):
    """
    visualize detections in one image

    Parameters:
    ----------
    img : numpy.array image, in BGR format
    dets : numpy.array ssd detections, numpy.array([[x1, y1, x2, y2, cls_id, score]...])
    classes : tuple or list of str class names
    thresh : float, score threshold
    """
    im = np.array(img)
    colors = dict()
    font = cv2.FONT_HERSHEY_SIMPLEX

    for det in dets:
        bb = det[:4].astype(int)
        if is_show_label:
            cls_id = int(det[4])
            score = det[5]

            if cls_id == 0:  # skip background
                continue
            if score > thresh:
                if cls_id not in colors:
                    colors[cls_id] = (random.random() * 255, random.random() * 255, random.random() * 255)

                cv2.rectangle(im, (bb[0], bb[1]), (bb[2], bb[3]), colors[cls_id], 3)

                if classes and len(classes) > cls_id:
                    cls_name = classes[cls_id]
                else:
                    cls_name = str(cls_id)
                cv2.putText(im, '{:s} {:.3f}'.format(cls_name, score), (bb[0], bb[1] - 2),
                            font, 0.5, colors[cls_id], 1)
        else:
            cv2.rectangle(im, (bb[0], bb[1]), (bb[2], bb[3]), (0, 0, 255), 2)

    cv2.imshow(name, im)
    # 'd' closes the viewer, 'n' leaves the loop so the caller can show the next image
    while True:
        c = cv2.waitKey(100000)
        if c == ord('d'):
            return
        elif c == ord('n'):
            break
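
# --- usage sketch, added for illustration; not part of the original module ---
# A hedged demo on dummy data: one detection above the default threshold is
# drawn with a label, one below it is skipped.
if __name__ == '__main__':
    img = np.random.randint(0, 256, size=(300, 300, 3)).astype(np.uint8)
    dets = np.array([
        [30., 40., 120., 200., 1., 0.9],   # x1, y1, x2, y2, cls_id, score
        [150., 60., 280., 220., 2., 0.4],  # below the 0.5 threshold
    ])
    visualize_detection(img, dets, classes=('background', 'cat', 'dog'))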
--------------------------------------------------------------------------------
/dataprovider/det.py:
--------------------------------------------------------------------------------
import numpy as np
import cv2
from utils.image_processing import resize, transform
from utils.rand_sampler import RandSampler

class DetIter(object):
    """
    Detection data iterator: draws images from an Imdb, applies random
    crop/pad/flip augmentation, and packs them into fixed-size batches.
    """
    def __init__(self, imdb, batch_size, data_shape, mean_pixels=[128, 128, 128],
                 rand_samplers=[], rand_flip=False, shuffle=False, rand_seed=None):

        self._imdb = imdb
        self.batch_size = batch_size
        if isinstance(data_shape, int):
            data_shape = (data_shape, data_shape)
        self._data_shape = data_shape
        self._mean_pixels = mean_pixels
        self.is_train = self._imdb.is_train

        # image shuffle
        if rand_seed:
            np.random.seed(rand_seed)
        self._shuffle = shuffle
        self._size = self._imdb.num_images
        self._current = 0
        self._fake_index = np.arange(self._size)
        if self._shuffle:
            np.random.shuffle(self._fake_index)

        # augmentation
        self._rand_flip = rand_flip
        if not rand_samplers:
            self._rand_samplers = []
        else:
            if not isinstance(rand_samplers, list):
                rand_samplers = [rand_samplers]
            assert isinstance(rand_samplers[0], RandSampler), "Invalid rand sampler"
            self._rand_samplers = rand_samplers

    def _get_batch(self):
        batch_data = []
        batch_label = []
        indices = []
        for i in range(self.batch_size):
            if (self._current + i) >= self._size:
                if not self.is_train:
                    continue
                # use padding from the middle in each epoch (integer division)
                idx = (self._current + i + self._size // 2) % self._size
                fake_index = self._fake_index[idx]
            else:
                fake_index = self._fake_index[self._current + i]

            im_path = self._imdb.image_path_from_index(fake_index)
            img_id = self._imdb.image_set_index[fake_index]
            img = cv2.imread(im_path)
            gt = self._imdb.label_from_index(fake_index).copy() if self.is_train else None
            data, label = self._data_augmentation(img, gt)
            batch_data.append(data)
            if self.is_train:
                batch_label.append(label)
            indices.append(img_id)

        # pad the batch with zero images if it is not fully occupied
        for i in range(self.batch_size - len(batch_data)):
            assert len(batch_data) > 0
            batch_data.append(batch_data[0] * 0)
            indices.append(-1)

        self._current += self.batch_size
        return {
            'data': np.array(batch_data),
            'label': np.array(batch_label) if self.is_train else None,
            'id': indices,
        }

    def _data_augmentation(self, data, label):
        if self.is_train and self._rand_samplers:
            rand_crops = []
            for rs in self._rand_samplers:
                rand_crops += rs.sample(label)
            num_rand_crops = len(rand_crops)

            if num_rand_crops > 0:
                index = int(np.random.uniform(0, 1) * num_rand_crops)
                width = data.shape[1]
                height = data.shape[0]
                crop = rand_crops[index][0]
                xmin = int(crop[0] * width)
                ymin = int(crop[1] * height)
                xmax = int(crop[2] * width)
                ymax = int(crop[3] * height)
                if xmin >= 0 and ymin >= 0 and xmax <= width and ymax <= height:
                    data = data[ymin:ymax, xmin:xmax, :]
                else:
                    # padding mode: paste the image into a gray canvas
                    new_width = xmax - xmin
                    new_height = ymax - ymin
                    offset_x = 0 - xmin
                    offset_y = 0 - ymin
                    data_bak = data
                    data = np.full((new_height, new_width, 3), 128.)
                    data[offset_y:offset_y + height, offset_x:offset_x + width, :] = data_bak
                label = rand_crops[index][1]

        if self.is_train and self._rand_flip:
            if np.random.uniform(0, 1) > 0.5:
                data = cv2.flip(data, 1)
                # mirror the normalized x coordinates of the valid boxes
                valid_mask = np.where(label[:, 0] > -1)[0]
                tmp = 1.0 - label[valid_mask, 1]
                label[valid_mask, 1] = 1.0 - label[valid_mask, 3]
                label[valid_mask, 3] = tmp

        if self.is_train:
            interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA,
                              cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
        else:
            interp_methods = [cv2.INTER_LINEAR]
        interp_method = interp_methods[int(np.random.uniform(0, 1) * len(interp_methods))]
        data = resize(data, self._data_shape, interp_method)
        data = transform(data, self._mean_pixels)
        return data, label

    def meg_get_batch(self):
        batch = self._get_batch()
        return {'img': batch['data'], 'gt_boxes': batch['label'], 'imgID': batch['id']}
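
# --- usage sketch, added for illustration; not part of the original module ---
# A hedged example wiring DetIter to a dataset; the devkit path is a
# placeholder for a real VOCdevkit checkout.
if __name__ == '__main__':
    from dataset.pascal_voc import PascalVoc
    from utils.rand_sampler import RandCropper
    imdb = PascalVoc("trainval", "2007", "/path/to/VOCdevkit", is_train=True)
    det_iter = DetIter(imdb, batch_size=4, data_shape=300,  # an int expands to (300, 300)
                       mean_pixels=[104, 117, 123],
                       rand_samplers=[RandCropper(min_scale=.5, min_overlap=.5)],
                       rand_flip=True, shuffle=True, rand_seed=233)
    batch = det_iter.meg_get_batch()
    print(batch['img'].shape)       # (4, 3, 300, 300)
    print(batch['gt_boxes'].shape)  # (4, padding, 5)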
--------------------------------------------------------------------------------
/evaluator/eval_voc.py:
--------------------------------------------------------------------------------
"""
given a pascal voc imdb, compute mAP
"""

import numpy as np
import os
try:
    import cPickle as pickle  # Python 2
except ImportError:
    import pickle  # Python 3


def parse_voc_rec(filename):
    """
    parse a pascal voc annotation record into a dictionary
    :param filename: xml file path
    :return: list of dict
    """
    import xml.etree.ElementTree as ET
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        obj_dict = dict()
        obj_dict['name'] = obj.find('name').text
        obj_dict['difficult'] = int(obj.find('difficult').text)
        bbox = obj.find('bndbox')
        obj_dict['bbox'] = [int(bbox.find('xmin').text),
                            int(bbox.find('ymin').text),
                            int(bbox.find('xmax').text),
                            int(bbox.find('ymax').text)]
        objects.append(obj_dict)
    return objects


def voc_ap(rec, prec, use_07_metric=False):
    """
    average precision calculation
    [precision integrated over recall]
    :param rec: recall
    :param prec: precision
    :param use_07_metric: 2007 metric is 11-recall-point based AP
    :return: average precision
    """
    if use_07_metric:
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            if np.sum(rec >= t) == 0:
                p = 0
            else:
                p = np.max(prec[rec >= t])
            ap += p / 11.
    else:
        # append sentinel values at both ends
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))

        # compute the precision integration ladder
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

        # look for recall value changes
        i = np.where(mrec[1:] != mrec[:-1])[0]

        # sum (\delta recall) * prec
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


def voc_eval(detpath, annopath, imageset_file, classname, cache_dir, ovthresh=0.5, use_07_metric=False):
    """
    pascal voc evaluation
    :param detpath: detection results detpath.format(classname)
    :param annopath: annotations annopath.format(classname)
    :param imageset_file: text file containing the list of images
    :param classname: category name
    :param cache_dir: directory for caching annotations
    :param ovthresh: overlap threshold
    :param use_07_metric: whether to use voc07's 11-point ap computation
    :return: rec, prec, ap
    """
    if not os.path.isdir(cache_dir):
        os.mkdir(cache_dir)
    cache_file = os.path.join(cache_dir, 'annotations.pkl')
    with open(imageset_file, 'r') as f:
        lines = f.readlines()
    image_filenames = [x.strip() for x in lines]

    # load annotations from cache (pickle needs binary file modes)
    if not os.path.isfile(cache_file):
        recs = {}
        for ind, image_filename in enumerate(image_filenames):
            recs[image_filename] = parse_voc_rec(annopath.format(image_filename))
            if ind % 100 == 0:
                print('reading annotations for {:d}/{:d}'.format(ind + 1, len(image_filenames)))
        print('saving annotations cache to {:s}'.format(cache_file))
        with open(cache_file, 'wb') as f:
            pickle.dump(recs, f)
    else:
        with open(cache_file, 'rb') as f:
            recs = pickle.load(f)

    # extract objects of :param classname:
    class_recs = {}
    npos = 0
    for image_filename in image_filenames:
        objects = [obj for obj in recs[image_filename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in objects])
        difficult = np.array([x['difficult'] for x in objects]).astype(bool)
        det = [False] * len(objects)  # stands for detected
        npos = npos + sum(~difficult)
        class_recs[image_filename] = {'bbox': bbox,
                                      'difficult': difficult,
                                      'det': det}

    # read detections
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        lines = f.readlines()

    splitlines = [x.strip().split(' ') for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    bbox = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # sort by confidence
    sorted_inds = np.argsort(-confidence)
    sorted_scores = np.sort(-confidence)
    bbox = bbox[sorted_inds, :]
    image_ids = [image_ids[x] for x in sorted_inds]

    # go down detections and mark true positives and false positives
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        r = class_recs[image_ids[d]]
        bb = bbox[d, :].astype(float)
        ovmax = -np.inf
        bbgt = r['bbox'].astype(float)

        if bbgt.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(bbgt[:, 0], bb[0])
            iymin = np.maximum(bbgt[:, 1], bb[1])
            ixmax = np.minimum(bbgt[:, 2], bb[2])
            iymax = np.minimum(bbgt[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (bbgt[:, 2] - bbgt[:, 0] + 1.) *
                   (bbgt[:, 3] - bbgt[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            if not r['difficult'][jmax]:
                if not r['det'][jmax]:
                    tp[d] = 1.
                    r['det'][jmax] = 1
                else:
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid division by zero in case the first detection matches a difficult ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap
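
# --- worked example, added for illustration; not part of the original module ---
# A toy precision/recall curve: with the area-under-curve metric the AP is
# 0.5 * 1.0 + 0.5 * (2/3) ~= 0.833.
if __name__ == '__main__':
    rec = np.array([0.5, 0.5, 1.0])
    prec = np.array([1.0, 0.5, 2.0 / 3.0])
    print(voc_ap(rec, prec, use_07_metric=False))  # ~0.8333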
--------------------------------------------------------------------------------
/dataset/pascal_voc.py:
--------------------------------------------------------------------------------
import os
import numpy as np
from .imdb import Imdb
import xml.etree.ElementTree as ET
from evaluator.eval_voc import voc_eval
import cv2

class PascalVoc(Imdb):
    """
    Implementation of Imdb for Pascal VOC datasets

    Parameters:
    ----------
    image_set : str
        set to be used, can be train, val, trainval, test
    year : str
        year of dataset, can be 2007, 2010, 2012...
    devkit_path : str
        devkit path of the VOC dataset
    shuffle : boolean
        whether to shuffle the image list initially
    is_train : boolean
        if true, will load annotations
    """
    def __init__(self, image_set, year, devkit_path, shuffle=False, is_train=False):
        super(PascalVoc, self).__init__('voc_' + year + '_' + image_set)
        self.image_set = image_set
        self.year = year
        self.devkit_path = devkit_path
        self.data_path = os.path.join(devkit_path, 'VOC' + year)
        self.extension = '.jpg'
        self.is_train = is_train

        self.classes = ['aeroplane', 'bicycle', 'bird', 'boat',
                        'bottle', 'bus', 'car', 'cat', 'chair',
                        'cow', 'diningtable', 'dog', 'horse',
                        'motorbike', 'person', 'pottedplant',
                        'sheep', 'sofa', 'train', 'tvmonitor']

        self.config = {'use_difficult': True,
                       'comp_id': 'comp4',
                       'padding': 56}

        self.num_classes = len(self.classes)
        self.image_set_index = self._load_image_set_index(shuffle)
        self.num_images = len(self.image_set_index)
        if self.is_train:
            self.labels = self._load_image_labels()

    @property
    def cache_path(self):
        """
        make a directory to store all caches

        Returns:
        ---------
        cache path
        """
        cache_path = os.path.join(os.path.dirname(__file__), '..', 'cache')
        if not os.path.exists(cache_path):
            os.mkdir(cache_path)
        return cache_path

    def _load_image_set_index(self, shuffle):
        """
        find out which indexes correspond to the given image set (train or val)

        Parameters:
        ----------
        shuffle : boolean
            whether to shuffle the image list
        Returns:
        ----------
        entire list of images specified in the setting
        """
        image_set_index_file = os.path.join(self.data_path, 'ImageSets', 'Main', self.image_set + '.txt')
        assert os.path.exists(image_set_index_file), 'Path does not exist: {}'.format(image_set_index_file)
        with open(image_set_index_file) as f:
            image_set_index = [x.strip() for x in f.readlines()]
        if shuffle:
            np.random.shuffle(image_set_index)
        return image_set_index
    def image_path_from_index(self, index):
        """
        given an image index, find out the full path

        Parameters:
        ----------
        index : int
            index of a specific image
        Returns:
        ----------
        full path of this image
        """
        assert self.image_set_index is not None, "Dataset not initialized"
        name = self.image_set_index[index]
        image_file = os.path.join(self.data_path, 'JPEGImages', name + self.extension)
        assert os.path.exists(image_file), 'Path does not exist: {}'.format(image_file)
        return image_file

    def label_from_index(self, index):
        """
        given an image index, return the preprocessed ground-truth

        Parameters:
        ----------
        index : int
            index of a specific image
        Returns:
        ----------
        ground-truths of this image
        """
        assert self.labels is not None, "Labels not processed"
        return self.labels[index, :, :]

    def _label_path_from_index(self, index):
        """
        given an image name, find out the annotation path

        Parameters:
        ----------
        index : str
            image name stem, i.e. an entry of image_set_index

        Returns:
        ----------
        full path of the annotation file
        """
        label_file = os.path.join(self.data_path, 'Annotations', index + '.xml')
        assert os.path.exists(label_file), 'Path does not exist: {}'.format(label_file)
        return label_file

    def _load_image_labels(self):
        """
        preprocess all ground-truths

        Returns:
        ----------
        labels packed in a [num_images x max_num_objects x 5] tensor
        """
        temp = []
        max_objects = 0

        # load ground-truth from the xml annotations
        for idx in self.image_set_index:
            label_file = self._label_path_from_index(idx)
            tree = ET.parse(label_file)
            root = tree.getroot()
            size = root.find('size')
            width = float(size.find('width').text)
            height = float(size.find('height').text)
            label = []

            for obj in root.iter('object'):
                difficult = int(obj.find('difficult').text)
                if not self.config['use_difficult'] and difficult == 1:
                    continue
                cls_name = obj.find('name').text
                if cls_name not in self.classes:
                    continue
                cls_id = self.classes.index(cls_name)
                xml_box = obj.find('bndbox')
                xmin = float(xml_box.find('xmin').text) / width
                ymin = float(xml_box.find('ymin').text) / height
                xmax = float(xml_box.find('xmax').text) / width
                ymax = float(xml_box.find('ymax').text) / height
                label.append([cls_id, xmin, ymin, xmax, ymax])
            temp.append(np.array(label))
            max_objects = max(max_objects, len(label))

        # pad labels so that the dimensions match in each batch
        # TODO: design a better way to handle label padding
        assert max_objects > 0, "No objects found for any of the images"
        assert max_objects <= self.config['padding'], "# obj exceed padding"
        self.padding = self.config['padding']
        labels = []
        for label in temp:
            label = np.lib.pad(label, ((0, self.padding - label.shape[0]), (0, 0)),
                               'constant', constant_values=(-1, -1))
            labels.append(label)

        return np.array(labels)
    def evaluate_detections(self, detections):
        """
        top level evaluation entry point
        Parameters:
        ----------
        detections : list
            result list, each entry is a matrix of detections
        Returns:
        ----------
        None
        """
        # make all the folders needed for results
        result_dir = os.path.join(self.devkit_path, 'results')
        if not os.path.exists(result_dir):
            os.mkdir(result_dir)
        year_folder = os.path.join(self.devkit_path, 'results', 'VOC' + self.year)
        if not os.path.exists(year_folder):
            os.mkdir(year_folder)
        res_file_folder = os.path.join(self.devkit_path, 'results', 'VOC' + self.year, 'Main')
        if not os.path.exists(res_file_folder):
            os.mkdir(res_file_folder)

        self.write_pascal_results(detections)
        self.do_python_eval()

    def get_result_file_template(self):
        """
        this is a template, e.g.
        VOCdevkit/results/VOC2007/Main/comp4_det_test_aeroplane.txt

        Returns:
        ----------
        a string template
        """
        res_file_folder = os.path.join(self.devkit_path, 'results', 'VOC' + self.year, 'Main')
        comp_id = self.config['comp_id']
        filename = comp_id + '_det_' + self.image_set + '_{:s}.txt'
        path = os.path.join(res_file_folder, filename)
        return path

    def write_pascal_results(self, all_boxes):
        """
        write results files in the pascal devkit path
        Parameters:
        ----------
        all_boxes : list
            boxes to be processed [bbox, confidence]
        Returns:
        ----------
        None
        """
        for cls_ind, cls in enumerate(self.classes):
            print('Writing {} VOC results file'.format(cls))
            filename = self.get_result_file_template().format(cls)
            with open(filename, 'wt') as f:
                for im_ind, index in enumerate(self.image_set_index):
                    dets = all_boxes[im_ind]
                    if dets.shape[0] < 1:
                        continue
                    h, w = self._get_imsize(self.image_path_from_index(im_ind))
                    # the VOCdevkit expects 1-based indices
                    for k in range(dets.shape[0]):
                        if int(dets[k, 0]) == cls_ind:
                            f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                                    format(index, dets[k, 1],
                                           int(dets[k, 2] * w) + 1, int(dets[k, 3] * h) + 1,
                                           int(dets[k, 4] * w) + 1, int(dets[k, 5] * h) + 1))

    def do_python_eval(self):
        """
        python evaluation wrapper

        Returns:
        ----------
        None
        """
        annopath = os.path.join(self.data_path, 'Annotations', '{:s}.xml')
        imageset_file = os.path.join(self.data_path, 'ImageSets', 'Main', self.image_set + '.txt')
        cache_dir = os.path.join(self.cache_path, self.name)
        aps = []
        # the PASCAL VOC metric changed in 2010
        use_07_metric = int(self.year) < 2010
        print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
        for cls_ind, cls in enumerate(self.classes):
            filename = self.get_result_file_template().format(cls)
            rec, prec, ap = voc_eval(filename, annopath, imageset_file, cls, cache_dir,
                                     ovthresh=0.5, use_07_metric=use_07_metric)
            aps += [ap]
            print('AP for {} = {:.4f}'.format(cls, ap))
        print('Mean AP = {:.4f}'.format(np.mean(aps)))

    def _get_imsize(self, im_name):
        """
        get image size info
        Returns:
        ----------
        tuple of (height, width)
        """
        img = cv2.imread(im_name)
        return (img.shape[0], img.shape[1])
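
# --- usage sketch, added for illustration; not part of the original module ---
# A hedged example; the devkit path is a placeholder for a real VOCdevkit
# checkout.
if __name__ == '__main__':
    voc = PascalVoc("trainval", "2007", "/path/to/VOCdevkit", is_train=True)
    print(voc.num_images, voc.num_classes)
    print(voc.image_path_from_index(0))
    print(voc.label_from_index(0)[:3])  # rows: [cls_id, xmin, ymin, xmax, ymax]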
--------------------------------------------------------------------------------
/utils/rand_sampler.py:
--------------------------------------------------------------------------------
import numpy as np
import math

class RandSampler(object):
    """
    Random sampler base class, used for data augmentation

    Parameters:
    ----------
    max_trials : int
        maximum number of trials; if exceeded, give up anyway
    max_sample : int
        maximum number of random crop samples to be generated
    """
    def __init__(self, max_trials, max_sample):
        assert max_trials > 0
        self.max_trials = int(max_trials)
        assert max_sample >= 0
        self.max_sample = int(max_sample)

    def sample(self, label):
        """
        Interface for calling the sampling function

        Parameters:
        ----------
        label : numpy.array (n x 5 matrix)
            ground-truths

        Returns:
        ----------
        list of (crop_box, label) tuples; if sampling failed, an empty list []
        """
        raise NotImplementedError


class RandCropper(RandSampler):
    """
    Random cropping of original images with various settings

    Parameters:
    ----------
    min_scale : float
        minimum crop scale, (0, 1]
    max_scale : float
        maximum crop scale, (0, 1], must be no smaller than min_scale
    min_aspect_ratio : float
        minimum crop aspect ratio, (0, 1]
    max_aspect_ratio : float
        maximum crop aspect ratio, [1, inf)
    min_overlap : float
        threshold of minimum overlap between a random crop and any gt
    max_trials : int
        maximum number of trials; if exceeded, give up anyway
    max_sample : int
        maximum number of random crop samples to be generated
    """
    def __init__(self, min_scale=1., max_scale=1.,
                 min_aspect_ratio=1., max_aspect_ratio=1.,
                 min_overlap=0., max_trials=50, max_sample=1):
        super(RandCropper, self).__init__(max_trials, max_sample)
        assert min_scale <= max_scale, "min_scale must <= max_scale"
        assert 0 < min_scale <= 1, "min_scale must be in (0, 1]"
        assert 0 < max_scale <= 1, "max_scale must be in (0, 1]"
        self.min_scale = min_scale
        self.max_scale = max_scale
        assert 0 < min_aspect_ratio <= 1, "min_ratio must be in (0, 1]"
        assert 1 <= max_aspect_ratio, "max_ratio must be >= 1"
        self.min_aspect_ratio = min_aspect_ratio
        self.max_aspect_ratio = max_aspect_ratio
        assert 0 <= min_overlap <= 1, "min_overlap must be in [0, 1]"
        self.min_overlap = min_overlap

        self.config = {'gt_constraint': 'center'}
    def sample(self, label):
        """
        generate random cropping boxes according to the parameters;
        if satisfactory crops are generated, apply them to the ground-truth as well

        Parameters:
        ----------
        label : numpy.array (n x 5 matrix)
            ground-truths

        Returns:
        ----------
        list of (crop_box, label) tuples; if sampling failed, an empty list []
        """
        samples = []
        count = 0
        for trial in range(self.max_trials):
            if count >= self.max_sample:
                return samples
            scale = np.random.uniform(self.min_scale, self.max_scale)
            min_ratio = max(self.min_aspect_ratio, scale * scale)
            max_ratio = min(self.max_aspect_ratio, 1. / scale / scale)
            ratio = math.sqrt(np.random.uniform(min_ratio, max_ratio))
            width = scale * ratio
            height = scale / ratio
            left = np.random.uniform(0., 1 - width)
            top = np.random.uniform(0., 1 - height)
            rand_box = (left, top, left + width, top + height)
            valid_mask = np.where(label[:, 0] > -1)[0]
            gt = label[valid_mask, :]
            ious = self._check_satisfy(rand_box, gt)
            if ious is not None:
                # transform gt labels after the crop, discard bad ones
                l, t, r, b = rand_box
                new_gt_boxes = []
                new_width = r - l
                new_height = b - t
                for i in range(valid_mask.size):
                    if ious[i] > 0:
                        xmin = max(0., (gt[i, 1] - l) / new_width)
                        ymin = max(0., (gt[i, 2] - t) / new_height)
                        xmax = min(1., (gt[i, 3] - l) / new_width)
                        ymax = min(1., (gt[i, 4] - t) / new_height)
                        new_gt_boxes.append([gt[i, 0], xmin, ymin, xmax, ymax])
                if not new_gt_boxes:
                    continue
                new_gt_boxes = np.array(new_gt_boxes)
                label = np.lib.pad(new_gt_boxes,
                                   ((0, label.shape[0] - new_gt_boxes.shape[0]), (0, 0)),
                                   'constant', constant_values=(-1, -1))
                samples.append((rand_box, label))
                count += 1
        return samples

    def _check_satisfy(self, rand_box, gt_boxes):
        """
        check whether the overlap with any gt box is larger than the threshold
        """
        l, t, r, b = rand_box
        num_gt = gt_boxes.shape[0]
        ls = np.ones(num_gt) * l
        ts = np.ones(num_gt) * t
        rs = np.ones(num_gt) * r
        bs = np.ones(num_gt) * b
        mask = np.where(ls < gt_boxes[:, 1])[0]
        ls[mask] = gt_boxes[mask, 1]
        mask = np.where(ts < gt_boxes[:, 2])[0]
        ts[mask] = gt_boxes[mask, 2]
        mask = np.where(rs > gt_boxes[:, 3])[0]
        rs[mask] = gt_boxes[mask, 3]
        mask = np.where(bs > gt_boxes[:, 4])[0]
        bs[mask] = gt_boxes[mask, 4]
        w = rs - ls
        w[w < 0] = 0
        h = bs - ts
        h[h < 0] = 0
        inter_area = h * w
        union_area = np.ones(num_gt) * max(0, r - l) * max(0, b - t)
        union_area += (gt_boxes[:, 3] - gt_boxes[:, 1]) * (gt_boxes[:, 4] - gt_boxes[:, 2])
        union_area -= inter_area
        ious = inter_area / union_area
        ious[union_area <= 0] = 0
        max_iou = np.amax(ious)
        if max_iou < self.min_overlap:
            return None
        # check the ground-truth constraint
        if self.config['gt_constraint'] == 'center':
            for i in range(ious.shape[0]):
                if ious[i] > 0:
                    gt_x = (gt_boxes[i, 1] + gt_boxes[i, 3]) / 2.0
                    gt_y = (gt_boxes[i, 2] + gt_boxes[i, 4]) / 2.0
                    if gt_x < l or gt_x > r or gt_y < t or gt_y > b:
                        return None
        elif self.config['gt_constraint'] == 'corner':
            for i in range(ious.shape[0]):
                if ious[i] > 0:
                    if gt_boxes[i, 1] < l or gt_boxes[i, 3] > r \
                            or gt_boxes[i, 2] < t or gt_boxes[i, 4] > b:
                        return None
        return ious
class RandPadder(RandSampler):
    """
    Random padding of original images with various settings

    Parameters:
    ----------
    min_scale : float
        minimum padding scale, [1, inf)
    max_scale : float
        maximum padding scale, [1, inf), must be no smaller than min_scale
    min_aspect_ratio : float
        minimum padding aspect ratio, (0, 1]
    max_aspect_ratio : float
        maximum padding aspect ratio, [1, inf)
    min_gt_scale : float
        minimum ground-truth scale to be satisfied after padding,
        either width or height, [0, 1]
    max_trials : int
        maximum number of trials; if exceeded, give up anyway
    max_sample : int
        maximum number of random padding samples to be generated
    """
    def __init__(self, min_scale=1., max_scale=1., min_aspect_ratio=1.,
                 max_aspect_ratio=1., min_gt_scale=.01, max_trials=50,
                 max_sample=1):
        super(RandPadder, self).__init__(max_trials, max_sample)
        assert min_scale <= max_scale, "min_scale must <= max_scale"
        assert min_scale >= 1, "min_scale must be >= 1"
        self.min_scale = min_scale
        self.max_scale = max_scale
        assert 0 < min_aspect_ratio <= 1, "min_ratio must be in (0, 1]"
        assert 1 <= max_aspect_ratio, "max_ratio must be >= 1"
        self.min_aspect_ratio = min_aspect_ratio
        self.max_aspect_ratio = max_aspect_ratio
        assert 0 <= min_gt_scale <= 1, "min_gt_scale must be in [0, 1]"
        self.min_gt_scale = min_gt_scale

    def sample(self, label):
        """
        generate random padding boxes according to the parameters;
        if a satisfactory padding is generated, apply it to the ground-truth as well

        Parameters:
        ----------
        label : numpy.array (n x 5 matrix)
            ground-truths

        Returns:
        ----------
        list of (crop_box, label) tuples; if sampling failed, an empty list []
        """
        samples = []
        count = 0
        for trial in range(self.max_trials):
            if count >= self.max_sample:
                return samples
            scale = np.random.uniform(self.min_scale, self.max_scale)
            min_ratio = max(self.min_aspect_ratio, scale * scale)
            max_ratio = min(self.max_aspect_ratio, 1. / scale / scale)
            ratio = math.sqrt(np.random.uniform(min_ratio, max_ratio))
            width = scale * ratio
            if width < 1:
                continue
            height = scale / ratio
            if height < 1:
                continue
            left = np.random.uniform(0., 1 - width)
            top = np.random.uniform(0., 1 - height)
            right = left + width
            bot = top + height
            rand_box = (left, top, right, bot)
            valid_mask = np.where(label[:, 0] > -1)[0]
            gt = label[valid_mask, :]
            new_gt_boxes = []
            for i in range(gt.shape[0]):
                xmin = (gt[i, 1] - left) / width
                ymin = (gt[i, 2] - top) / height
                xmax = (gt[i, 3] - left) / width
                ymax = (gt[i, 4] - top) / height
                new_size = min(xmax - xmin, ymax - ymin)
                if new_size < self.min_gt_scale:
                    new_gt_boxes = []
                    break
                new_gt_boxes.append([gt[i, 0], xmin, ymin, xmax, ymax])
            if not new_gt_boxes:
                continue
            new_gt_boxes = np.array(new_gt_boxes)
            label = np.lib.pad(new_gt_boxes,
                               ((0, label.shape[0] - new_gt_boxes.shape[0]), (0, 0)),
                               'constant', constant_values=(-1, -1))
            samples.append((rand_box, label))
            count += 1
        return samples
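
# --- usage sketch, added for illustration; not part of the original module ---
# A self-contained hedged demo: draw up to two random crops around a single
# normalized ground-truth box and print the transformed labels.
if __name__ == '__main__':
    np.random.seed(0)
    label = np.array([[0, 0.25, 0.25, 0.75, 0.75]])  # [cls_id, xmin, ymin, xmax, ymax]
    cropper = RandCropper(min_scale=.5, min_overlap=.5, max_sample=2)
    for crop_box, new_label in cropper.sample(label):
        print(crop_box, new_label[0])
--------------------------------------------------------------------------------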