├── __init__.py
├── helper
│   ├── __init__.py
│   └── voc_provider.py
├── utils
│   ├── __init__.py
│   ├── image_processing.py
│   ├── vis_det.py
│   └── rand_sampler.py
├── dataset
│   ├── __init__.py
│   ├── imdb.py
│   └── pascal_voc.py
├── evaluator
│   ├── __init__.py
│   └── eval_voc.py
├── dataprovider
│   ├── __init__.py
│   └── det.py
├── .gitignore
├── README.md
└── .idea
    └── vcs.xml
/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/helper/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dataset/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/evaluator/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dataprovider/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.idea
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # detetection_utils
2 | 
3 | A package for object detection with deep neural networks.
4 | 
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 |   </component>
6 | </project>
--------------------------------------------------------------------------------
/dataset/imdb.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | class Imdb(object):
4 | def __init__(self, name):
5 | self.name = name
6 | self.classes = []
7 | self.num_classes = 0
8 | self.image_set_index = []
9 | self.num_images = 0
10 | self.labels = None
11 | self.padding = 0
12 |
13 | def image_path_from_index(self, index):
14 | """
15 | load image full path given specified index
16 |
17 | Parameters:
18 | ----------
19 | index : int
20 | index of image requested in dataset
21 |
22 | Returns:
23 | ----------
24 | full path of specified image
25 | """
26 | raise NotImplementedError
27 |
28 | def label_from_index(self, index):
29 | """
30 | load ground-truth of image given specified index
31 |
32 | Parameters:
33 | ----------
34 | index : int
35 | index of image requested in dataset
36 |
37 | Returns:
38 | ----------
39 | object ground-truths, in format
40 | numpy.array([id, xmin, ymin, xmax, ymax]...)
41 | """
42 | raise NotImplementedError
43 |
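44 | # Minimal subclass sketch (illustration only; the path and box values are
45 | # hypothetical): a concrete dataset only needs to fill in the two hooks above.
46 | # class ToyDb(Imdb):
47 | #     def image_path_from_index(self, index):
48 | #         return '/tmp/toy/{}.jpg'.format(self.image_set_index[index])
49 | #     def label_from_index(self, index):
50 | #         return np.array([[0, 0.1, 0.1, 0.9, 0.9]])  # [id, xmin, ymin, xmax, ymax]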
--------------------------------------------------------------------------------
/helper/voc_provider.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from utils.rand_sampler import RandCropper, RandPadder
4 | from dataprovider.det import DetIter
5 | from dataset.pascal_voc import PascalVoc
6 |
7 | train_cfg = {
8 | "root_dir": os.path.join(os.path.dirname(__file__), '..'),
9 | "random_samplers":
10 | [
11 | RandCropper(min_scale=1., max_trials=1, max_sample=1),
12 | RandCropper(min_scale=.3, min_aspect_ratio=.5, max_aspect_ratio=2., min_overlap=.1),
13 | RandCropper(min_scale=.3, min_aspect_ratio=.5, max_aspect_ratio=2., min_overlap=.3),
14 | RandCropper(min_scale=.3, min_aspect_ratio=.5, max_aspect_ratio=2., min_overlap=.5),
15 | RandCropper(min_scale=.3, min_aspect_ratio=.5, max_aspect_ratio=2., min_overlap=.7),
16 | RandPadder(max_scale=2., min_aspect_ratio=.5, max_aspect_ratio=2., min_gt_scale=.05),
17 | RandPadder(max_scale=3., min_aspect_ratio=.5, max_aspect_ratio=2., min_gt_scale=.05),
18 | RandPadder(max_scale=4., min_aspect_ratio=.5, max_aspect_ratio=2., min_gt_scale=.05)
19 | ],
20 | "random_flip": True,
21 | "shuffle": True,
22 | "random_seed": None
23 | }
24 |
25 | # validation config (no augmentation), kept for reference:
26 | # valid_cfg = {
27 | #     "random_samplers": [],
28 | #     "random_flip": False,
29 | #     "shuffle": False,
30 | #     "random_seed": None
31 | # }
32 |
33 | ssd_prov = DetIter(
34 | imdb=PascalVoc("trainval", "2007", "/unsullied/sharefs/yugang/Dataset/VOC", is_train=True),
35 | batch_size=32,
36 | data_shape=(300, 300),
37 | mean_pixels=[104, 117, 123],
38 | rand_samplers=train_cfg['random_samplers'],
39 | rand_flip=train_cfg['random_flip'],
40 | shuffle=train_cfg['shuffle'],
41 | rand_seed=train_cfg['random_seed']
42 | )
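43 | 
44 | # Usage sketch (illustration only): meg_get_batch() is defined in
45 | # dataprovider/det.py; building ssd_prov above requires the VOC devkit
46 | # path to actually exist.
47 | # batch = ssd_prov.meg_get_batch()
48 | # batch['img'].shape       # (32, 3, 300, 300)
49 | # batch['gt_boxes'].shape  # (32, 56, 5), rows padded with -1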
--------------------------------------------------------------------------------
/utils/image_processing.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 | def rescale(im, target_size, max_size):
5 | """
6 |     resize image so the short side matches target_size (long side capped at max_size) and return scale
7 |
8 | Parameters:
9 | ----------
10 | im : numpy.array
11 | BGR image input by opencv
12 | target_size: int
13 | one dimensional size (the short side)
14 | max_size: int
15 | one dimensional max size (the long side)
16 |
17 | Returns:
18 | ----------
19 | numpy.array, rescaled image
20 | """
21 | im_shape = im.shape
22 | im_size_min = np.min(im_shape[0:2])
23 |     im_size_max = np.max(im_shape[0:2])
24 | im_scale = float(target_size) / float(im_size_min)
25 | # prevent bigger axis from being more than max_size:
26 | if np.round(im_scale * im_size_max) > max_size:
27 | im_scale = float(max_size) / float(im_size_max)
28 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
29 | return im, im_scale
30 |
31 | def resize(im, target_size, interp_method=cv2.INTER_LINEAR):
32 | """
33 | resize image to target size regardless of aspect ratio
34 |
35 | Parameters:
36 | ----------
37 | im : numpy.array
38 | BGR image input by opencv
39 | target_size : tuple (int, int)
40 | (h, w) two dimensional size
41 | Returns:
42 | ----------
43 | numpy.array, resized image
44 | """
45 |     return cv2.resize(im, (target_size[1], target_size[0]), interpolation=interp_method)  # cv2 dsize is (w, h)
46 |
47 | def transform(im, pixel_means):
48 | """
49 | transform into mxnet tensor
50 |     subtract pixel means and transpose to channel-first format
51 |
52 | Parameters:
53 | ----------
54 | im : numpy.array
55 | [height, width, channel] in BGR
56 | pixel_means : list
57 | [[[R, G, B pixel means]]]
58 |
59 | Returns:
60 | ----------
61 | numpy.array as in shape [channel, height, width]
62 | """
63 | im = im.copy()
64 | im[:, :, (0, 1, 2)] = im[:, :, (2, 1, 0)]
65 | im = im.astype(float)
66 | im -= pixel_means
67 | # put channel first
68 | channel_swap = (2, 0, 1)
69 | im_tensor = im.transpose(channel_swap)
70 | return im_tensor
71 |
72 |
73 | def transform_inverse(im_tensor, pixel_means):
74 | """
75 | transform from mxnet im_tensor to ordinary RGB image
76 | im_tensor is limited to one image
77 |
78 | Parameters:
79 | ----------
80 | im_tensor : numpy.array
81 | in shape [batch, channel, height, width]
82 | pixel_means: list
83 | [[[R, G, B pixel means]]]
84 |
85 | Returns:
86 | ----------
87 | im [height, width, channel(RGB)]
88 | """
89 | assert im_tensor.shape[0] == 1
90 | im_tensor = im_tensor.copy()
91 | # put channel back
92 | channel_swap = (0, 2, 3, 1)
93 | im_tensor = im_tensor.transpose(channel_swap)
94 | im = im_tensor[0]
95 | assert im.shape[2] == 3
96 | im += pixel_means
97 | im = im.astype(np.uint8)
98 | return im
99 |
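100 | # Round-trip sanity check (illustration only; the mean values are
101 | # hypothetical): transform() and transform_inverse() should undo each
102 | # other exactly for uint8 inputs.
103 | if __name__ == '__main__':
104 |     means = np.array([[[123., 117., 104.]]])  # R, G, B
105 |     bgr = np.random.randint(0, 256, (4, 6, 3)).astype(np.uint8)
106 |     tensor = transform(bgr, means)                      # (3, 4, 6), RGB, mean-subtracted
107 |     rgb = transform_inverse(tensor[np.newaxis], means)  # (4, 6, 3), RGB
108 |     assert np.array_equal(rgb, bgr[:, :, ::-1])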
--------------------------------------------------------------------------------
/utils/vis_det.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | """
3 | @author: zeming li
4 | @contact: zengarden2009@gmail.com
5 | @file: vis_det.py
6 | """
7 | import matplotlib.pyplot as plt
8 | import random
9 | import numpy as np
10 | import cv2
11 |
12 |
13 | def visualize_detection(img, dets, is_show_label=True, classes=None, thresh=0.5):
14 | """
15 | visualize detections in one image
16 |
17 | Parameters:
18 | ----------
19 | img : numpy.array image, in bgr format
20 | dets : numpy.array ssd detections, numpy.array([[x1, y1, x2, y2, cls_id, score]...])
21 | classes : tuple or list of str class names
22 | thresh : float, score threshold
23 | """
24 |     plt.imshow(img[:, :, ::-1])  # input is BGR; matplotlib expects RGB
25 | colors = dict()
26 | for det in dets:
27 | bb = det[:4].astype(int)
28 |
29 | if is_show_label:
30 | cls_id = int(det[4])
31 | score = det[5]
32 | if cls_id == 0:
33 | continue
34 | if score > thresh:
35 | if cls_id not in colors:
36 | colors[cls_id] = (random.random(), random.random(), random.random())
37 | rect = plt.Rectangle((bb[0], bb[1]), bb[2] - bb[0],
38 | bb[3] - bb[1], fill=False,
39 | edgecolor=colors[cls_id],
40 | linewidth=3.5)
41 | plt.gca().add_patch(rect)
42 | if classes and len(classes) > cls_id:
43 | cls_name = classes[cls_id]
44 | else:
45 | cls_name = str(cls_id)
46 | plt.gca().text(bb[0], bb[1] - 2,
47 | '{:s} {:.3f}'.format(cls_name, score),
48 | bbox=dict(facecolor=colors[cls_id], alpha=0.5),
49 | fontsize=12, color='white')
50 | else:
51 | rect = plt.Rectangle((bb[0], bb[1]), bb[2] - bb[0],
52 | bb[3] - bb[1], fill=False,
53 | edgecolor=(1, 0, 0),
54 | linewidth=3.5)
55 | plt.gca().add_patch(rect)
56 | plt.show()
57 |
58 |
59 | # visualize_old: use opencv api
60 | def visualize_detection_old(img, dets, is_show_label=True, classes=None, thresh=0.5, name='detection'):
61 | """
62 | visualize detections in one image
63 |
64 | Parameters:
65 | ----------
66 | img : numpy.array image, in bgr format
67 | dets : numpy.array ssd detections, numpy.array([[x1, y1, x2, y2, cls_id, score]...])
68 | classes : tuple or list of str class names
69 | thresh : float, score threshold
70 | """
71 | im = np.array(img)
72 | colors = dict()
73 | font = cv2.FONT_HERSHEY_SIMPLEX
74 |
75 | for det in dets:
76 | bb = det[:4].astype(int)
77 | if is_show_label:
78 | cls_id = int(det[4])
79 | score = det[5]
80 |
81 | if cls_id == 0:
82 | continue
83 | if score > thresh:
84 | if cls_id not in colors:
85 | colors[cls_id] = (random.random() * 255, random.random() * 255, random.random() * 255)
86 |
87 | cv2.rectangle(im, (bb[0], bb[1]), (bb[2], bb[3]), colors[cls_id], 3)
88 |
89 | if classes and len(classes) > cls_id:
90 | cls_name = classes[cls_id]
91 | else:
92 | cls_name = str(cls_id)
93 | cv2.putText(im, '{:s} {:.3f}'.format(cls_name, score), (bb[0], bb[1] - 2), \
94 | font, 0.5, colors[cls_id], 1)
95 | else:
96 | cv2.rectangle(im, (bb[0], bb[1]), (bb[2], bb[3]), (0, 0, 255), 2)
97 |
98 | cv2.imshow(name, im)
99 | while True:
100 | c = cv2.waitKey(100000)
101 | if c == ord('d'):
102 | return
103 |         elif c == ord('n'):
104 |             # advance to the next image
105 |             break
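106 | 
107 | # Usage sketch (illustration only; the image path and the detection row
108 | # are hypothetical). dets rows are [x1, y1, x2, y2, cls_id, score] in
109 | # pixels; cls_id 0 is treated as background and skipped.
110 | # img = cv2.imread('demo.jpg')
111 | # dets = np.array([[40., 30., 220., 180., 7., 0.92]])
112 | # visualize_detection(img, dets, classes=None, thresh=0.5)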
--------------------------------------------------------------------------------
/dataprovider/det.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | from utils.image_processing import resize, transform
4 | from utils.rand_sampler import RandSampler
5 |
6 | class DetIter(object):
7 | """
8 |     Detection data iterator: yields batches of preprocessed images and (optionally augmented) ground-truth labels from an Imdb
9 | """
10 | def __init__(self, imdb, batch_size, data_shape, mean_pixels=[128, 128, 128],
11 | rand_samplers=[], rand_flip=False, shuffle=False, rand_seed=None):
12 |
13 | self._imdb = imdb
14 | self.batch_size = batch_size
15 | if isinstance(data_shape, int):
16 | data_shape = (data_shape, data_shape)
17 | self._data_shape = data_shape
18 | self._mean_pixels = mean_pixels
19 | self.is_train = self._imdb.is_train
20 |
21 | # image shuffle
22 | if rand_seed:
23 | np.random.seed(rand_seed)
24 | self._shuffle = shuffle
25 | self._size = self._imdb.num_images
26 | self._current = 0
27 | self._fake_index = np.arange(self._size)
28 | if self._shuffle:
29 | np.random.shuffle(self._fake_index)
30 |
31 | # augmentation
32 | self._rand_flip = rand_flip
33 | if not rand_samplers:
34 | self._rand_samplers = []
35 | else:
36 | if not isinstance(rand_samplers, list):
37 | rand_samplers = [rand_samplers]
38 | assert isinstance(rand_samplers[0], RandSampler), "Invalid rand sampler"
39 | self._rand_samplers = rand_samplers
40 |
41 | def _get_batch(self):
42 | batch_data = []
43 | batch_label = []
44 | indices = []
45 | for i in range(self.batch_size):
46 | if (self._current + i) >= self._size:
47 | if not self.is_train:
48 | continue
49 |                 # wrap around: pad the last batch with samples from the middle of the epoch
50 |                 idx = (self._current + i + self._size // 2) % self._size
51 | fake_index = self._fake_index[idx]
52 | else:
53 | fake_index = self._fake_index[self._current + i]
54 |
55 | im_path = self._imdb.image_path_from_index(fake_index)
56 | img_id = self._imdb.image_set_index[fake_index]
57 | img = cv2.imread(im_path)
58 | gt = self._imdb.label_from_index(fake_index).copy() if self.is_train else None
59 | data, label = self._data_augmentation(img, gt)
60 | batch_data.append(data)
61 | if self.is_train:
62 | batch_label.append(label)
63 | indices.append(img_id)
64 |
65 | # pad data if not fully occupied
66 | for i in range(self.batch_size - len(batch_data)):
67 | assert len(batch_data) > 0
68 | batch_data.append(batch_data[0] * 0)
69 | indices.append(-1)
70 |
71 | self._current += self.batch_size
72 | return {
73 | 'data': np.array(batch_data),
74 | 'label': np.array(batch_label) if self.is_train else None,
75 | 'id': indices,
76 | }
77 |
78 | def _data_augmentation(self, data, label):
79 | if self.is_train and self._rand_samplers:
80 | rand_crops = []
81 | for rs in self._rand_samplers:
82 | rand_crops += rs.sample(label)
83 | num_rand_crops = len(rand_crops)
84 |
85 | if num_rand_crops > 0:
86 | index = int(np.random.uniform(0, 1) * num_rand_crops)
87 | width = data.shape[1]
88 | height = data.shape[0]
89 | crop = rand_crops[index][0]
90 | xmin = int(crop[0] * width)
91 | ymin = int(crop[1] * height)
92 | xmax = int(crop[2] * width)
93 | ymax = int(crop[3] * height)
94 | if xmin >= 0 and ymin >= 0 and xmax <= width and ymax <= height:
95 | data = data[ymin:ymax, xmin:xmax, :]
96 | else:
97 | # padding mode
98 | new_width = xmax - xmin
99 | new_height = ymax - ymin
100 | offset_x = 0 - xmin
101 | offset_y = 0 - ymin
102 | data_bak = data
103 | data = np.full((new_height, new_width, 3), 128.)
104 | data[offset_y:offset_y+height, offset_x:offset_x + width, :] = data_bak
105 | label = rand_crops[index][1]
106 |
107 | if self.is_train and self._rand_flip:
108 | if np.random.uniform(0, 1) > 0.5:
109 | data = cv2.flip(data, 1)
110 | valid_mask = np.where(label[:, 0] > -1)[0]
111 | tmp = 1.0 - label[valid_mask, 1]
112 | label[valid_mask, 1] = 1.0 - label[valid_mask, 3]
113 | label[valid_mask, 3] = tmp
114 |
115 | if self.is_train:
116 | interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, \
117 | cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
118 | else:
119 | interp_methods = [cv2.INTER_LINEAR]
120 | interp_method = interp_methods[int(np.random.uniform(0, 1) * len(interp_methods))]
121 | data = resize(data, self._data_shape, interp_method)
122 | data = transform(data, self._mean_pixels)
123 | return data, label
124 |
125 | def meg_get_batch(self):
126 | batch = self._get_batch()
127 | return {'img': batch['data'], 'gt_boxes': batch['label'], 'imgID': batch['id']}
--------------------------------------------------------------------------------
/evaluator/eval_voc.py:
--------------------------------------------------------------------------------
1 | """
2 | given a pascal voc imdb, compute mAP
3 | """
4 |
5 | import numpy as np
6 | import os
7 | import pickle
8 |
9 |
10 | def parse_voc_rec(filename):
11 | """
12 | parse pascal voc record into a dictionary
13 | :param filename: xml file path
14 | :return: list of dict
15 | """
16 | import xml.etree.ElementTree as ET
17 | tree = ET.parse(filename)
18 | objects = []
19 | for obj in tree.findall('object'):
20 | obj_dict = dict()
21 | obj_dict['name'] = obj.find('name').text
22 | obj_dict['difficult'] = int(obj.find('difficult').text)
23 | bbox = obj.find('bndbox')
24 | obj_dict['bbox'] = [int(bbox.find('xmin').text),
25 | int(bbox.find('ymin').text),
26 | int(bbox.find('xmax').text),
27 | int(bbox.find('ymax').text)]
28 | objects.append(obj_dict)
29 | return objects
30 |
31 |
32 | def voc_ap(rec, prec, use_07_metric=False):
33 | """
34 | average precision calculations
35 | [precision integrated to recall]
36 | :param rec: recall
37 | :param prec: precision
38 | :param use_07_metric: 2007 metric is 11-recall-point based AP
39 | :return: average precision
40 | """
41 | if use_07_metric:
42 | ap = 0.
43 | for t in np.arange(0., 1.1, 0.1):
44 | if np.sum(rec >= t) == 0:
45 | p = 0
46 | else:
47 | p = np.max(prec[rec >= t])
48 | ap += p / 11.
49 | else:
50 | # append sentinel values at both ends
51 |         mrec = np.concatenate(([0.], rec, [1.]))
52 |         mpre = np.concatenate(([0.], prec, [0.]))
53 |
54 | # compute precision integration ladder
55 | for i in range(mpre.size - 1, 0, -1):
56 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
57 |
58 | # look for recall value changes
59 | i = np.where(mrec[1:] != mrec[:-1])[0]
60 |
61 | # sum (\delta recall) * prec
62 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
63 | return ap
64 |
65 |
66 | def voc_eval(detpath, annopath, imageset_file, classname, cache_dir, ovthresh=0.5, use_07_metric=False):
67 | """
68 | pascal voc evaluation
69 | :param detpath: detection results detpath.format(classname)
70 |     :param annopath: annotations annopath.format(image_filename)
71 | :param imageset_file: text file containing list of images
72 | :param classname: category name
73 | :param cache_dir: caching annotations
74 | :param ovthresh: overlap threshold
75 | :param use_07_metric: whether to use voc07's 11 point ap computation
76 | :return: rec, prec, ap
77 | """
78 | if not os.path.isdir(cache_dir):
79 | os.mkdir(cache_dir)
80 | cache_file = os.path.join(cache_dir, 'annotations.pkl')
81 | with open(imageset_file, 'r') as f:
82 | lines = f.readlines()
83 | image_filenames = [x.strip() for x in lines]
84 |
85 | # load annotations from cache
86 | if not os.path.isfile(cache_file):
87 | recs = {}
88 | for ind, image_filename in enumerate(image_filenames):
89 | recs[image_filename] = parse_voc_rec(annopath.format(image_filename))
90 | if ind % 100 == 0:
91 | print('reading annotations for {:d}/{:d}'.format(ind + 1, len(image_filenames)))
92 | print('saving annotations cache to {:s}'.format(cache_file))
93 |         with open(cache_file, 'wb') as f:
94 |             pickle.dump(recs, f)
95 | else:
96 |         with open(cache_file, 'rb') as f:
97 |             recs = pickle.load(f)
98 |
99 | # extract objects in :param classname:
100 | class_recs = {}
101 | npos = 0
102 | for image_filename in image_filenames:
103 | objects = [obj for obj in recs[image_filename] if obj['name'] == classname]
104 | bbox = np.array([x['bbox'] for x in objects])
105 |         difficult = np.array([x['difficult'] for x in objects]).astype(bool)
106 | det = [False] * len(objects) # stand for detected
107 | npos = npos + sum(~difficult)
108 | class_recs[image_filename] = {'bbox': bbox,
109 | 'difficult': difficult,
110 | 'det': det}
111 |
112 | # read detections
113 | detfile = detpath.format(classname)
114 | with open(detfile, 'r') as f:
115 | lines = f.readlines()
116 |
117 | splitlines = [x.strip().split(' ') for x in lines]
118 | image_ids = [x[0] for x in splitlines]
119 | confidence = np.array([float(x[1]) for x in splitlines])
120 | bbox = np.array([[float(z) for z in x[2:]] for x in splitlines])
121 |
122 | # sort by confidence
123 | sorted_inds = np.argsort(-confidence)
124 | sorted_scores = np.sort(-confidence)
125 | bbox = bbox[sorted_inds, :]
126 | image_ids = [image_ids[x] for x in sorted_inds]
127 |
128 | # go down detections and mark true positives and false positives
129 | nd = len(image_ids)
130 | tp = np.zeros(nd)
131 | fp = np.zeros(nd)
132 | for d in range(nd):
133 | r = class_recs[image_ids[d]]
134 | bb = bbox[d, :].astype(float)
135 | ovmax = -np.inf
136 | bbgt = r['bbox'].astype(float)
137 |
138 | if bbgt.size > 0:
139 | # compute overlaps
140 | # intersection
141 | ixmin = np.maximum(bbgt[:, 0], bb[0])
142 | iymin = np.maximum(bbgt[:, 1], bb[1])
143 | ixmax = np.minimum(bbgt[:, 2], bb[2])
144 | iymax = np.minimum(bbgt[:, 3], bb[3])
145 | iw = np.maximum(ixmax - ixmin + 1., 0.)
146 | ih = np.maximum(iymax - iymin + 1., 0.)
147 | inters = iw * ih
148 |
149 | # union
150 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
151 | (bbgt[:, 2] - bbgt[:, 0] + 1.) *
152 | (bbgt[:, 3] - bbgt[:, 1] + 1.) - inters)
153 |
154 | overlaps = inters / uni
155 | ovmax = np.max(overlaps)
156 | jmax = np.argmax(overlaps)
157 |
158 | if ovmax > ovthresh:
159 | if not r['difficult'][jmax]:
160 | if not r['det'][jmax]:
161 | tp[d] = 1.
162 | r['det'][jmax] = 1
163 | else:
164 | fp[d] = 1.
165 | else:
166 | fp[d] = 1.
167 |
168 | # compute precision recall
169 | fp = np.cumsum(fp)
170 | tp = np.cumsum(tp)
171 | rec = tp / float(npos)
172 |     # avoid division by zero in case the first detection matches a difficult ground truth
173 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
174 | ap = voc_ap(rec, prec, use_07_metric)
175 |
176 | return rec, prec, ap
177 |
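178 | # Sanity check for voc_ap (illustration only; the recall/precision values
179 | # are hypothetical): with recalls [0.5, 1.0] and precisions [1.0, 0.5] the
180 | # integrated metric gives 0.75 and the 11-point VOC07 metric ~0.7727.
181 | if __name__ == '__main__':
182 |     rec = np.array([0.5, 1.0])
183 |     prec = np.array([1.0, 0.5])
184 |     print(voc_ap(rec, prec))                      # 0.75
185 |     print(voc_ap(rec, prec, use_07_metric=True))  # 0.7727...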
--------------------------------------------------------------------------------
/dataset/pascal_voc.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from .imdb import Imdb
4 | import xml.etree.ElementTree as ET
5 | from evaluator.eval_voc import voc_eval
6 | import cv2
7 |
8 | class PascalVoc(Imdb):
9 | """
10 | Implementation of Imdb for Pascal VOC datasets
11 |
12 | Parameters:
13 | ----------
14 | image_set : str
15 | set to be used, can be train, val, trainval, test
16 | year : str
17 | year of dataset, can be 2007, 2010, 2012...
18 | devkit_path : str
19 | devkit path of VOC dataset
20 | shuffle : boolean
21 |         whether to initially shuffle the image list
22 | is_train : boolean
23 | if true, will load annotations
24 | """
25 | def __init__(self, image_set, year, devkit_path, shuffle=False, is_train=False):
26 | super(PascalVoc, self).__init__('voc_' + year + '_' + image_set)
27 | self.image_set = image_set
28 | self.year = year
29 | self.devkit_path = devkit_path
30 | self.data_path = os.path.join(devkit_path, 'VOC' + year)
31 | self.extension = '.jpg'
32 | self.is_train = is_train
33 |
34 | self.classes = ['aeroplane', 'bicycle', 'bird', 'boat',
35 | 'bottle', 'bus', 'car', 'cat', 'chair',
36 | 'cow', 'diningtable', 'dog', 'horse',
37 | 'motorbike', 'person', 'pottedplant',
38 | 'sheep', 'sofa', 'train', 'tvmonitor']
39 |
40 | self.config = {'use_difficult': True,
41 | 'comp_id': 'comp4',
42 | 'padding': 56}
43 |
44 | self.num_classes = len(self.classes)
45 | self.image_set_index = self._load_image_set_index(shuffle)
46 | self.num_images = len(self.image_set_index)
47 | if self.is_train:
48 | self.labels = self._load_image_labels()
49 |
50 | @property
51 | def cache_path(self):
52 | """
53 | make a directory to store all caches
54 |
55 | Returns:
56 | ---------
57 | cache path
58 | """
59 | cache_path = os.path.join(os.path.dirname(__file__), '..', 'cache')
60 | if not os.path.exists(cache_path):
61 | os.mkdir(cache_path)
62 | return cache_path
63 |
64 | def _load_image_set_index(self, shuffle):
65 | """
66 | find out which indexes correspond to given image set (train or val)
67 |
68 | Parameters:
69 | ----------
70 | shuffle : boolean
71 | whether to shuffle the image list
72 | Returns:
73 | ----------
74 | entire list of images specified in the setting
75 | """
76 | image_set_index_file = os.path.join(self.data_path, 'ImageSets', 'Main', self.image_set + '.txt')
77 | assert os.path.exists(image_set_index_file), 'Path does not exist: {}'.format(image_set_index_file)
78 | with open(image_set_index_file) as f:
79 | image_set_index = [x.strip() for x in f.readlines()]
80 | if shuffle:
81 | np.random.shuffle(image_set_index)
82 | return image_set_index
83 |
84 | def image_path_from_index(self, index):
85 | """
86 | given image index, find out full path
87 |
88 | Parameters:
89 | ----------
90 | index: int
91 | index of a specific image
92 | Returns:
93 | ----------
94 | full path of this image
95 | """
96 | assert self.image_set_index is not None, "Dataset not initialized"
97 | name = self.image_set_index[index]
98 | image_file = os.path.join(self.data_path, 'JPEGImages', name + self.extension)
99 | assert os.path.exists(image_file), 'Path does not exist: {}'.format(image_file)
100 | return image_file
101 |
102 | def label_from_index(self, index):
103 | """
104 | given image index, return preprocessed ground-truth
105 |
106 | Parameters:
107 | ----------
108 | index: int
109 | index of a specific image
110 | Returns:
111 | ----------
112 | ground-truths of this image
113 | """
114 | assert self.labels is not None, "Labels not processed"
115 | return self.labels[index, :, :]
116 |
117 | def _label_path_from_index(self, index):
118 | """
119 | given image index, find out annotation path
120 |
121 | Parameters:
122 | ----------
123 | index: int
124 | index of a specific image
125 |
126 | Returns:
127 | ----------
128 | full path of annotation file
129 | """
130 | label_file = os.path.join(self.data_path, 'Annotations', index + '.xml')
131 |         assert os.path.exists(label_file), 'Path does not exist: {}'.format(label_file)
132 | return label_file
133 |
134 | def _load_image_labels(self):
135 | """
136 | preprocess all ground-truths
137 |
138 | Returns:
139 | ----------
140 | labels packed in [num_images x max_num_objects x 5] tensor
141 | """
142 | temp = []
143 | max_objects = 0
144 |
145 | # load ground-truth from xml annotations
146 | for idx in self.image_set_index:
147 | label_file = self._label_path_from_index(idx)
148 | tree = ET.parse(label_file)
149 | root = tree.getroot()
150 | size = root.find('size')
151 | width = float(size.find('width').text)
152 | height = float(size.find('height').text)
153 | label = []
154 |
155 | for obj in root.iter('object'):
156 | difficult = int(obj.find('difficult').text)
157 | if not self.config['use_difficult'] and difficult == 1:
158 | continue
159 | cls_name = obj.find('name').text
160 | if cls_name not in self.classes:
161 | continue
162 | cls_id = self.classes.index(cls_name)
163 | xml_box = obj.find('bndbox')
164 | xmin = float(xml_box.find('xmin').text) / width
165 | ymin = float(xml_box.find('ymin').text) / height
166 | xmax = float(xml_box.find('xmax').text) / width
167 | ymax = float(xml_box.find('ymax').text) / height
168 | label.append([cls_id, xmin, ymin, xmax, ymax])
169 | temp.append(np.array(label))
170 | max_objects = max(max_objects, len(label))
171 |
172 | # add padding to labels so that the dimensions match in each batch
173 | # TODO: design a better way to handle label padding
174 | assert max_objects > 0, "No objects found for any of the images"
175 | assert max_objects <= self.config['padding'], "# obj exceed padding"
176 | self.padding = self.config['padding']
177 | labels = []
178 | for label in temp:
179 | label = np.lib.pad(label, ((0, self.padding-label.shape[0]), (0,0)), \
180 | 'constant', constant_values=(-1, -1))
181 | labels.append(label)
182 |
183 | return np.array(labels)
184 |
185 | def evaluate_detections(self, detections):
186 | """
187 | top level evaluations
188 | Parameters:
189 | ----------
190 | detections: list
191 | result list, each entry is a matrix of detections
192 | Returns:
193 | ----------
194 | None
195 | """
196 | # make all these folders for results
197 | result_dir = os.path.join(self.devkit_path, 'results')
198 | if not os.path.exists(result_dir):
199 | os.mkdir(result_dir)
200 | year_folder = os.path.join(self.devkit_path, 'results', 'VOC' + self.year)
201 | if not os.path.exists(year_folder):
202 | os.mkdir(year_folder)
203 | res_file_folder = os.path.join(self.devkit_path, 'results', 'VOC' + self.year, 'Main')
204 | if not os.path.exists(res_file_folder):
205 | os.mkdir(res_file_folder)
206 |
207 | self.write_pascal_results(detections)
208 | self.do_python_eval()
209 |
210 | def get_result_file_template(self):
211 | """
212 | this is a template
213 |     VOCdevkit/results/VOC2007/Main/comp4_det_test_aeroplane.txt
214 |
215 | Returns:
216 | ----------
217 | a string template
218 | """
219 | res_file_folder = os.path.join(self.devkit_path, 'results', 'VOC' + self.year, 'Main')
220 | comp_id = self.config['comp_id']
221 | filename = comp_id + '_det_' + self.image_set + '_{:s}.txt'
222 | path = os.path.join(res_file_folder, filename)
223 | return path
224 |
225 | def write_pascal_results(self, all_boxes):
226 | """
227 | write results files in pascal devkit path
228 | Parameters:
229 | ----------
230 | all_boxes: list
231 | boxes to be processed [bbox, confidence]
232 | Returns:
233 | ----------
234 | None
235 | """
236 | for cls_ind, cls in enumerate(self.classes):
237 | print('Writing {} VOC results file'.format(cls))
238 | filename = self.get_result_file_template().format(cls)
239 | with open(filename, 'wt') as f:
240 | for im_ind, index in enumerate(self.image_set_index):
241 | dets = all_boxes[im_ind]
242 | if dets.shape[0] < 1:
243 | continue
244 | h, w = self._get_imsize(self.image_path_from_index(im_ind))
245 | # the VOCdevkit expects 1-based indices
246 | for k in range(dets.shape[0]):
247 | if (int(dets[k, 0]) == cls_ind):
248 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
249 | format(index, dets[k, 1],
250 | int(dets[k, 2] * w) + 1, int(dets[k, 3] * h) + 1,
251 | int(dets[k, 4] * w) + 1, int(dets[k, 5] * h) + 1))
252 |
253 | def do_python_eval(self):
254 | """
255 | python evaluation wrapper
256 |
257 | Returns:
258 | ----------
259 | None
260 | """
261 | annopath = os.path.join(self.data_path, 'Annotations', '{:s}.xml')
262 | imageset_file = os.path.join(self.data_path, 'ImageSets', 'Main', self.image_set + '.txt')
263 | cache_dir = os.path.join(self.cache_path, self.name)
264 | aps = []
265 | # The PASCAL VOC metric changed in 2010
266 |         use_07_metric = int(self.year) < 2010
267 |         print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
268 | for cls_ind, cls in enumerate(self.classes):
269 | filename = self.get_result_file_template().format(cls)
270 | rec, prec, ap = voc_eval(filename, annopath, imageset_file, cls, cache_dir,
271 | ovthresh=0.5, use_07_metric=use_07_metric)
272 | aps += [ap]
273 | print('AP for {} = {:.4f}'.format(cls, ap))
274 | print('Mean AP = {:.4f}'.format(np.mean(aps)))
275 |
276 | def _get_imsize(self, im_name):
277 | """
278 | get image size info
279 | Returns:
280 | ----------
281 | tuple of (height, width)
282 | """
283 | img = cv2.imread(im_name)
284 | return (img.shape[0], img.shape[1])
285 |
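286 | # Usage sketch (illustration only; the devkit path is hypothetical):
287 | # voc = PascalVoc("trainval", "2007", "/path/to/VOCdevkit", is_train=True)
288 | # print(voc.num_images, voc.num_classes)  # e.g. 5011 and 20 for VOC2007 trainval
289 | # label = voc.label_from_index(0)         # (56, 5) array of [id, xmin, ymin, xmax, ymax]
290 | # print(label[label[:, 0] > -1])          # drop the -1 padding rows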
--------------------------------------------------------------------------------
/utils/rand_sampler.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 |
4 | class RandSampler(object):
5 | """
6 | Random sampler base class, used for data augmentation
7 |
8 | Parameters:
9 | ----------
10 | max_trials : int
11 | maximum trials, if exceed this number, give up anyway
12 | max_sample : int
13 | maximum random crop samples to be generated
14 | """
15 | def __init__(self, max_trials, max_sample):
16 | assert max_trials > 0
17 | self.max_trials = int(max_trials)
18 | assert max_sample >= 0
19 | self.max_sample = int(max_sample)
20 |
21 | def sample(self, label):
22 | """
23 | Interface for calling sampling function
24 |
25 | Parameters:
26 | ----------
27 | label : numpy.array (n x 5 matrix)
28 | ground-truths
29 |
30 | Returns:
31 | ----------
32 | list of (crop_box, label) tuples, if failed, return empty list []
33 | """
34 |         raise NotImplementedError
35 |
36 |
37 | class RandCropper(RandSampler):
38 | """
39 | Random cropping original images with various settings
40 |
41 | Parameters:
42 | ----------
43 | min_scale : float
44 | minimum crop scale, (0, 1]
45 | max_scale : float
46 |         maximum crop scale, (0, 1], must be no smaller than min_scale
47 | min_aspect_ratio : float
48 | minimum crop aspect ratio, (0, 1]
49 | max_aspect_ratio : float
50 | maximum crop aspect ratio, [1, inf)
51 | min_overlap : float
52 |         threshold of minimum overlap between a rand crop and any gt
53 | max_trials : int
54 | maximum trials, if exceed this number, give up anyway
55 | max_sample : int
56 | maximum random crop samples to be generated
57 | """
58 | def __init__(self, min_scale=1., max_scale=1.,
59 | min_aspect_ratio=1., max_aspect_ratio=1.,
60 | min_overlap=0., max_trials=50, max_sample=1):
61 | super(RandCropper, self).__init__(max_trials, max_sample)
62 |         assert min_scale <= max_scale, "min_scale must be <= max_scale"
63 |         assert 0 < min_scale <= 1, "min_scale must be in (0, 1]"
64 |         assert 0 < max_scale <= 1, "max_scale must be in (0, 1]"
65 |         self.min_scale = min_scale
66 |         self.max_scale = max_scale
67 |         assert 0 < min_aspect_ratio <= 1, "min_aspect_ratio must be in (0, 1]"
68 |         assert max_aspect_ratio >= 1, "max_aspect_ratio must be >= 1"
69 |         self.min_aspect_ratio = min_aspect_ratio
70 |         self.max_aspect_ratio = max_aspect_ratio
71 |         assert 0 <= min_overlap <= 1, "min_overlap must be in [0, 1]"
72 |         self.min_overlap = min_overlap
73 |
74 | self.config = {'gt_constraint' : 'center'}
75 |
76 | def sample(self, label):
77 | """
78 | generate random cropping boxes according to parameters
79 |     if satisfactory crops are generated, apply them to the ground-truth as well
80 |
81 | Parameters:
82 | ----------
83 | label : numpy.array (n x 5 matrix)
84 | ground-truths
85 |
86 | Returns:
87 | ----------
88 | list of (crop_box, label) tuples, if failed, return empty list []
89 | """
90 | samples = []
91 | count = 0
92 | for trial in range(self.max_trials):
93 | if count >= self.max_sample:
94 | return samples
95 | scale = np.random.uniform(self.min_scale, self.max_scale)
96 | min_ratio = max(self.min_aspect_ratio, scale * scale)
97 | max_ratio = min(self.max_aspect_ratio, 1. / scale / scale)
98 | ratio = math.sqrt( np.random.uniform(min_ratio, max_ratio) )
99 | width = scale * ratio
100 | height = scale / ratio
101 | left = np.random.uniform(0., 1 - width)
102 | top = np.random.uniform(0., 1 - height)
103 | rand_box = (left, top, left + width, top + height)
104 | valid_mask = np.where(label[:, 0] > -1)[0]
105 | gt = label[valid_mask, :]
106 | ious = self._check_satisfy(rand_box, gt)
107 | if ious is not None:
108 | # transform gt labels after crop, discard bad ones
109 | l, t, r, b = rand_box
110 | new_gt_boxes = []
111 | new_width = r - l
112 | new_height = b - t
113 | for i in range(valid_mask.size):
114 | if ious[i] > 0:
115 | xmin = max(0., (gt[i, 1] - l) / new_width)
116 | ymin = max(0., (gt[i, 2] - t) / new_height)
117 | xmax = min(1., (gt[i, 3] - l) / new_width)
118 | ymax = min(1., (gt[i, 4] - t) / new_height)
119 | new_gt_boxes.append([gt[i, 0], xmin, ymin, xmax, ymax])
120 | if not new_gt_boxes:
121 | continue
122 | new_gt_boxes = np.array(new_gt_boxes)
123 | label = np.lib.pad(new_gt_boxes,
124 | ((0, label.shape[0]-new_gt_boxes.shape[0]), (0,0)), \
125 | 'constant', constant_values=(-1, -1))
126 | samples.append((rand_box, label))
127 | count += 1
128 | return samples
129 |
130 | def _check_satisfy(self, rand_box, gt_boxes):
131 | """
132 | check if overlap with any gt box is larger than threshold
133 | """
134 | l, t, r, b = rand_box
135 | num_gt = gt_boxes.shape[0]
136 | ls = np.ones(num_gt) * l
137 | ts = np.ones(num_gt) * t
138 | rs = np.ones(num_gt) * r
139 | bs = np.ones(num_gt) * b
140 | mask = np.where(ls < gt_boxes[:, 1])[0]
141 | ls[mask] = gt_boxes[mask, 1]
142 | mask = np.where(ts < gt_boxes[:, 2])[0]
143 | ts[mask] = gt_boxes[mask, 2]
144 | mask = np.where(rs > gt_boxes[:, 3])[0]
145 | rs[mask] = gt_boxes[mask, 3]
146 | mask = np.where(bs > gt_boxes[:, 4])[0]
147 | bs[mask] = gt_boxes[mask, 4]
148 | w = rs - ls
149 | w[w < 0] = 0
150 | h = bs - ts
151 | h[h < 0] = 0
152 | inter_area = h * w
153 | union_area = np.ones(num_gt) * max(0, r - l) * max(0, b - t)
154 | union_area += (gt_boxes[:, 3] - gt_boxes[:, 1]) * (gt_boxes[:, 4] - gt_boxes[:, 2])
155 | union_area -= inter_area
156 | ious = inter_area / union_area
157 | ious[union_area <= 0] = 0
158 | max_iou = np.amax(ious)
159 | if max_iou < self.min_overlap:
160 | return None
161 | # check ground-truth constraint
162 | if self.config['gt_constraint'] == 'center':
163 | for i in range(ious.shape[0]):
164 | if ious[i] > 0:
165 | gt_x = (gt_boxes[i, 1] + gt_boxes[i, 3]) / 2.0
166 | gt_y = (gt_boxes[i, 2] + gt_boxes[i, 4]) / 2.0
167 | if gt_x < l or gt_x > r or gt_y < t or gt_y > b:
168 | return None
169 | elif self.config['gt_constraint'] == 'corner':
170 | for i in range(ious.shape[0]):
171 | if ious[i] > 0:
172 | if gt_boxes[i, 1] < l or gt_boxes[i, 3] > r \
173 | or gt_boxes[i, 2] < t or gt_boxes[i, 4] > b:
174 | return None
175 | return ious
176 |
177 | class RandPadder(RandSampler):
178 | """
179 |     Random padding original images with various settings
180 |
181 | Parameters:
182 | ----------
183 | min_scale : float
184 | minimum crop scale, [1, inf)
185 | max_scale : float
186 |         maximum crop scale, [1, inf), must be no smaller than min_scale
187 | min_aspect_ratio : float
188 | minimum crop aspect ratio, (0, 1]
189 | max_aspect_ratio : float
190 | maximum crop aspect ratio, [1, inf)
191 | min_gt_scale : float
192 | minimum ground-truth scale to be satisfied after padding,
193 | either width or height, [0, 1]
194 | max_trials : int
195 | maximum trials, if exceed this number, give up anyway
196 | max_sample : int
197 | maximum random crop samples to be generated
198 | """
199 | def __init__(self, min_scale=1., max_scale=1., min_aspect_ratio=1., \
200 | max_aspect_ratio=1., min_gt_scale=.01, max_trials=50,
201 | max_sample=1):
202 | super(RandPadder, self).__init__(max_trials, max_sample)
203 |         assert min_scale <= max_scale, "min_scale must be <= max_scale"
204 |         assert min_scale >= 1, "min_scale must be >= 1"
205 |         self.min_scale = min_scale
206 |         self.max_scale = max_scale
207 |         assert 0 < min_aspect_ratio <= 1, "min_aspect_ratio must be in (0, 1]"
208 |         assert max_aspect_ratio >= 1, "max_aspect_ratio must be >= 1"
209 |         self.min_aspect_ratio = min_aspect_ratio
210 |         self.max_aspect_ratio = max_aspect_ratio
211 |         assert 0 <= min_gt_scale <= 1, "min_gt_scale must be in [0, 1]"
212 |         self.min_gt_scale = min_gt_scale
213 |
214 | def sample(self, label):
215 | """
216 | generate random padding boxes according to parameters
217 |     if satisfactory padding is generated, apply it to the ground-truth as well
218 |
219 | Parameters:
220 | ----------
221 | label : numpy.array (n x 5 matrix)
222 | ground-truths
223 |
224 | Returns:
225 | ----------
226 | list of (crop_box, label) tuples, if failed, return empty list []
227 | """
228 | samples = []
229 | count = 0
230 | for trial in range(self.max_trials):
231 | if count >= self.max_sample:
232 | return samples
233 | scale = np.random.uniform(self.min_scale, self.max_scale)
234 | min_ratio = max(self.min_aspect_ratio, scale * scale)
235 | max_ratio = min(self.max_aspect_ratio, 1. / scale / scale)
236 | ratio = math.sqrt(np.random.uniform(min_ratio, max_ratio))
237 | width = scale * ratio
238 | if width < 1:
239 | continue
240 | height = scale / ratio
241 | if height < 1:
242 | continue
243 | left = np.random.uniform(0., 1 - width)
244 | top = np.random.uniform(0., 1 - height)
245 | right = left + width
246 | bot = top + height
247 | rand_box = (left, top, right, bot)
248 | valid_mask = np.where(label[:, 0] > -1)[0]
249 | gt = label[valid_mask, :]
250 | new_gt_boxes = []
251 | for i in range(gt.shape[0]):
252 | xmin = (gt[i, 1] - left) / width
253 | ymin = (gt[i, 2] - top) / height
254 | xmax = (gt[i, 3] - left) / width
255 | ymax = (gt[i, 4] - top) / height
256 | new_size = min(xmax - xmin, ymax - ymin)
257 | if new_size < self.min_gt_scale:
258 | new_gt_boxes = []
259 | break
260 | new_gt_boxes.append([gt[i, 0], xmin, ymin, xmax, ymax])
261 | if not new_gt_boxes:
262 | continue
263 | new_gt_boxes = np.array(new_gt_boxes)
264 | label = np.lib.pad(new_gt_boxes,
265 | ((0, label.shape[0]-new_gt_boxes.shape[0]), (0,0)), \
266 | 'constant', constant_values=(-1, -1))
267 | samples.append((rand_box, label))
268 | count += 1
269 | return samples
270 |
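271 | # Sanity sketch (illustration only): draw crops around one normalized
272 | # ground-truth box; rows are [id, xmin, ymin, xmax, ymax] and -1 rows are
273 | # padding, matching the layout produced by dataset/pascal_voc.py.
274 | if __name__ == '__main__':
275 |     label = np.array([[0., 0.2, 0.2, 0.8, 0.8],
276 |                       [-1., -1., -1., -1., -1.]])
277 |     cropper = RandCropper(min_scale=.3, min_overlap=.5, max_trials=20, max_sample=3)
278 |     for crop_box, new_label in cropper.sample(label):
279 |         print(crop_box, new_label[new_label[:, 0] > -1])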
--------------------------------------------------------------------------------