├── README.md ├── config.py ├── data └── citypersons ├── dataloader ├── __init__.py ├── data_augment.py ├── load_data.py └── loader.py ├── eval_city ├── __init__.py ├── cocoapi │ ├── .gitignore │ ├── LuaAPI │ │ ├── CocoApi.lua │ │ ├── MaskApi.lua │ │ ├── cocoDemo.lua │ │ ├── env.lua │ │ ├── init.lua │ │ └── rocks │ │ │ └── coco-scm-1.rockspec │ ├── MatlabAPI │ │ ├── CocoApi.m │ │ ├── CocoEval.m │ │ ├── CocoUtils.m │ │ ├── MaskApi.m │ │ ├── cocoDemo.m │ │ ├── evalDemo.m │ │ ├── gason.m │ │ └── private │ │ │ ├── gasonMex.cpp │ │ │ ├── gasonMex.mexa64 │ │ │ ├── gasonMex.mexmaci64 │ │ │ ├── getPrmDflt.m │ │ │ ├── maskApiMex.c │ │ │ ├── maskApiMex.mexa64 │ │ │ └── maskApiMex.mexmaci64 │ ├── PythonAPI │ │ ├── Makefile │ │ ├── pycocotools │ │ │ ├── __init__.py │ │ │ ├── _mask.pyx │ │ │ ├── coco.py │ │ │ ├── cocoeval.py │ │ │ └── mask.py │ │ └── setup.py │ ├── README.txt │ ├── common │ │ ├── gason.cpp │ │ ├── gason.h │ │ ├── maskApi.c │ │ └── maskApi.h │ └── license.txt ├── dt_txt2json.m ├── eval_script │ ├── __init__.py │ ├── coco.py │ ├── eval_MR_multisetup.py │ ├── eval_demo.py │ └── readme.txt ├── readme.txt └── val_gt.json ├── net ├── __init__.py ├── __init__.pyc ├── l2norm.py ├── l2norm.pyc ├── loss.py ├── network.py ├── network.pyc ├── resnet.py └── resnet.pyc ├── trainval_caffestyle.py ├── trainval_torchstyle.py └── util ├── Makefile ├── __init__.py ├── functions.py ├── nms ├── .gitignore ├── __init__.py ├── cpu_nms.pyx ├── gpu_nms.hpp ├── gpu_nms.pyx ├── nms_kernel.cu └── py_cpu_nms.py ├── nms_wrapper.py └── setup.py /README.md: -------------------------------------------------------------------------------- 1 | # CSP PyTorch Implementation 2 | Unofficial PyTorch implementation of [**High-level Semantic Feature Detection: A New Perspective for Pedestrian Detection**]() 3 | 4 | This code supports only the CityPersons dataset, and only the center-position + height-regression + offset-regression model. 5 | 6 | ## NOTE 7 | The code in this repo has known bugs and will not be updated for days or weeks. 8 | You may run the code, but check the results carefully. 9 | A new repo may be uploaded in the future. 10 | 11 | ## update 12 | 13 | On the CityPersons validation set (MR = miss rate, lower is better): 14 | 11.70 MR BaiduYun: https://pan.baidu.com/s/1t5JhFvFM0Z8xObmqva0Gtg (password: xarm) 15 | 16 | 11.71 MR [CSPNet-26.pth](https://www.dropbox.com/s/albzr94lru7fdsv/CSPNet-26.pth?dl=0) (NEW!) 17 | 18 | 12.56 MR [CSPNet-89.pth]() 19 | 20 | ## Requirement 21 | 22 | Python, PyTorch and other related libraries 23 | 24 | A GPU is needed 25 | 26 | ## Usage 27 | 28 | Compile the NMS lib: 29 | 30 | ~~~ 31 | cd util 32 | make all 33 | ~~~ 34 | 35 | Prepare the CityPersons dataset as the original code does. 36 | 37 | * For CityPersons, we use the training set (2975 images) for training and test on the validation set (500 images). We assume that images and annotations are stored in `./data/citypersons`, and the directory structure is 38 | 39 | ``` 40 | *DATA_PATH 41 | *annotations 42 | *anno_train.mat 43 | *anno_val.mat 44 | *images 45 | *train 46 | *val 47 | ``` 48 | 49 | 50 | 51 | Training & validation: 52 | 53 | ~~~ 54 | python trainval_torchstyle.py 55 | python trainval_caffestyle.py 56 | ~~~ 57 | 58 | NOTE 59 | 60 | When using the Caffe style, you need to download an additional pre-trained weight file.
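For reference, the pieces above fit together roughly as sketched below. This snippet is not part of the original scripts (`trainval_torchstyle.py` is the authoritative training entry point), and the ImageNet normalization statistics are an assumption rather than something this repo specifies:

~~~python
# Hypothetical usage sketch of the repo's config + dataloader (torch style).
from torch.utils.data import DataLoader
from torchvision import transforms

from config import Config
from dataloader.loader import CityPersons  # imports work under the repo's Python 2 setup

cfg = Config()
transform = transforms.Compose([
    transforms.ToTensor(),  # PIL RGB image -> float tensor in [0, 1]
    # ImageNet statistics -- an assumption; check trainval_torchstyle.py
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
trainset = CityPersons(path=cfg.train_path, type='train', config=cfg, transform=transform)
loader = DataLoader(trainset, batch_size=cfg.onegpu * len(cfg.gpu_ids))

for imgs, (center_map, scale_map, offset_map) in loader:
    # imgs: [N, 3, 336, 448]; the three target maps come from
    # CityPersons.calc_gt_center() at 1/4 of the input resolution (config.down = 4).
    break
~~~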
61 | 62 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | class Config(object): 2 | def __init__(self): 3 | self.gpu_ids = [0, 1] 4 | self.onegpu = 2 # images per GPU 5 | self.num_epochs = 150 6 | self.add_epoch = 0 7 | self.iter_per_epoch = 2000 8 | self.init_lr = 1e-4 9 | self.alpha = 0.999 # moving-average decay for the teacher model's weights 10 | 11 | # dataset 12 | self.train_path = './data/citypersons' 13 | self.train_random = True 14 | 15 | # setting for network architecture 16 | self.network = 'resnet50' # or 'mobilenet' 17 | self.point = 'center' # or 'top', 'bottom' 18 | self.scale = 'h' # or 'w', 'hw' 19 | self.num_scale = 1 # 1 for height (or width) prediction, 2 for height+width prediction 20 | self.offset = False # append offset prediction or not 21 | self.down = 4 # downsampling rate of the feature map for detection 22 | self.radius = 2 # surrounding areas of positives for the scale map 23 | 24 | # setting for data augmentation 25 | self.use_horizontal_flips = True 26 | self.brightness = (0.5, 2, 0.5) # (min, max, prob); the current code uses only min/max with a fixed 1/2 chance 27 | self.size_train = (336, 448) 28 | self.size_test = (336, 338) 29 | 30 | # image channel-wise mean to subtract, the order is BGR 31 | self.img_channel_mean = [103.939, 116.779, 123.68] 32 | 33 | # whether or not to use the Caffe-style training used in the paper 34 | self.caffemodel = False 35 | 36 | # use teacher 37 | self.teacher = True 38 | 39 | self.test_path = './data/citypersons' 40 | 41 | # whether or not to do validation during training 42 | self.val = True 43 | self.val_frequency = 10 44 | 45 | def print_conf(self): 46 | print('\n'.join(['%s:%s' % item for item in self.__dict__.items()])) -------------------------------------------------------------------------------- /data/citypersons: -------------------------------------------------------------------------------- 1 | ../../../dataset/Citypersons/ -------------------------------------------------------------------------------- /dataloader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lwpyr/CSP-pedestrian-detection-in-pytorch/f280cd08bebb5daeaba27d3ea6ac5f317b42f878/dataloader/__init__.py -------------------------------------------------------------------------------- /dataloader/data_augment.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import cv2 3 | import numpy as np 4 | import copy 5 | 6 | 7 | def _brightness(image, min=0.5, max=2.0): 8 | ''' 9 | Randomly change the brightness of the input image. 10 | Protected against overflow. 11 | ''' 12 | hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV) 13 | 14 | random_br = np.random.uniform(min, max) 15 | 16 | # To protect against overflow: Calculate a mask for all pixels 17 | # where adjustment of the brightness would exceed the maximum 18 | # brightness value and set the value to the maximum at those pixels.
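# For example, assuming a uint8 input image: with random_br = 2.0 a pixel with
# V = 200 scales to 400.0; the mask below replaces it with 255, whereas writing
# 400.0 straight back into the uint8 HSV array would wrap around to 144.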
19 | mask = hsv[:, :, 2] * random_br > 255 20 | v_channel = np.where(mask, 255, hsv[:, :, 2] * random_br) 21 | hsv[:, :, 2] = v_channel 22 | 23 | return cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB) 24 | 25 | 26 | def resize_image(image, gts, igs, scale=(0.4, 1.5)): 27 | height, width = image.shape[0:2] 28 | ratio = np.random.uniform(scale[0], scale[1]) 29 | # if len(gts)>0 and np.max(gts[:,3]-gts[:,1])>300: 30 | # ratio = np.random.uniform(scale[0], 1.0) 31 | new_height, new_width = int(ratio * height), int(ratio * width) 32 | image = cv2.resize(image, (new_width, new_height)) 33 | if len(gts) > 0: 34 | gts = np.asarray(gts, dtype=float) 35 | gts[:, 0:4:2] *= ratio 36 | gts[:, 1:4:2] *= ratio 37 | 38 | if len(igs) > 0: 39 | igs = np.asarray(igs, dtype=float) 40 | igs[:, 0:4:2] *= ratio 41 | igs[:, 1:4:2] *= ratio 42 | 43 | return image, gts, igs 44 | 45 | 46 | def random_crop(image, gts, igs, crop_size, limit=8): 47 | img_height, img_width = image.shape[0:2] 48 | crop_h, crop_w = crop_size 49 | 50 | if len(gts) > 0: 51 | sel_id = np.random.randint(0, len(gts)) 52 | sel_center_x = int((gts[sel_id, 0] + gts[sel_id, 2]) / 2.0) 53 | sel_center_y = int((gts[sel_id, 1] + gts[sel_id, 3]) / 2.0) 54 | else: 55 | sel_center_x = int(np.random.randint(0, img_width - crop_w + 1) + crop_w * 0.5) 56 | sel_center_y = int(np.random.randint(0, img_height - crop_h + 1) + crop_h * 0.5) 57 | 58 | crop_x1 = max(sel_center_x - int(crop_w * 0.5), int(0)) 59 | crop_y1 = max(sel_center_y - int(crop_h * 0.5), int(0)) 60 | diff_x = max(crop_x1 + crop_w - img_width, int(0)) 61 | crop_x1 -= diff_x 62 | diff_y = max(crop_y1 + crop_h - img_height, int(0)) 63 | crop_y1 -= diff_y 64 | cropped_image = np.copy(image[crop_y1:crop_y1 + crop_h, crop_x1:crop_x1 + crop_w]) 65 | # crop detections 66 | if len(igs) > 0: 67 | igs[:, 0:4:2] -= crop_x1 68 | igs[:, 1:4:2] -= crop_y1 69 | igs[:, 0:4:2] = np.clip(igs[:, 0:4:2], 0, crop_w) 70 | igs[:, 1:4:2] = np.clip(igs[:, 1:4:2], 0, crop_h) 71 | keep_inds = ((igs[:, 2] - igs[:, 0]) >= 8) & \ 72 | ((igs[:, 3] - igs[:, 1]) >= 8) 73 | igs = igs[keep_inds] 74 | if len(gts) > 0: 75 | ori_gts = np.copy(gts) 76 | gts[:, 0:4:2] -= crop_x1 77 | gts[:, 1:4:2] -= crop_y1 78 | gts[:, 0:4:2] = np.clip(gts[:, 0:4:2], 0, crop_w) 79 | gts[:, 1:4:2] = np.clip(gts[:, 1:4:2], 0, crop_h) 80 | 81 | before_area = (ori_gts[:, 2] - ori_gts[:, 0]) * (ori_gts[:, 3] - ori_gts[:, 1]) 82 | after_area = (gts[:, 2] - gts[:, 0]) * (gts[:, 3] - gts[:, 1]) 83 | 84 | keep_inds = ((gts[:, 2] - gts[:, 0]) >= limit) & \ 85 | (after_area >= 0.5 * before_area) 86 | gts = gts[keep_inds] 87 | 88 | return cropped_image, gts, igs 89 | 90 | 91 | def random_pave(image, gts, igs, pave_size, limit=8): 92 | img_height, img_width = image.shape[0:2] 93 | pave_h, pave_w = pave_size 94 | # paved_image = np.zeros((pave_h, pave_w, 3), dtype=image.dtype) 95 | paved_image = np.ones((pave_h, pave_w, 3), dtype=image.dtype) * np.mean(image, dtype=int) 96 | pave_x = int(np.random.randint(0, pave_w - img_width + 1)) 97 | pave_y = int(np.random.randint(0, pave_h - img_height + 1)) 98 | paved_image[pave_y:pave_y + img_height, pave_x:pave_x + img_width] = image 99 | # pave detections 100 | if len(igs) > 0: 101 | igs[:, 0:4:2] += pave_x 102 | igs[:, 1:4:2] += pave_y 103 | keep_inds = ((igs[:, 2] - igs[:, 0]) >= 8) & \ 104 | ((igs[:, 3] - igs[:, 1]) >= 8) 105 | igs = igs[keep_inds] 106 | 107 | if len(gts) > 0: 108 | gts[:, 0:4:2] += pave_x 109 | gts[:, 1:4:2] += pave_y 110 | keep_inds = ((gts[:, 2] - gts[:, 0]) >= limit) 111 | gts = gts[keep_inds] 112 | 113 | 
return paved_image, gts, igs 114 | 115 | 116 | def augment(img_data, c, img): 117 | assert 'filepath' in img_data 118 | assert 'bboxes' in img_data 119 | img_data_aug = copy.deepcopy(img_data) 120 | if img is None: 121 | img = cv2.imread(img_data_aug['filepath']) 122 | img_height, img_width = img.shape[:2] 123 | 124 | # random brightness 125 | if c.brightness and np.random.randint(0, 2) == 0: 126 | img = _brightness(img, min=c.brightness[0], max=c.brightness[1]) 127 | # random horizontal flip 128 | if c.use_horizontal_flips and np.random.randint(0, 2) == 0: 129 | img = cv2.flip(img, 1) 130 | if len(img_data_aug['bboxes']) > 0: 131 | img_data_aug['bboxes'][:, [0, 2]] = img_width - img_data_aug['bboxes'][:, [2, 0]] 132 | if len(img_data_aug['ignoreareas']) > 0: 133 | img_data_aug['ignoreareas'][:, [0, 2]] = img_width - img_data_aug['ignoreareas'][:, [2, 0]] 134 | 135 | gts = np.copy(img_data_aug['bboxes']) 136 | igs = np.copy(img_data_aug['ignoreareas']) 137 | 138 | img, gts, igs = resize_image(img, gts, igs, scale=(0.4, 1.5)) 139 | if img.shape[0] >= c.size_train[0]: 140 | img, gts, igs = random_crop(img, gts, igs, c.size_train, limit=16) 141 | else: 142 | img, gts, igs = random_pave(img, gts, igs, c.size_train, limit=16) 143 | 144 | img_data_aug['bboxes'] = gts 145 | img_data_aug['ignoreareas'] = igs 146 | 147 | img_data_aug['width'] = c.size_train[1] 148 | img_data_aug['height'] = c.size_train[0] 149 | 150 | return img_data_aug, img -------------------------------------------------------------------------------- /dataloader/load_data.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import numpy as np 4 | from scipy import io as scio 5 | 6 | 7 | def get_citypersons(root_dir='data/citypersons', type='train'): 8 | all_img_path = os.path.join(root_dir, 'images') 9 | all_anno_path = os.path.join(root_dir, 'annotations') 10 | rows, cols = 1024, 2048 11 | 12 | anno_path = os.path.join(all_anno_path, 'anno_' + type + '.mat') 13 | res_path = os.path.join('data/cache/cityperson', type) 14 | image_data = [] 15 | annos = scio.loadmat(anno_path) 16 | index = 'anno_' + type + '_aligned' 17 | valid_count = 0 18 | iggt_count = 0 19 | box_count = 0 20 | 21 | for l in range(len(annos[index][0])): 22 | anno = annos[index][0][l] 23 | cityname = anno[0][0][0][0].encode() 24 | imgname = anno[0][0][1][0].encode() 25 | gts = anno[0][0][2] 26 | img_path = os.path.join(all_img_path, type + '/' + cityname + '/' + imgname) 27 | boxes = [] 28 | ig_boxes = [] 29 | vis_boxes = [] 30 | for i in range(len(gts)): 31 | label, x1, y1, w, h = gts[i, :5] # each row: [class_label, x1, y1, w, h, instance_id, x1_vis, y1_vis, w_vis, h_vis] 32 | x1, y1 = max(int(x1), 0), max(int(y1), 0) 33 | w, h = min(int(w), cols - x1 - 1), min(int(h), rows - y1 - 1) 34 | xv1, yv1, wv, hv = gts[i, 6:] # visible-region box 35 | xv1, yv1 = max(int(xv1), 0), max(int(yv1), 0) 36 | wv, hv = min(int(wv), cols - xv1 - 1), min(int(hv), rows - yv1 - 1) 37 | 38 | if label == 1 and h >= 50: # pedestrians at least 50px tall are positives; all else becomes ignore regions 39 | box = np.array([int(x1), int(y1), int(x1) + int(w), int(y1) + int(h)]) 40 | boxes.append(box) 41 | vis_box = np.array([int(xv1), int(yv1), int(xv1) + int(wv), int(yv1) + int(hv)]) 42 | vis_boxes.append(vis_box) 43 | else: 44 | ig_box = np.array([int(x1), int(y1), int(x1) + int(w), int(y1) + int(h)]) 45 | ig_boxes.append(ig_box) 46 | boxes = np.array(boxes) 47 | vis_boxes = np.array(vis_boxes) 48 | ig_boxes = np.array(ig_boxes) 49 | 50 | if len(boxes) > 0: 51 | valid_count += 1 52 | annotation = {} 53 | annotation['filepath'] = img_path 54 | box_count += len(boxes) 55 | iggt_count
+= len(ig_boxes) 56 | annotation['bboxes'] = boxes 57 | annotation['vis_bboxes'] = vis_boxes 58 | annotation['ignoreareas'] = ig_boxes 59 | image_data.append(annotation) 60 | 61 | return image_data -------------------------------------------------------------------------------- /dataloader/loader.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import sys 3 | import random 4 | import cv2 5 | import torch 6 | import numpy as np 7 | from PIL import Image 8 | from torch.utils.data import Dataset 9 | 10 | import data_augment 11 | from load_data import get_citypersons 12 | 13 | 14 | class CityPersons(Dataset): 15 | def __init__(self, path, type, config, preloaded=False, transform=None, caffemodel=False): 16 | 17 | self.dataset = get_citypersons(root_dir=path, type=type) 18 | self.dataset_len = len(self.dataset) 19 | self.type = type 20 | 21 | if self.type == 'train' and config.train_random: 22 | random.shuffle(self.dataset) 23 | self.config = config 24 | self.transform = transform 25 | self.caffemodel = caffemodel 26 | 27 | if self.type == 'train': 28 | self.preprocess = RandomResizeFix(size=config.size_train, scale=(0.4, 1.5)) 29 | else: 30 | self.preprocess = None 31 | 32 | self.preloaded = preloaded 33 | 34 | if self.preloaded: 35 | self.img_cache = [] 36 | for i, data in enumerate(self.dataset): 37 | if self.caffemodel: 38 | self.img_cache.append(cv2.imread(data['filepath'])) 39 | else: 40 | self.img_cache.append(Image.open(data['filepath'])) 41 | print('%d/%d\r' % (i+1, self.dataset_len)), 42 | sys.stdout.flush() 43 | print('') 44 | 45 | def __getitem__(self, item): 46 | 47 | if self.caffemodel: 48 | # input is BGR order, not normalized 49 | img_data = self.dataset[item] 50 | if self.preloaded: 51 | img = self.img_cache[item] 52 | else: 53 | img = cv2.imread(img_data['filepath']) 54 | 55 | if self.type == 'train': 56 | img_data, x_img = data_augment.augment(self.dataset[item], self.config, img) 57 | 58 | gts = img_data['bboxes'].copy() 59 | igs = img_data['ignoreareas'].copy() 60 | 61 | y_center, y_height, y_offset = self.calc_gt_center(gts, igs, radius=2, stride=self.config.down) 62 | 63 | x_img = x_img.astype(np.float32) 64 | x_img -= [103.939, 116.779, 123.68] 65 | x_img = torch.from_numpy(x_img).permute([2, 0, 1]) 66 | 67 | return x_img, [y_center, y_height, y_offset] 68 | 69 | else: 70 | x_img = img.astype(np.float32) 71 | x_img -= [103.939, 116.779, 123.68] 72 | x_img = torch.from_numpy(x_img).permute([2, 0, 1]) 73 | 74 | return x_img 75 | 76 | else: 77 | # input is RGB order, and normalized 78 | img_data = self.dataset[item] 79 | if self.preloaded: 80 | img = self.img_cache[item] 81 | else: 82 | img = Image.open(img_data['filepath']) 83 | 84 | if self.type == 'train': 85 | gts = img_data['bboxes'].copy() 86 | igs = img_data['ignoreareas'].copy() 87 | 88 | x_img, gts, igs = self.preprocess(img, gts, igs) 89 | 90 | y_center, y_height, y_offset = self.calc_gt_center(gts, igs, radius=2, stride=self.config.down) 91 | 92 | if self.transform is not None: 93 | x_img = self.transform(x_img) 94 | 95 | return x_img, [y_center, y_height, y_offset] 96 | 97 | else: 98 | if self.transform is not None: 99 | x_img = self.transform(img) 100 | else: 101 | x_img = img 102 | 103 | return x_img 104 | 105 | def __len__(self): 106 | return self.dataset_len 107 | 108 | def calc_gt_center(self, gts, igs, radius=2, stride=4): 109 | 110 | def gaussian(kernel): 111 | sigma = ((kernel-1) * 0.5 - 1) * 0.3 + 0.8 112 | s = 2*(sigma**2) 113 
| dx = np.exp(-np.square(np.arange(kernel) - int(kernel / 2)) / s) 114 | return np.reshape(dx, (-1, 1)) 115 | 116 | scale_map = np.zeros((2, int(self.config.size_train[0] / stride), int(self.config.size_train[1] / stride))) 117 | offset_map = np.zeros((3, int(self.config.size_train[0] / stride), int(self.config.size_train[1] / stride))) 118 | pos_map = np.zeros((3, int(self.config.size_train[0] / stride), int(self.config.size_train[1] / stride))) 119 | pos_map[1, :, :, ] = 1 # channel 1: 1-value mask, ignore area will be set to 0 120 | 121 | if len(igs) > 0: 122 | igs = igs / stride 123 | for ind in range(len(igs)): 124 | x1, y1, x2, y2 = int(igs[ind, 0]), int(igs[ind, 1]), int(np.ceil(igs[ind, 2])), int(np.ceil(igs[ind, 3])) 125 | pos_map[1, y1:y2, x1:x2] = 0 126 | 127 | if len(gts) > 0: 128 | gts = gts / stride 129 | for ind in range(len(gts)): 130 | x1, y1, x2, y2 = int(np.ceil(gts[ind, 0])), int(np.ceil(gts[ind, 1])), int(gts[ind, 2]), int(gts[ind, 3]) 131 | c_x, c_y = int((gts[ind, 0] + gts[ind, 2]) / 2), int((gts[ind, 1] + gts[ind, 3]) / 2) 132 | 133 | dx = gaussian(x2-x1) 134 | dy = gaussian(y2-y1) 135 | gau_map = np.multiply(dy, np.transpose(dx)) 136 | 137 | pos_map[0, y1:y2, x1:x2] = np.maximum(pos_map[0, y1:y2, x1:x2], gau_map) # gauss map 138 | pos_map[1, y1:y2, x1:x2] = 1 # 1-mask map 139 | pos_map[2, c_y, c_x] = 1 # center map 140 | 141 | scale_map[0, c_y-radius:c_y+radius+1, c_x-radius:c_x+radius+1] = np.log(gts[ind, 3] - gts[ind, 1]) # log value of height 142 | scale_map[1, c_y-radius:c_y+radius+1, c_x-radius:c_x+radius+1] = 1 # 1-mask 143 | 144 | offset_map[0, c_y, c_x] = (gts[ind, 1] + gts[ind, 3]) / 2 - c_y - 0.5 # height-Y offset 145 | offset_map[1, c_y, c_x] = (gts[ind, 0] + gts[ind, 2]) / 2 - c_x - 0.5 # width-X offset 146 | offset_map[2, c_y, c_x] = 1 # 1-mask 147 | 148 | return pos_map, scale_map, offset_map 149 | 150 | 151 | class RandomResizeFix(object): 152 | """ 153 | Args: 154 | size: expected output size of each edge 155 | scale: scale factor 156 | interpolation: Default: PIL.Image.BILINEAR 157 | """ 158 | def __init__(self, size, scale=(0.4, 1.5), interpolation=Image.BILINEAR): 159 | self.size = size 160 | self.interpolation = interpolation 161 | self.scale = scale 162 | 163 | def __call__(self, img, gts, igs): 164 | # resize image 165 | w, h = img.size 166 | ratio = np.random.uniform(self.scale[0], self.scale[1]) 167 | n_w, n_h = int(ratio * w), int(ratio * h) 168 | img = img.resize((n_w, n_h), self.interpolation) 169 | gts = gts.copy() 170 | igs = igs.copy() 171 | 172 | # resize label 173 | if len(gts) > 0: 174 | gts = np.asarray(gts, dtype=float) 175 | gts *= ratio 176 | 177 | if len(igs) > 0: 178 | igs = np.asarray(igs, dtype=float) 179 | igs *= ratio 180 | 181 | # random flip 182 | w, h = img.size 183 | if np.random.randint(0, 2) == 0: 184 | img = img.transpose(Image.FLIP_LEFT_RIGHT) 185 | if len(gts) > 0: 186 | gts[:, [0, 2]] = w - gts[:, [2, 0]] 187 | if len(igs) > 0: 188 | igs[:, [0, 2]] = w - igs[:, [2, 0]] 189 | 190 | if h >= self.size[0]: 191 | # random crop 192 | img, gts, igs = self.random_crop(img, gts, igs, self.size, limit=16) 193 | else: 194 | # random pad 195 | img, gts, igs = self.random_pave(img, gts, igs, self.size, limit=16) 196 | 197 | return img, gts, igs 198 | 199 | @staticmethod 200 | def random_crop(img, gts, igs, size, limit=8): 201 | w, h = img.size 202 | crop_h, crop_w = size 203 | 204 | if len(gts) > 0: 205 | sel_id = np.random.randint(0, len(gts)) 206 | sel_center_x = int((gts[sel_id, 0] + gts[sel_id, 2]) / 2.0) 207 | 
sel_center_y = int((gts[sel_id, 1] + gts[sel_id, 3]) / 2.0) 208 | else: 209 | sel_center_x = int(np.random.randint(0, w - crop_w + 1) + crop_w * 0.5) 210 | sel_center_y = int(np.random.randint(0, h - crop_h + 1) + crop_h * 0.5) 211 | 212 | crop_x1 = max(sel_center_x - int(crop_w * 0.5), int(0)) 213 | crop_y1 = max(sel_center_y - int(crop_h * 0.5), int(0)) 214 | diff_x = max(crop_x1 + crop_w - w, int(0)) 215 | crop_x1 -= diff_x 216 | diff_y = max(crop_y1 + crop_h - h, int(0)) 217 | crop_y1 -= diff_y 218 | cropped_img = img.crop((crop_x1, crop_y1, crop_x1 + crop_w, crop_y1 + crop_h)) 219 | 220 | # crop detections 221 | if len(igs) > 0: 222 | igs[:, 0:4:2] -= crop_x1 223 | igs[:, 1:4:2] -= crop_y1 224 | igs[:, 0:4:2] = np.clip(igs[:, 0:4:2], 0, crop_w) 225 | igs[:, 1:4:2] = np.clip(igs[:, 1:4:2], 0, crop_h) 226 | keep_inds = ((igs[:, 2] - igs[:, 0]) >= 8) & ((igs[:, 3] - igs[:, 1]) >= 8) 227 | igs = igs[keep_inds] 228 | 229 | if len(gts) > 0: 230 | before_area = (gts[:, 2] - gts[:, 0]) * (gts[:, 3] - gts[:, 1]) 231 | gts[:, 0:4:2] -= crop_x1 232 | gts[:, 1:4:2] -= crop_y1 233 | gts[:, 0:4:2] = np.clip(gts[:, 0:4:2], 0, crop_w) 234 | gts[:, 1:4:2] = np.clip(gts[:, 1:4:2], 0, crop_h) 235 | 236 | after_area = (gts[:, 2] - gts[:, 0]) * (gts[:, 3] - gts[:, 1]) 237 | 238 | keep_inds = ((gts[:, 2] - gts[:, 0]) >= limit) & (after_area >= 0.5 * before_area) 239 | gts = gts[keep_inds] 240 | 241 | return cropped_img, gts, igs 242 | 243 | @staticmethod 244 | def random_pave(img, gts, igs, size, limit=8): 245 | img = np.asarray(img) 246 | h, w = img.shape[0:2] 247 | pave_h, pave_w = size 248 | # paved_image = np.zeros((pave_h, pave_w, 3), dtype=image.dtype) 249 | paved_image = np.ones((pave_h, pave_w, 3), dtype=img.dtype) * np.mean(img, dtype=int) 250 | pave_x = int(np.random.randint(0, pave_w - w + 1)) 251 | pave_y = int(np.random.randint(0, pave_h - h + 1)) 252 | paved_image[pave_y:pave_y + h, pave_x:pave_x + w] = img 253 | # pave detections 254 | if len(igs) > 0: 255 | igs[:, 0:4:2] += pave_x 256 | igs[:, 1:4:2] += pave_y 257 | keep_inds = ((igs[:, 2] - igs[:, 0]) >= 8) & ((igs[:, 3] - igs[:, 1]) >= 8) 258 | igs = igs[keep_inds] 259 | 260 | if len(gts) > 0: 261 | gts[:, 0:4:2] += pave_x 262 | gts[:, 1:4:2] += pave_y 263 | keep_inds = ((gts[:, 2] - gts[:, 0]) >= limit) 264 | gts = gts[keep_inds] 265 | 266 | return Image.fromarray(paved_image), gts, igs 267 | 268 | 269 | -------------------------------------------------------------------------------- /eval_city/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lwpyr/CSP-pedestrian-detection-in-pytorch/f280cd08bebb5daeaba27d3ea6ac5f317b42f878/eval_city/__init__.py -------------------------------------------------------------------------------- /eval_city/cocoapi/.gitignore: -------------------------------------------------------------------------------- 1 | images/ 2 | annotations/ 3 | results/ 4 | external/ 5 | .DS_Store 6 | 7 | MatlabAPI/analyze*/ 8 | MatlabAPI/visualize*/ 9 | 10 | PythonAPI/pycocotools/__init__.pyc 11 | PythonAPI/pycocotools/_mask.c 12 | PythonAPI/pycocotools/_mask.so 13 | PythonAPI/pycocotools/coco.pyc 14 | PythonAPI/pycocotools/cocoeval.pyc 15 | PythonAPI/pycocotools/mask.pyc 16 | -------------------------------------------------------------------------------- /eval_city/cocoapi/LuaAPI/CocoApi.lua: -------------------------------------------------------------------------------- 1 | 
--[[---------------------------------------------------------------------------- 2 | 3 | Interface for accessing the Common Objects in COntext (COCO) dataset. 4 | 5 | For an overview of the API please see http://mscoco.org/dataset/#download. 6 | CocoApi.lua (this file) is modeled after the Matlab CocoApi.m: 7 | https://github.com/pdollar/coco/blob/master/MatlabAPI/CocoApi.m 8 | 9 | The following API functions are defined in the Lua API: 10 | CocoApi - Load COCO annotation file and prepare data structures. 11 | getAnnIds - Get ann ids that satisfy given filter conditions. 12 | getCatIds - Get cat ids that satisfy given filter conditions. 13 | getImgIds - Get img ids that satisfy given filter conditions. 14 | loadAnns - Load anns with the specified ids. 15 | loadCats - Load cats with the specified ids. 16 | loadImgs - Load imgs with the specified ids. 17 | showAnns - Display the specified annotations. 18 | Throughout the API "ann"=annotation, "cat"=category, and "img"=image. 19 | For detailed usage information please see cocoDemo.lua. 20 | 21 | LIMITATIONS: the following API functions are NOT defined in the Lua API: 22 | loadRes - Load algorithm results and create API for accessing them. 23 | download - Download COCO images from mscoco.org server. 24 | In addition, currently the getCatIds() and getImgIds() do not accept filters. 25 | getAnnIds() can be called using getAnnIds({imgId=id}) and getAnnIds({catId=id}). 26 | 27 | Note: loading COCO JSON annotations to Lua tables is quite slow. Hence, a call 28 | to CocoApi(annFile) converts the annotations to a custom 'flattened' format that 29 | is more efficient. The first time a COCO JSON is loaded, the conversion is 30 | invoked (this may take up to a minute). The converted data is then stored in a 31 | t7 file (the code must have write permission to the dir of the JSON file). 32 | Future calls of cocoApi=CocoApi(annFile) take a fraction of a second. To view the 33 | created data just inspect cocoApi.data of a created instance of the CocoApi. 34 | 35 | Common Objects in COntext (COCO) Toolbox. version 3.0 36 | Data, paper, and tutorials available at: http://mscoco.org/ 37 | Code written by Pedro O. Pinheiro and Piotr Dollar, 2016. 38 | Licensed under the Simplified BSD License [see coco/license.txt] 39 | 40 | ------------------------------------------------------------------------------]] 41 | 42 | local json = require 'cjson' 43 | local coco = require 'coco.env' 44 | 45 | local TensorTable = torch.class('TensorTable',coco) 46 | local CocoSeg = torch.class('CocoSeg',coco) 47 | local CocoApi = torch.class('CocoApi',coco) 48 | 49 | -------------------------------------------------------------------------------- 50 | 51 | --[[ TensorTable is a lightweight data structure for storing variable size 1D 52 | tensors. Tables of tensors are slow to save/load to disk. Instead, TensorTable 53 | stores all the data in a single long tensor (along with indices into the tensor) 54 | making serialization fast. A TensorTable may only contain 1D same-type torch 55 | tensors or strings. It supports only creation from a table and indexing.
]] 56 | 57 | function TensorTable:__init( T ) 58 | local n = #T; assert(n>0) 59 | local isStr = torch.type(T[1])=='string' 60 | assert(isStr or torch.isTensor(T[1])) 61 | local c=function(s) return torch.CharTensor(torch.CharStorage():string(s)) end 62 | if isStr then local S=T; T={}; for i=1,n do T[i]=c(S[i]) end end 63 | local ms, idx = torch.LongTensor(n), torch.LongTensor(n+1) 64 | for i=1,n do ms[i]=T[i]:numel() end 65 | idx[1]=1; idx:narrow(1,2,n):copy(ms); idx=idx:cumsum() 66 | local type = string.sub(torch.type(T[1]),7,-1) 67 | local data = torch[type](idx[n+1]-1) 68 | if isStr then type='string' end 69 | for i=1,n do if ms[i]>0 then data:sub(idx[i],idx[i+1]-1):copy(T[i]) end end 70 | if ms:eq(ms[1]):all() and ms[1]>0 then data=data:view(n,ms[1]); idx=nil end 71 | self.data, self.idx, self.type = data, idx, type 72 | end 73 | 74 | function TensorTable:__index__( i ) 75 | if torch.type(i)~='number' then return false end 76 | local d, idx, type = self.data, self.idx, self.type 77 | if idx and idx[i]==idx[i+1] then 78 | if type=='string' then d='' else d=torch[type]() end 79 | else 80 | if idx then d=d:sub(idx[i],idx[i+1]-1) else d=d[i] end 81 | if type=='string' then d=d:clone():storage():string() end 82 | end 83 | return d, true 84 | end 85 | 86 | -------------------------------------------------------------------------------- 87 | 88 | --[[ CocoSeg is an efficient data structure for storing COCO segmentations. ]] 89 | 90 | function CocoSeg:__init( segs ) 91 | local polys, pIdx, sizes, rles, p, isStr = {}, {}, {}, {}, 0, 0 92 | for i,seg in pairs(segs) do if seg.size then isStr=seg.counts break end end 93 | isStr = torch.type(isStr)=='string' 94 | for i,seg in pairs(segs) do 95 | pIdx[i], sizes[i] = {}, {} 96 | if seg.size then 97 | sizes[i],rles[i] = seg.size,seg.counts 98 | else 99 | if isStr then rles[i]='' else rles[i]={} end 100 | for j=1,#seg do p=p+1; pIdx[i][j],polys[p] = p,seg[j] end 101 | end 102 | pIdx[i],sizes[i] = torch.LongTensor(pIdx[i]),torch.IntTensor(sizes[i]) 103 | if not isStr then rles[i]=torch.IntTensor(rles[i]) end 104 | end 105 | for i=1,p do polys[i]=torch.DoubleTensor(polys[i]) end 106 | self.polys, self.pIdx = coco.TensorTable(polys), coco.TensorTable(pIdx) 107 | self.sizes, self.rles = coco.TensorTable(sizes), coco.TensorTable(rles) 108 | end 109 | 110 | function CocoSeg:__index__( i ) 111 | if torch.type(i)~='number' then return false end 112 | if self.sizes[i]:numel()>0 then 113 | return {size=self.sizes[i],counts=self.rles[i]}, true 114 | else 115 | local ids, polys = self.pIdx[i], {} 116 | for i=1,ids:numel() do polys[i]=self.polys[ids[i]] end 117 | return polys, true 118 | end 119 | end 120 | 121 | -------------------------------------------------------------------------------- 122 | 123 | --[[ CocoApi is the API to the COCO dataset, see main comment for details. ]] 124 | 125 | function CocoApi:__init( annFile ) 126 | assert( string.sub(annFile,-4,-1)=='json' and paths.filep(annFile) ) 127 | local torchFile = string.sub(annFile,1,-6) .. '.t7' 128 | if not paths.filep(torchFile) then self:__convert(annFile,torchFile) end 129 | local data = torch.load(torchFile) 130 | self.data, self.inds = data, {} 131 | for k,v in pairs({images='img',categories='cat',annotations='ann'}) do 132 | local M = {}; self.inds[v..'IdsMap']=M 133 | if data[k] then for i=1,data[k].id:size(1) do M[data[k].id[i]]=i end end 134 | end 135 | end 136 | 137 | function CocoApi:__convert( annFile, torchFile ) 138 | print('convert: '..annFile..' 
--> .t7 [please be patient]') 139 | local tic = torch.tic() 140 | -- load data and decode json 141 | local data = torch.CharStorage(annFile):string() 142 | data = json.decode(data); collectgarbage() 143 | -- transpose and flatten each field in the coco data struct 144 | local convert = {images=true, categories=true, annotations=true} 145 | for field, d in pairs(data) do if convert[field] then 146 | print('converting: '..field) 147 | local n, out = #d, {} 148 | if n==0 then d,n={d},1 end 149 | for k,v in pairs(d[1]) do 150 | local t, isReg = torch.type(v), true 151 | for i=1,n do isReg=isReg and torch.type(d[i][k])==t end 152 | if t=='number' and isReg then 153 | out[k] = torch.DoubleTensor(n) 154 | for i=1,n do out[k][i]=d[i][k] end 155 | elseif t=='string' and isReg then 156 | out[k]={}; for i=1,n do out[k][i]=d[i][k] end 157 | out[k] = coco.TensorTable(out[k]) 158 | elseif t=='table' and isReg and torch.type(v[1])=='number' then 159 | out[k]={}; for i=1,n do out[k][i]=torch.DoubleTensor(d[i][k]) end 160 | out[k] = coco.TensorTable(out[k]) 161 | if not out[k].idx then out[k]=out[k].data end 162 | else 163 | out[k]={}; for i=1,n do out[k][i]=d[i][k] end 164 | if k=='segmentation' then out[k] = coco.CocoSeg(out[k]) end 165 | end 166 | collectgarbage() 167 | end 168 | if out.id then out.idx=torch.range(1,out.id:size(1)) end 169 | data[field] = out 170 | collectgarbage() 171 | end end 172 | -- create mapping from cat/img index to anns indices for that cat/img 173 | print('convert: building indices') 174 | local makeMap = function( type, type_id ) 175 | if not data[type] or not data.annotations then return nil end 176 | local invmap, n = {}, data[type].id:size(1) 177 | for i=1,n do invmap[data[type].id[i]]=i end 178 | local map = {}; for i=1,n do map[i]={} end 179 | data.annotations[type_id..'x'] = data.annotations[type_id]:clone() 180 | for i=1,data.annotations.id:size(1) do 181 | local id = invmap[data.annotations[type_id][i]] 182 | data.annotations[type_id..'x'][i] = id 183 | table.insert(map[id],data.annotations.id[i]) 184 | end 185 | for i=1,n do map[i]=torch.LongTensor(map[i]) end 186 | return coco.TensorTable(map) 187 | end 188 | data.annIdsPerImg = makeMap('images','image_id') 189 | data.annIdsPerCat = makeMap('categories','category_id') 190 | -- save to disk 191 | torch.save( torchFile, data ) 192 | print(('convert: complete [%.2f s]'):format(torch.toc(tic))) 193 | end 194 | 195 | function CocoApi:getAnnIds( filters ) 196 | if not filters then filters = {} end 197 | if filters.imgId then 198 | return self.data.annIdsPerImg[self.inds.imgIdsMap[filters.imgId]] or {} 199 | elseif filters.catId then 200 | return self.data.annIdsPerCat[self.inds.catIdsMap[filters.catId]] or {} 201 | else 202 | return self.data.annotations.id 203 | end 204 | end 205 | 206 | function CocoApi:getCatIds() 207 | return self.data.categories.id 208 | end 209 | 210 | function CocoApi:getImgIds() 211 | return self.data.images.id 212 | end 213 | 214 | function CocoApi:loadAnns( ids ) 215 | return self:__load(self.data.annotations,self.inds.annIdsMap,ids) 216 | end 217 | 218 | function CocoApi:loadCats( ids ) 219 | return self:__load(self.data.categories,self.inds.catIdsMap,ids) 220 | end 221 | 222 | function CocoApi:loadImgs( ids ) 223 | return self:__load(self.data.images,self.inds.imgIdsMap,ids) 224 | end 225 | 226 | function CocoApi:showAnns( img, anns ) 227 | local n, h, w = #anns, img:size(2), img:size(3) 228 | local MaskApi, clrs = coco.MaskApi, torch.rand(n,3)*.6+.4 229 | local O = 
img:clone():contiguous():float() 230 | if n==0 then anns,n={anns},1 end 231 | if anns[1].keypoints then for i=1,n do if anns[i].iscrowd==0 then 232 | local sk, kp, j, k = self:loadCats(anns[i].category_id)[1].skeleton 233 | kp=anns[i].keypoints; k=kp:size(1); j=torch.range(1,k,3):long(); k=k/3; 234 | local x,y,v = kp:index(1,j), kp:index(1,j+1), kp:index(1,j+2) 235 | for _,s in pairs(sk) do if v[s[1]]>0 and v[s[2]]>0 then 236 | MaskApi.drawLine(O,x[s[1]],y[s[1]],x[s[2]],y[s[2]],.75,clrs[i]) 237 | end end 238 | for j=1,k do if v[j]==1 then MaskApi.drawCirc(O,x[j],y[j],4,{0,0,0}) end end 239 | for j=1,k do if v[j]>0 then MaskApi.drawCirc(O,x[j],y[j],3,clrs[i]) end end 240 | end end end 241 | if anns[1].segmentation or anns[1].bbox then 242 | local Rs, alpha = {}, anns[1].keypoints and .25 or .4 243 | for i=1,n do 244 | Rs[i]=anns[i].segmentation 245 | if Rs[i] and #Rs[i]>0 then Rs[i]=MaskApi.frPoly(Rs[i],h,w) end 246 | if not Rs[i] then Rs[i]=MaskApi.frBbox(anns[i].bbox,h,w)[1] end 247 | end 248 | MaskApi.drawMasks(O,MaskApi.decode(Rs),nil,alpha,clrs) 249 | end 250 | return O 251 | end 252 | 253 | function CocoApi:__load( data, map, ids ) 254 | if not torch.isTensor(ids) then ids=torch.LongTensor({ids}) end 255 | local out, idx = {}, nil 256 | for i=1,ids:numel() do 257 | out[i], idx = {}, map[ids[i]] 258 | for k,v in pairs(data) do out[i][k]=v[idx] end 259 | end 260 | return out 261 | end 262 | -------------------------------------------------------------------------------- /eval_city/cocoapi/LuaAPI/MaskApi.lua: -------------------------------------------------------------------------------- 1 | --[[---------------------------------------------------------------------------- 2 | 3 | Interface for manipulating masks stored in RLE format. 4 | 5 | For an overview of RLE please see http://mscoco.org/dataset/#download. 6 | Additionally, more detailed information can be found in the Matlab MaskApi.m: 7 | https://github.com/pdollar/coco/blob/master/MatlabAPI/MaskApi.m 8 | 9 | The following API functions are defined: 10 | encode - Encode binary masks using RLE. 11 | decode - Decode binary masks encoded via RLE. 12 | merge - Compute union or intersection of encoded masks. 13 | iou - Compute intersection over union between masks. 14 | nms - Compute non-maximum suppression between ordered masks. 15 | area - Compute area of encoded masks. 16 | toBbox - Get bounding boxes surrounding encoded masks. 17 | frBbox - Convert bounding boxes to encoded masks. 18 | frPoly - Convert polygon to encoded mask. 19 | drawCirc - Draw circle into image (alters input). 20 | drawLine - Draw line into image (alters input). 21 | drawMasks - Draw masks into image (alters input). 22 | 23 | Usage: 24 | Rs = MaskApi.encode( masks ) 25 | masks = MaskApi.decode( Rs ) 26 | R = MaskApi.merge( Rs, [intersect=false] ) 27 | o = MaskApi.iou( dt, gt, [iscrowd=false] ) 28 | keep = MaskApi.nms( dt, thr ) 29 | a = MaskApi.area( Rs ) 30 | bbs = MaskApi.toBbox( Rs ) 31 | Rs = MaskApi.frBbox( bbs, h, w ) 32 | R = MaskApi.frPoly( poly, h, w ) 33 | MaskApi.drawCirc( img, x, y, rad, clr ) 34 | MaskApi.drawLine( img, x0, y0, x1, y1, rad, clr ) 35 | MaskApi.drawMasks( img, masks, [maxn=n], [alpha=.4], [clrs] ) 36 | For detailed usage information please see cocoDemo.lua. 
37 | 38 | In the API the following formats are used: 39 | R,Rs - [table] Run-length encoding of binary mask(s) 40 | masks - [nxhxw] Binary mask(s) 41 | bbs - [nx4] Bounding box(es) stored as [x y w h] 42 | poly - Polygon stored as {[x1 y1 x2 y2...],[x1 y1 ...],...} 43 | dt,gt - May be either bounding boxes or encoded masks 44 | Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 45 | 46 | Common Objects in COntext (COCO) Toolbox. version 3.0 47 | Data, paper, and tutorials available at: http://mscoco.org/ 48 | Code written by Pedro O. Pinheiro and Piotr Dollar, 2016. 49 | Licensed under the Simplified BSD License [see coco/license.txt] 50 | 51 | ------------------------------------------------------------------------------]] 52 | 53 | local ffi = require 'ffi' 54 | local coco = require 'coco.env' 55 | 56 | coco.MaskApi = {} 57 | local MaskApi = coco.MaskApi 58 | 59 | coco.libmaskapi = ffi.load(package.searchpath('libmaskapi',package.cpath)) 60 | local libmaskapi = coco.libmaskapi 61 | 62 | -------------------------------------------------------------------------------- 63 | 64 | MaskApi.encode = function( masks ) 65 | local n, h, w = masks:size(1), masks:size(2), masks:size(3) 66 | masks = masks:type('torch.ByteTensor'):transpose(2,3) 67 | local data = masks:contiguous():data() 68 | local Qs = MaskApi._rlesInit(n) 69 | libmaskapi.rleEncode(Qs[0],data,h,w,n) 70 | return MaskApi._rlesToLua(Qs,n) 71 | end 72 | 73 | MaskApi.decode = function( Rs ) 74 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 75 | local masks = torch.ByteTensor(n,w,h):zero():contiguous() 76 | libmaskapi.rleDecode(Qs,masks:data(),n) 77 | MaskApi._rlesFree(Qs,n) 78 | return masks:transpose(2,3) 79 | end 80 | 81 | MaskApi.merge = function( Rs, intersect ) 82 | intersect = intersect or 0 83 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 84 | local Q = MaskApi._rlesInit(1) 85 | libmaskapi.rleMerge(Qs,Q,n,intersect) 86 | MaskApi._rlesFree(Qs,n) 87 | return MaskApi._rlesToLua(Q,1)[1] 88 | end 89 | 90 | MaskApi.iou = function( dt, gt, iscrowd ) 91 | if not iscrowd then iscrowd = NULL else 92 | iscrowd = iscrowd:type('torch.ByteTensor'):contiguous():data() 93 | end 94 | if torch.isTensor(gt) and torch.isTensor(dt) then 95 | local nDt, k = dt:size(1), dt:size(2); assert(k==4) 96 | local nGt, k = gt:size(1), gt:size(2); assert(k==4) 97 | local dDt = dt:type('torch.DoubleTensor'):contiguous():data() 98 | local dGt = gt:type('torch.DoubleTensor'):contiguous():data() 99 | local o = torch.DoubleTensor(nGt,nDt):contiguous() 100 | libmaskapi.bbIou(dDt,dGt,nDt,nGt,iscrowd,o:data()) 101 | return o:transpose(1,2) 102 | else 103 | local qDt, nDt = MaskApi._rlesFrLua(dt) 104 | local qGt, nGt = MaskApi._rlesFrLua(gt) 105 | local o = torch.DoubleTensor(nGt,nDt):contiguous() 106 | libmaskapi.rleIou(qDt,qGt,nDt,nGt,iscrowd,o:data()) 107 | MaskApi._rlesFree(qDt,nDt); MaskApi._rlesFree(qGt,nGt) 108 | return o:transpose(1,2) 109 | end 110 | end 111 | 112 | MaskApi.nms = function( dt, thr ) 113 | if torch.isTensor(dt) then 114 | local n, k = dt:size(1), dt:size(2); assert(k==4) 115 | local Q = dt:type('torch.DoubleTensor'):contiguous():data() 116 | local kp = torch.IntTensor(n):contiguous() 117 | libmaskapi.bbNms(Q,n,kp:data(),thr) 118 | return kp 119 | else 120 | local Q, n = MaskApi._rlesFrLua(dt) 121 | local kp = torch.IntTensor(n):contiguous() 122 | libmaskapi.rleNms(Q,n,kp:data(),thr) 123 | MaskApi._rlesFree(Q,n) 124 | return kp 125 | end 126 | end 127 | 128 | MaskApi.area = function( Rs ) 129 | local Qs, n, h, w = 
MaskApi._rlesFrLua(Rs) 130 | local a = torch.IntTensor(n):contiguous() 131 | libmaskapi.rleArea(Qs,n,a:data()) 132 | MaskApi._rlesFree(Qs,n) 133 | return a 134 | end 135 | 136 | MaskApi.toBbox = function( Rs ) 137 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 138 | local bb = torch.DoubleTensor(n,4):contiguous() 139 | libmaskapi.rleToBbox(Qs,bb:data(),n) 140 | MaskApi._rlesFree(Qs,n) 141 | return bb 142 | end 143 | 144 | MaskApi.frBbox = function( bbs, h, w ) 145 | if bbs:dim()==1 then bbs=bbs:view(1,bbs:size(1)) end 146 | local n, k = bbs:size(1), bbs:size(2); assert(k==4) 147 | local data = bbs:type('torch.DoubleTensor'):contiguous():data() 148 | local Qs = MaskApi._rlesInit(n) 149 | libmaskapi.rleFrBbox(Qs[0],data,h,w,n) 150 | return MaskApi._rlesToLua(Qs,n) 151 | end 152 | 153 | MaskApi.frPoly = function( poly, h, w ) 154 | local n = #poly 155 | local Qs, Q = MaskApi._rlesInit(n), MaskApi._rlesInit(1) 156 | for i,p in pairs(poly) do 157 | local xy = p:type('torch.DoubleTensor'):contiguous():data() 158 | libmaskapi.rleFrPoly(Qs[i-1],xy,p:size(1)/2,h,w) 159 | end 160 | libmaskapi.rleMerge(Qs,Q[0],n,0) 161 | MaskApi._rlesFree(Qs,n) 162 | return MaskApi._rlesToLua(Q,1)[1] 163 | end 164 | 165 | -------------------------------------------------------------------------------- 166 | 167 | MaskApi.drawCirc = function( img, x, y, rad, clr ) 168 | assert(img:isContiguous() and img:dim()==3) 169 | local k, h, w, data = img:size(1), img:size(2), img:size(3), img:data() 170 | for dx=-rad,rad do for dy=-rad,rad do 171 | local xi, yi = torch.round(x+dx), torch.round(y+dy) 172 | if dx*dx+dy*dy<=rad*rad and xi>=0 and yi>=0 and xi<w and yi<h then 173 | for c=1,k do data[(c-1)*h*w+yi*w+xi]=clr[c] end 174 | end 175 | end end 176 | end
-------------------------------------------------------------------------------- /eval_city/cocoapi/LuaAPI/rocks/coco-scm-1.rockspec: -------------------------------------------------------------------------------- 15 | dependencies = { 16 | "lua >= 5.1", 17 | "torch >= 7.0", 18 | "lua-cjson" 19 | } 20 | 21 | build = { 22 | type = "builtin", 23 | modules = { 24 | ["coco.env"] = "LuaAPI/env.lua", 25 | ["coco.init"] = "LuaAPI/init.lua", 26 | ["coco.MaskApi"] = "LuaAPI/MaskApi.lua", 27 | ["coco.CocoApi"] = "LuaAPI/CocoApi.lua", 28 | libmaskapi = { 29 | sources = { "common/maskApi.c" }, 30 | incdirs = { "common/" } 31 | } 32 | } 33 | } 34 | 35 | -- luarocks make LuaAPI/rocks/coco-scm-1.rockspec 36 | -- https://github.com/pdollar/coco/raw/master/LuaAPI/rocks/coco-scm-1.rockspec 37 | -------------------------------------------------------------------------------- /eval_city/cocoapi/MatlabAPI/CocoApi.m: -------------------------------------------------------------------------------- 1 | classdef CocoApi 2 | % Interface for accessing the Microsoft COCO dataset. 3 | % 4 | % Microsoft COCO is a large image dataset designed for object detection, 5 | % segmentation, and caption generation. CocoApi.m is a Matlab API that 6 | % assists in loading, parsing and visualizing the annotations in COCO. 7 | % Please visit http://mscoco.org/ for more information on COCO, including 8 | % for the data, paper, and tutorials. The exact format of the annotations 9 | % is also described on the COCO website. For example usage of the CocoApi 10 | % please see cocoDemo.m. In addition to this API, please download both 11 | % the COCO images and annotations in order to run the demo. 12 | % 13 | % An alternative to using the API is to load the annotations directly 14 | % into a Matlab struct. This can be achieved via: 15 | % data = gason(fileread(annFile)); 16 | % Using the API provides additional utility functions. Note that this API 17 | % supports both *instance* and *caption* annotations. In the case of 18 | % captions not all functions are defined (e.g. categories are undefined).
19 | % 20 | % The following API functions are defined: 21 | % CocoApi - Load COCO annotation file and prepare data structures. 22 | % getAnnIds - Get ann ids that satisfy given filter conditions. 23 | % getCatIds - Get cat ids that satisfy given filter conditions. 24 | % getImgIds - Get img ids that satisfy given filter conditions. 25 | % loadAnns - Load anns with the specified ids. 26 | % loadCats - Load cats with the specified ids. 27 | % loadImgs - Load imgs with the specified ids. 28 | % showAnns - Display the specified annotations. 29 | % loadRes - Load algorithm results and create API for accessing them. 30 | % Throughout the API "ann"=annotation, "cat"=category, and "img"=image. 31 | % Help on each functions can be accessed by: "help CocoApi>function". 32 | % 33 | % See also CocoApi>CocoApi, CocoApi>getAnnIds, CocoApi>getCatIds, 34 | % CocoApi>getImgIds, CocoApi>loadAnns, CocoApi>loadCats, 35 | % CocoApi>loadImgs, CocoApi>showAnns, CocoApi>loadRes 36 | % 37 | % Microsoft COCO Toolbox. version 2.0 38 | % Data, paper, and tutorials available at: http://mscoco.org/ 39 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 40 | % Licensed under the Simplified BSD License [see coco/license.txt] 41 | 42 | properties 43 | data % COCO annotation data structure 44 | inds % data structures for fast indexing 45 | end 46 | 47 | methods 48 | function coco = CocoApi( annFile ) 49 | % Load COCO annotation file and prepare data structures. 50 | % 51 | % USAGE 52 | % coco = CocoApi( annFile ) 53 | % 54 | % INPUTS 55 | % annFile - COCO annotation filename 56 | % 57 | % OUTPUTS 58 | % coco - initialized coco object 59 | fprintf('Loading and preparing annotations... '); clk=clock; 60 | if(isstruct(annFile)), coco.data=annFile; else 61 | coco.data=gason(fileread(annFile)); end 62 | is.imgIds = [coco.data.images.id]'; 63 | is.imgIdsMap = makeMap(is.imgIds); 64 | if( isfield(coco.data,'annotations') ) 65 | ann=coco.data.annotations; o=[ann.image_id]; 66 | if(isfield(ann,'category_id')), o=o*1e10+[ann.category_id]; end 67 | [~,o]=sort(o); ann=ann(o); coco.data.annotations=ann; 68 | s={'category_id','area','iscrowd','id','image_id'}; 69 | t={'annCatIds','annAreas','annIscrowd','annIds','annImgIds'}; 70 | for f=1:5, if(isfield(ann,s{f})), is.(t{f})=[ann.(s{f})]'; end; end 71 | is.annIdsMap = makeMap(is.annIds); 72 | is.imgAnnIdsMap = makeMultiMap(is.imgIds,... 73 | is.imgIdsMap,is.annImgIds,is.annIds,0); 74 | end 75 | if( isfield(coco.data,'categories') ) 76 | is.catIds = [coco.data.categories.id]'; 77 | is.catIdsMap = makeMap(is.catIds); 78 | if(isfield(is,'annCatIds')), is.catImgIdsMap = makeMultiMap(... 79 | is.catIds,is.catIdsMap,is.annCatIds,is.annImgIds,1); end 80 | end 81 | coco.inds=is; fprintf('DONE (t=%0.2fs).\n',etime(clock,clk)); 82 | 83 | function map = makeMap( keys ) 84 | % Make map from key to integer id associated with key. 85 | if(isempty(keys)), map=containers.Map(); return; end 86 | map=containers.Map(keys,1:length(keys)); 87 | end 88 | 89 | function map = makeMultiMap( keys, keysMap, keysAll, valsAll, sqz ) 90 | % Make map from keys to set of vals associated with each key. 
91 | js=values(keysMap,num2cell(keysAll)); js=[js{:}]; 92 | m=length(js); n=length(keys); k=zeros(1,n); 93 | for i=1:m, j=js(i); k(j)=k(j)+1; end; vs=zeros(n,max(k)); k(:)=0; 94 | for i=1:m, j=js(i); k(j)=k(j)+1; vs(j,k(j))=valsAll(i); end 95 | map = containers.Map('KeyType','double','ValueType','any'); 96 | if(sqz), for j=1:n, map(keys(j))=unique(vs(j,1:k(j))); end 97 | else for j=1:n, map(keys(j))=vs(j,1:k(j)); end; end 98 | end 99 | end 100 | 101 | function ids = getAnnIds( coco, varargin ) 102 | % Get ann ids that satisfy given filter conditions. 103 | % 104 | % USAGE 105 | % ids = coco.getAnnIds( params ) 106 | % 107 | % INPUTS 108 | % params - filtering parameters (struct or name/value pairs) 109 | % setting any filter to [] skips that filter 110 | % .imgIds - [] get anns for given imgs 111 | % .catIds - [] get anns for given cats 112 | % .areaRng - [] get anns for given area range (e.g. [0 inf]) 113 | % .iscrowd - [] get anns for given crowd label (0 or 1) 114 | % 115 | % OUTPUTS 116 | % ids - integer array of ann ids 117 | def = {'imgIds',[],'catIds',[],'areaRng',[],'iscrowd',[]}; 118 | [imgIds,catIds,ar,iscrowd] = getPrmDflt(varargin,def,1); 119 | if( length(imgIds)==1 ) 120 | t = coco.loadAnns(coco.inds.imgAnnIdsMap(imgIds)); 121 | if(~isempty(catIds)), t = t(ismember([t.category_id],catIds)); end 122 | if(~isempty(ar)), a=[t.area]; t = t(a>=ar(1) & a<=ar(2)); end 123 | if(~isempty(iscrowd)), t = t([t.iscrowd]==iscrowd); end 124 | ids = [t.id]; 125 | else 126 | ids=coco.inds.annIds; K = true(length(ids),1); t = coco.inds; 127 | if(~isempty(imgIds)), K = K & ismember(t.annImgIds,imgIds); end 128 | if(~isempty(catIds)), K = K & ismember(t.annCatIds,catIds); end 129 | if(~isempty(ar)), a=t.annAreas; K = K & a>=ar(1) & a<=ar(2); end 130 | if(~isempty(iscrowd)), K = K & t.annIscrowd==iscrowd; end 131 | ids=ids(K); 132 | end 133 | end 134 | 135 | function ids = getCatIds( coco, varargin ) 136 | % Get cat ids that satisfy given filter conditions. 137 | % 138 | % USAGE 139 | % ids = coco.getCatIds( params ) 140 | % 141 | % INPUTS 142 | % params - filtering parameters (struct or name/value pairs) 143 | % setting any filter to [] skips that filter 144 | % .catNms - [] get cats for given cat names 145 | % .supNms - [] get cats for given supercategory names 146 | % .catIds - [] get cats for given cat ids 147 | % 148 | % OUTPUTS 149 | % ids - integer array of cat ids 150 | if(~isfield(coco.data,'categories')), ids=[]; return; end 151 | def={'catNms',[],'supNms',[],'catIds',[]}; t=coco.data.categories; 152 | [catNms,supNms,catIds] = getPrmDflt(varargin,def,1); 153 | if(~isempty(catNms)), t = t(ismember({t.name},catNms)); end 154 | if(~isempty(supNms)), t = t(ismember({t.supercategory},supNms)); end 155 | if(~isempty(catIds)), t = t(ismember([t.id],catIds)); end 156 | ids = [t.id]; 157 | end 158 | 159 | function ids = getImgIds( coco, varargin ) 160 | % Get img ids that satisfy given filter conditions. 
161 | % 162 | % USAGE 163 | % ids = coco.getImgIds( params ) 164 | % 165 | % INPUTS 166 | % params - filtering parameters (struct or name/value pairs) 167 | % setting any filter to [] skips that filter 168 | % .imgIds - [] get imgs for given ids 169 | % .catIds - [] get imgs with all given cats 170 | % 171 | % OUTPUTS 172 | % ids - integer array of img ids 173 | def={'imgIds',[],'catIds',[]}; ids=coco.inds.imgIds; 174 | [imgIds,catIds] = getPrmDflt(varargin,def,1); 175 | if(~isempty(imgIds)), ids=intersect(ids,imgIds); end 176 | if(isempty(catIds)), return; end 177 | t=values(coco.inds.catImgIdsMap,num2cell(catIds)); 178 | for i=1:length(t), ids=intersect(ids,t{i}); end 179 | end 180 | 181 | function anns = loadAnns( coco, ids ) 182 | % Load anns with the specified ids. 183 | % 184 | % USAGE 185 | % anns = coco.loadAnns( ids ) 186 | % 187 | % INPUTS 188 | % ids - integer ids specifying anns 189 | % 190 | % OUTPUTS 191 | % anns - loaded ann objects 192 | ids = values(coco.inds.annIdsMap,num2cell(ids)); 193 | anns = coco.data.annotations([ids{:}]); 194 | end 195 | 196 | function cats = loadCats( coco, ids ) 197 | % Load cats with the specified ids. 198 | % 199 | % USAGE 200 | % cats = coco.loadCats( ids ) 201 | % 202 | % INPUTS 203 | % ids - integer ids specifying cats 204 | % 205 | % OUTPUTS 206 | % cats - loaded cat objects 207 | if(~isfield(coco.data,'categories')), cats=[]; return; end 208 | ids = values(coco.inds.catIdsMap,num2cell(ids)); 209 | cats = coco.data.categories([ids{:}]); 210 | end 211 | 212 | function imgs = loadImgs( coco, ids ) 213 | % Load imgs with the specified ids. 214 | % 215 | % USAGE 216 | % imgs = coco.loadImgs( ids ) 217 | % 218 | % INPUTS 219 | % ids - integer ids specifying imgs 220 | % 221 | % OUTPUTS 222 | % imgs - loaded img objects 223 | ids = values(coco.inds.imgIdsMap,num2cell(ids)); 224 | imgs = coco.data.images([ids{:}]); 225 | end 226 | 227 | function hs = showAnns( coco, anns ) 228 | % Display the specified annotations. 
229 | % 230 | % USAGE 231 | % hs = coco.showAnns( anns ) 232 | % 233 | % INPUTS 234 | % anns - annotations to display 235 | % 236 | % OUTPUTS 237 | % hs - handles to segment graphic objects 238 | n=length(anns); if(n==0), return; end 239 | r=.4:.2:1; [r,g,b]=ndgrid(r,r,r); cs=[r(:) g(:) b(:)]; 240 | cs=cs(randperm(size(cs,1)),:); cs=repmat(cs,100,1); 241 | if( isfield( anns,'keypoints') ) 242 | for i=1:n 243 | a=anns(i); if(isfield(a,'iscrowd') && a.iscrowd), continue; end 244 | seg={}; if(isfield(a,'segmentation')), seg=a.segmentation; end 245 | k=a.keypoints; x=k(1:3:end)+1; y=k(2:3:end)+1; v=k(3:3:end); 246 | k=coco.loadCats(a.category_id); k=k.skeleton; c=cs(i,:); hold on 247 | p={'FaceAlpha',.25,'LineWidth',2,'EdgeColor',c}; % polygon 248 | for j=seg, xy=j{1}+.5; fill(xy(1:2:end),xy(2:2:end),c,p{:}); end 249 | p={'Color',c,'LineWidth',3}; % skeleton 250 | for j=k, s=j{1}; if(all(v(s)>0)), line(x(s),y(s),p{:}); end; end 251 | p={'MarkerSize',8,'MarkerFaceColor',c,'MarkerEdgeColor'}; % pnts 252 | plot(x(v>0),y(v>0),'o',p{:},'k'); 253 | plot(x(v>1),y(v>1),'o',p{:},c); hold off; 254 | end 255 | elseif( any(isfield(anns,{'segmentation','bbox'})) ) 256 | if(~isfield(anns,'iscrowd')), [anns(:).iscrowd]=deal(0); end 257 | if(~isfield(anns,'segmentation')), S={anns.bbox}; %#ok 258 | for i=1:n, x=S{i}(1); w=S{i}(3); y=S{i}(2); h=S{i}(4); 259 | anns(i).segmentation={[x,y,x,y+h,x+w,y+h,x+w,y]}; end; end 260 | S={anns.segmentation}; hs=zeros(10000,1); k=0; hold on; 261 | pFill={'FaceAlpha',.4,'LineWidth',3}; 262 | for i=1:n 263 | if(anns(i).iscrowd), C=[.01 .65 .40]; else C=rand(1,3); end 264 | if(isstruct(S{i})), M=double(MaskApi.decode(S{i})); k=k+1; 265 | hs(k)=imagesc(cat(3,M*C(1),M*C(2),M*C(3)),'Alphadata',M*.5); 266 | else for j=1:length(S{i}), P=S{i}{j}+.5; k=k+1; 267 | hs(k)=fill(P(1:2:end),P(2:2:end),C,pFill{:}); end 268 | end 269 | end 270 | hs=hs(1:k); hold off; 271 | elseif( isfield(anns,'caption') ) 272 | S={anns.caption}; 273 | for i=1:n, S{i}=[int2str(i) ') ' S{i} '\newline']; end 274 | S=[S{:}]; title(S,'FontSize',12); 275 | end 276 | end 277 | 278 | function cocoRes = loadRes( coco, resFile ) 279 | % Load algorithm results and create API for accessing them. 280 | % 281 | % The API for accessing and viewing algorithm results is identical to 282 | % the CocoApi for the ground truth. The single difference is that the 283 | % ground truth results are replaced by the algorithm results. 284 | % 285 | % USAGE 286 | % cocoRes = coco.loadRes( resFile ) 287 | % 288 | % INPUTS 289 | % resFile - COCO results filename 290 | % 291 | % OUTPUTS 292 | % cocoRes - initialized results API 293 | fprintf('Loading and preparing results... 
'); clk=clock; 294 | cdata=coco.data; R=gason(fileread(resFile)); m=length(R); 295 | valid=ismember([R.image_id],[cdata.images.id]); 296 | if(~all(valid)), error('Results provided for invalid images.'); end 297 | t={'segmentation','bbox','keypoints','caption'}; t=t{isfield(R,t)}; 298 | if(strcmp(t,'caption')) 299 | for i=1:m, R(i).id=i; end; imgs=cdata.images; 300 | cdata.images=imgs(ismember([imgs.id],[R.image_id])); 301 | else 302 | assert(all(isfield(R,{'category_id','score',t}))); 303 | s=cat(1,R.(t)); if(strcmp(t,'bbox')), a=s(:,3).*s(:,4); end 304 | if(strcmp(t,'segmentation')), a=MaskApi.area(s); end 305 | if(strcmp(t,'keypoints')), x=s(:,1:3:end)'; y=s(:,2:3:end)'; 306 | a=(max(x)-min(x)).*(max(y)-min(y)); end 307 | for i=1:m, R(i).area=a(i); R(i).id=i; end 308 | end 309 | fprintf('DONE (t=%0.2fs).\n',etime(clock,clk)); 310 | cdata.annotations=R; cocoRes=CocoApi(cdata); 311 | end 312 | end 313 | 314 | end 315 | -------------------------------------------------------------------------------- /eval_city/cocoapi/MatlabAPI/CocoUtils.m: -------------------------------------------------------------------------------- 1 | classdef CocoUtils 2 | % Utility functions for testing and validation of COCO code. 3 | % 4 | % The following utility functions are defined: 5 | % convertPascalGt - Convert ground truth for PASCAL to COCO format. 6 | % convertImageNetGt - Convert ground truth for ImageNet to COCO format. 7 | % convertPascalDt - Convert detections on PASCAL to COCO format. 8 | % convertImageNetDt - Convert detections on ImageNet to COCO format. 9 | % validateOnPascal - Validate COCO eval code against PASCAL code. 10 | % validateOnImageNet - Validate COCO eval code against ImageNet code. 11 | % generateFakeDt - Generate fake detections from ground truth. 12 | % validateMaskApi - Validate MaskApi against Matlab functions. 13 | % gasonSplit - Split JSON file into multiple JSON files. 14 | % gasonMerge - Merge JSON files into single JSON file. 15 | % Help on each functions can be accessed by: "help CocoUtils>function". 16 | % 17 | % See also CocoApi MaskApi CocoEval CocoUtils>convertPascalGt 18 | % CocoUtils>convertImageNetGt CocoUtils>convertPascalDt 19 | % CocoUtils>convertImageNetDt CocoUtils>validateOnPascal 20 | % CocoUtils>validateOnImageNet CocoUtils>generateFakeDt 21 | % CocoUtils>validateMaskApi CocoUtils>gasonSplit CocoUtils>gasonMerge 22 | % 23 | % Microsoft COCO Toolbox. version 2.0 24 | % Data, paper, and tutorials available at: http://mscoco.org/ 25 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 26 | % Licensed under the Simplified BSD License [see coco/license.txt] 27 | 28 | methods( Static ) 29 | function convertPascalGt( dataDir, year, split, annFile ) 30 | % Convert ground truth for PASCAL to COCO format. 31 | % 32 | % USAGE 33 | % CocoUtils.convertPascalGt( dataDir, year, split, annFile ) 34 | % 35 | % INPUTS 36 | % dataDir - dir containing VOCdevkit/ 37 | % year - dataset year (e.g. '2007') 38 | % split - dataset split (e.g. 'val') 39 | % annFile - annotation file for writing results 40 | if(exist(annFile,'file')), return; end 41 | fprintf('Converting PASCAL VOC dataset... 
'); clk=tic; 42 | dev=[dataDir '/VOCdevkit/']; addpath(genpath([dev '/VOCcode'])); 43 | VOCinit; C=VOCopts.classes'; catsMap=containers.Map(C,1:length(C)); 44 | f=fopen([dev '/VOC' year '/ImageSets/Main/' split '.txt']); 45 | is=textscan(f,'%s %*s'); is=is{1}; fclose(f); n=length(is); 46 | data=CocoUtils.initData(C,n); 47 | for i=1:n, nm=[is{i} '.jpg']; 48 | f=[dev '/VOC' year '/Annotations/' is{i} '.xml']; 49 | R=PASreadrecord(f); hw=R.imgsize([2 1]); O=R.objects; 50 | id=is{i}; id(id=='_')=[]; id=str2double(id); 51 | ignore=[O.difficult]; bbs=cat(1,O.bbox); 52 | t=catsMap.values({O.class}); catIds=[t{:}]; iscrowd=ignore*0; 53 | data=CocoUtils.addData(data,nm,id,hw,catIds,ignore,iscrowd,bbs); 54 | end 55 | f=fopen(annFile,'w'); fwrite(f,gason(data)); fclose(f); 56 | fprintf('DONE (t=%0.2fs).\n',toc(clk)); 57 | end 58 | 59 | function convertImageNetGt( dataDir, year, split, annFile ) 60 | % Convert ground truth for ImageNet to COCO format. 61 | % 62 | % USAGE 63 | % CocoUtils.convertImageNetGt( dataDir, year, split, annFile ) 64 | % 65 | % INPUTS 66 | % dataDir - dir containing ILSVRC*/ folders 67 | % year - dataset year (e.g. '2013') 68 | % split - dataset split (e.g. 'val') 69 | % annFile - annotation file for writing results 70 | if(exist(annFile,'file')), return; end 71 | fprintf('Converting ImageNet dataset... '); clk=tic; 72 | dev=[dataDir '/ILSVRC' year '_devkit/']; 73 | addpath(genpath([dev '/evaluation/'])); 74 | t=[dev '/data/meta_det.mat']; 75 | t=load(t); synsets=t.synsets(1:200); catNms={synsets.name}; 76 | catsMap=containers.Map({synsets.WNID},1:length(catNms)); 77 | if(~strcmp(split,'val')), blacklist=cell(1,2); else 78 | f=[dev '/data/' 'ILSVRC' year '_det_validation_blacklist.txt']; 79 | f=fopen(f); blacklist=textscan(f,'%d %s'); fclose(f); 80 | t=catsMap.values(blacklist{2}); blacklist{2}=[t{:}]; 81 | end 82 | if(strcmp(split,'train')) 83 | dl=@(i) [dev '/data/det_lists/' split '_pos_' int2str(i) '.txt']; 84 | is=cell(1,200); for i=1:200, f=fopen(dl(i)); 85 | is{i}=textscan(f,'%s %*s'); is{i}=is{i}{1}; fclose(f); end 86 | is=unique(cat(1,is{:})); n=length(is); 87 | else 88 | f=fopen([dev '/data/det_lists/' split '.txt']); 89 | is=textscan(f,'%s %*s'); is=is{1}; fclose(f); n=length(is); 90 | end 91 | data=CocoUtils.initData(catNms,n); 92 | for i=1:n 93 | f=[dataDir '/ILSVRC' year '_DET_bbox_' split '/' is{i} '.xml']; 94 | R=VOCreadxml(f); R=R.annotation; nm=[is{i} '.JPEG']; 95 | hw=str2double({R.size.height R.size.width}); 96 | if(~isfield(R,'object')), catIds=[]; bbs=[]; else 97 | O=R.object; t=catsMap.values({O.name}); catIds=[t{:}]; 98 | b=[O.bndbox]; bbs=str2double({b.xmin; b.ymin; b.xmax; b.ymax})'; 99 | end 100 | j=blacklist{2}(blacklist{1}==i); m=numel(j); b=[0 0 hw(2) hw(1)]; 101 | catIds=[j catIds]; bbs=[repmat(b,m,1); bbs]; %#ok 102 | ignore=ismember(catIds,j); iscrowd=ignore*0; iscrowd(1:m)=1; 103 | data=CocoUtils.addData(data,nm,i,hw,catIds,ignore,iscrowd,bbs); 104 | end 105 | f=fopen(annFile,'w'); fwrite(f,gason(data)); fclose(f); 106 | fprintf('DONE (t=%0.2fs).\n',toc(clk)); 107 | end 108 | 109 | function convertPascalDt( srcFiles, tarFile ) 110 | % Convert detections on PASCAL to COCO format. 
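%
% A minimal call sketch (the file names here are hypothetical placeholders):
%   sfs = {'comp3_det_val_aeroplane.txt','comp3_det_val_bicycle.txt'};
%   CocoUtils.convertPascalDt( sfs, 'comp3_det_val.json' );
% Each source file is read as '%d %f %f %f %f %f' (image id, score, x0 y0
% x1 y1), and each detection's category id is the index of its source file
% within srcFiles, so list the files in category order.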
111 | % 112 | % USAGE 113 | % CocoUtils.convertPascalDt( srcFiles, tarFile ) 114 | % 115 | % INPUTS 116 | % srcFiles - source detection file(s) in PASCAL format 117 | % tarFile - target detection file in COCO format 118 | if(exist(tarFile,'file')), return; end; R=[]; 119 | for i=1:length(srcFiles), f=fopen(srcFiles{i},'r'); 120 | R1=textscan(f,'%d %f %f %f %f %f'); fclose(f); 121 | [~,~,x0,y0,x1,y1]=deal(R1{:}); b=[x0-1 y0-1 x1-x0+1 y1-y0+1]; 122 | b(:,3:4)=max(b(:,3:4),1); b=mat2cell(b,ones(1,size(b,1)),4); 123 | R=[R; struct('image_id',num2cell(R1{1}),'bbox',b,... 124 | 'category_id',i,'score',num2cell(R1{2}))]; %#ok 125 | end 126 | f=fopen(tarFile,'w'); fwrite(f,gason(R)); fclose(f); 127 | end 128 | 129 | function convertImageNetDt( srcFile, tarFile ) 130 | % Convert detections on ImageNet to COCO format. 131 | % 132 | % USAGE 133 | % CocoUtils.convertImageNetDt( srcFile, tarFile ) 134 | % 135 | % INPUTS 136 | % srcFile - source detection file in ImageNet format 137 | % tarFile - target detection file in COCO format 138 | if(exist(tarFile,'file')), return; end; f=fopen(srcFile,'r'); 139 | R=textscan(f,'%d %d %f %f %f %f %f'); fclose(f); 140 | [~,~,~,x0,y0,x1,y1]=deal(R{:}); b=[x0-1 y0-1 x1-x0+1 y1-y0+1]; 141 | b(:,3:4)=max(b(:,3:4),1); bbox=mat2cell(b,ones(1,size(b,1)),4); 142 | R=struct('image_id',num2cell(R{1}),'bbox',bbox,... 143 | 'category_id',num2cell(R{2}),'score',num2cell(R{3})); 144 | f=fopen(tarFile,'w'); fwrite(f,gason(R)); fclose(f); 145 | end 146 | 147 | function validateOnPascal( dataDir ) 148 | % Validate COCO eval code against PASCAL code. 149 | % 150 | % USAGE 151 | % CocoUtils.validateOnPascal( dataDir ) 152 | % 153 | % INPUTS 154 | % dataDir - dir containing VOCdevkit/ 155 | split='val'; year='2007'; thrs=0:.001:1; T=length(thrs); 156 | dev=[dataDir '/VOCdevkit/']; addpath(genpath([dev '/VOCcode/'])); 157 | d=pwd; cd(dev); VOCinit; cd(d); O=VOCopts; O.testset=split; 158 | O.detrespath=[O.detrespath(1:end-10) split '_%s.txt']; 159 | catNms=O.classes; K=length(catNms); ap=zeros(K,1); 160 | for i=1:K, [R,P]=VOCevaldet(O,'comp3',catNms{i},0); R1=[R; inf]; 161 | P1=[P; 0]; for t=1:T, ap(i)=ap(i)+max(P1(R1>=thrs(t)))/T; end; end 162 | srcFile=[dev '/results/VOC' year '/Main/comp3_det_' split]; 163 | resFile=[srcFile '.json']; annFile=[dev '/VOC2007/' split '.json']; 164 | sfs=cell(1,K); for i=1:K, sfs{i}=[srcFile '_' catNms{i} '.txt']; end 165 | CocoUtils.convertPascalGt(dataDir,year,split,annFile); 166 | CocoUtils.convertPascalDt(sfs,resFile); 167 | D=CocoApi(annFile); R=D.loadRes(resFile); E=CocoEval(D,R); 168 | p=E.params; p.recThrs=thrs; p.iouThrs=.5; p.areaRng=[0 inf]; 169 | p.useSegm=0; p.maxDets=inf; E.params=p; E.evaluate(); E.accumulate(); 170 | apCoco=squeeze(mean(E.eval.precision,2)); deltas=abs(apCoco-ap); 171 | fprintf('AP delta: mean=%.2e median=%.2e max=%.2e\n',... 172 | mean(deltas),median(deltas),max(deltas)) 173 | if(max(deltas)>1e-2), msg='FAILED'; else msg='PASSED'; end 174 | warning(['Eval code *' msg '* validation!']); 175 | end 176 | 177 | function validateOnImageNet( dataDir ) 178 | % Validate COCO eval code against ImageNet code. 179 | % 180 | % USAGE 181 | % CocoUtils.validateOnImageNet( dataDir ) 182 | % 183 | % INPUTS 184 | % dataDir - dir containing ILSVRC*/ folders 185 | warning(['Set pixelTolerance=0 in line 30 of eval_detection.m '... 
186 | '(and delete cache) otherwise AP will differ by >1e-4!']); 187 | year='2013'; dev=[dataDir '/ILSVRC' year '_devkit/']; 188 | fs = { [dev 'evaluation/demo.val.pred.det.txt'] 189 | [dataDir '/ILSVRC' year '_DET_bbox_val/'] 190 | [dev 'data/meta_det.mat'] 191 | [dev 'data/det_lists/val.txt'] 192 | [dev 'data/ILSVRC' year '_det_validation_blacklist.txt'] 193 | [dev 'data/ILSVRC' year '_det_validation_cache.mat'] }; 194 | addpath(genpath([dev 'evaluation/'])); 195 | ap=eval_detection(fs{:})'; 196 | resFile=[fs{1}(1:end-3) 'json']; 197 | annFile=[dev 'data/ILSVRC' year '_val.json']; 198 | CocoUtils.convertImageNetDt(fs{1},resFile); 199 | CocoUtils.convertImageNetGt(dataDir,year,'val',annFile) 200 | D=CocoApi(annFile); R=D.loadRes(resFile); E=CocoEval(D,R); 201 | p=E.params; p.recThrs=0:.0001:1; p.iouThrs=.5; p.areaRng=[0 inf]; 202 | p.useSegm=0; p.maxDets=inf; E.params=p; E.evaluate(); E.accumulate(); 203 | apCoco=squeeze(mean(E.eval.precision,2)); deltas=abs(apCoco-ap); 204 | fprintf('AP delta: mean=%.2e median=%.2e max=%.2e\n',... 205 | mean(deltas),median(deltas),max(deltas)) 206 | if(max(deltas)>1e-4), msg='FAILED'; else msg='PASSED'; end 207 | warning(['Eval code *' msg '* validation!']); 208 | end 209 | 210 | function generateFakeDt( coco, dtFile, varargin ) 211 | % Generate fake detections from ground truth. 212 | % 213 | % USAGE 214 | % CocoUtils.generateFakeDt( coco, dtFile, varargin ) 215 | % 216 | % INPUTS 217 | % coco - instance of CocoApi containing ground truth 218 | % dtFile - target file for writing detection results 219 | % params - parameters (struct or name/value pairs) 220 | % .n - [100] number images for which to generate dets 221 | % .fn - [.20] false negative rate (00; if(~any(v)), continue; end 251 | x=o(1:3:end); y=o(2:3:end); x(~v)=mean(x(v)); y(~v)=mean(y(v)); 252 | x=max(0,min(w-1,x+dx)); o(1:3:end)=x; o(2:3:end)=y; 253 | end 254 | k=k+1; R(k).image_id=imgIds(i); R(k).category_id=catId; 255 | R(k).(opts.type)=o; R(k).score=round(rand(rstream)*1000)/1000; 256 | end 257 | end 258 | R=R(1:k); f=fopen(dtFile,'w'); fwrite(f,gason(R)); fclose(f); 259 | fprintf('DONE (t=%0.2fs).\n',toc(clk)); 260 | end 261 | 262 | function validateMaskApi( coco ) 263 | % Validate MaskApi against Matlab functions. 264 | % 265 | % USAGE 266 | % CocoUtils.validateMaskApi( coco ) 267 | % 268 | % INPUTS 269 | % coco - instance of CocoApi containing ground truth 270 | S=coco.data.annotations; S=S(~[S.iscrowd]); S={S.segmentation}; 271 | h=1000; n=1000; Z=cell(1,n); A=Z; B=Z; M=Z; IB=zeros(1,n); 272 | fprintf('Running MaskApi implementations... '); clk=tic; 273 | for i=1:n, A{i}=MaskApi.frPoly(S{i},h,h); end 274 | Ia=MaskApi.iou(A{1},[A{:}]); 275 | fprintf('DONE (t=%0.2fs).\n',toc(clk)); 276 | fprintf('Running Matlab implementations... '); clk=tic; 277 | for i=1:n, M1=0; for j=1:length(S{i}), x=S{i}{j}+.5; 278 | M1=M1+poly2mask(x(1:2:end),x(2:2:end),h,h); end 279 | M{i}=uint8(M1>0); B{i}=MaskApi.encode(M{i}); 280 | IB(i)=sum(sum(M{1}&M{i}))/sum(sum(M{1}|M{i})); 281 | end 282 | fprintf('DONE (t=%0.2fs).\n',toc(clk)); 283 | if(isequal(A,B)&&isequal(Ia,IB)), 284 | msg='PASSED'; else msg='FAILED'; end 285 | warning(['MaskApi *' msg '* validation!']); 286 | end 287 | 288 | function gasonSplit( name, k ) 289 | % Split JSON file into multiple JSON files. 290 | % 291 | % Splits file 'name.json' into multiple files 'name-*.json'. Only 292 | % works for JSON arrays. Memory efficient. Inverted by gasonMerge(). 
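%
% A minimal round-trip sketch (the file name is a hypothetical example):
%   CocoUtils.gasonSplit( 'instances_val', 4 );
%   % writes instances_val-000001.json ... instances_val-000004.json
%   CocoUtils.gasonMerge( 'instances_val' );
%   % rebuilds instances_val.json from the numbered parts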
293 | % 294 | % USAGE 295 | % CocoUtils.gasonSplit( name, k ) 296 | % 297 | % INPUTS 298 | % name - file containing JSON array (w/o '.json' ext) 299 | % k - number of files to split JSON into 300 | s=gasonMex('split',fileread([name '.json']),k); k=length(s); 301 | for i=1:k, f=fopen(sprintf('%s-%06i.json',name,i),'w'); 302 | fwrite(f,s{i}); fclose(f); end 303 | end 304 | 305 | function gasonMerge( name ) 306 | % Merge JSON files into single JSON file. 307 | % 308 | % Merge files 'name-*.json' into single file 'name.json'. Only works 309 | % for JSON arrays. Memory efficient. Inverted by gasonSplit(). 310 | % 311 | % USAGE 312 | % CocoUtils.gasonMerge( name ) 313 | % 314 | % INPUTS 315 | % name - files containing JSON arrays (w/o '.json' ext) 316 | s=dir([name '-*.json']); s=sort({s.name}); k=length(s); 317 | p=fileparts(name); for i=1:k, s{i}=fullfile(p,s{i}); end 318 | for i=1:k, s{i}=fileread(s{i}); end; s=gasonMex('merge',s); 319 | f=fopen([name '.json'],'w'); fwrite(f,s); fclose(f); 320 | end 321 | end 322 | 323 | methods( Static, Access=private ) 324 | function data = initData( catNms, n ) 325 | % Helper for convert() functions: init annotations. 326 | m=length(catNms); ms=num2cell(1:m); 327 | I = struct('file_name',0,'height',0,'width',0,'id',0); 328 | C = struct('supercategory','none','id',ms,'name',catNms); 329 | A = struct('segmentation',0,'area',0,'iscrowd',0,... 330 | 'image_id',0,'bbox',0,'category_id',0,'id',0,'ignore',0); 331 | I=repmat(I,1,n); A=repmat(A,1,n*20); 332 | data = struct('images',I,'type','instances',... 333 | 'annotations',A,'categories',C,'nImgs',0,'nAnns',0); 334 | end 335 | 336 | function data = addData( data,nm,id,hw,catIds,ignore,iscrowd,bbs ) 337 | % Helper for convert() functions: add annotations. 338 | data.nImgs=data.nImgs+1; 339 | data.images(data.nImgs)=struct('file_name',nm,... 340 | 'height',hw(1),'width',hw(2),'id',id); 341 | for j=1:length(catIds), data.nAnns=data.nAnns+1; k=data.nAnns; 342 | b=bbs(j,:); b=b-1; b(3:4)=b(3:4)-b(1:2)+1; 343 | x1=b(1); x2=b(1)+b(3); y1=b(2); y2=b(2)+b(4); 344 | S={{[x1 y1 x1 y2 x2 y2 x2 y1]}}; a=b(3)*b(4); 345 | data.annotations(k)=struct('segmentation',S,'area',a,... 346 | 'iscrowd',iscrowd(j),'image_id',id,'bbox',b,... 347 | 'category_id',catIds(j),'id',k,'ignore',ignore(j)); 348 | end 349 | if( data.nImgs == length(data.images) ) 350 | data.annotations=data.annotations(1:data.nAnns); 351 | data=rmfield(data,{'nImgs','nAnns'}); 352 | end 353 | end 354 | end 355 | 356 | end 357 | -------------------------------------------------------------------------------- /eval_city/cocoapi/MatlabAPI/MaskApi.m: -------------------------------------------------------------------------------- 1 | classdef MaskApi 2 | % Interface for manipulating masks stored in RLE format. 3 | % 4 | % RLE is a simple yet efficient format for storing binary masks. RLE 5 | % first divides a vector (or vectorized image) into a series of piecewise 6 | % constant regions and then for each piece simply stores the length of 7 | % that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 8 | % be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 9 | % (note that the odd counts are always the numbers of zeros). Instead of 10 | % storing the counts directly, additional compression is achieved with a 11 | % variable bitrate representation based on a common scheme called LEB128. 12 | % 13 | % Compression is greatest given large piecewise constant regions. 
14 | % Specifically, the size of the RLE is proportional to the number of 15 | % *boundaries* in M (or for an image the number of boundaries in the y 16 | % direction). Assuming fairly simple shapes, the RLE representation is 17 | % O(sqrt(n)) where n is number of pixels in the object. Hence space usage 18 | % is substantially lower, especially for large simple objects (large n). 19 | % 20 | % Many common operations on masks can be computed directly using the RLE 21 | % (without need for decoding). This includes computations such as area, 22 | % union, intersection, etc. All of these operations are linear in the 23 | % size of the RLE, in other words they are O(sqrt(n)) where n is the area 24 | % of the object. Computing these operations on the original mask is O(n). 25 | % Thus, using the RLE can result in substantial computational savings. 26 | % 27 | % The following API functions are defined: 28 | % encode - Encode binary masks using RLE. 29 | % decode - Decode binary masks encoded via RLE. 30 | % merge - Compute union or intersection of encoded masks. 31 | % iou - Compute intersection over union between masks. 32 | % nms - Compute non-maximum suppression between ordered masks. 33 | % area - Compute area of encoded masks. 34 | % toBbox - Get bounding boxes surrounding encoded masks. 35 | % frBbox - Convert bounding boxes to encoded masks. 36 | % frPoly - Convert polygon to encoded mask. 37 | % 38 | % Usage: 39 | % Rs = MaskApi.encode( masks ) 40 | % masks = MaskApi.decode( Rs ) 41 | % R = MaskApi.merge( Rs, [intersect=false] ) 42 | % o = MaskApi.iou( dt, gt, [iscrowd=false] ) 43 | % keep = MaskApi.nms( dt, thr ) 44 | % a = MaskApi.area( Rs ) 45 | % bbs = MaskApi.toBbox( Rs ) 46 | % Rs = MaskApi.frBbox( bbs, h, w ) 47 | % R = MaskApi.frPoly( poly, h, w ) 48 | % 49 | % In the API the following formats are used: 50 | % R,Rs - [struct] Run-length encoding of binary mask(s) 51 | % masks - [hxwxn] Binary mask(s) (must have type uint8) 52 | % bbs - [nx4] Bounding box(es) stored as [x y w h] 53 | % poly - Polygon stored as {[x1 y1 x2 y2...],[x1 y1 ...],...} 54 | % dt,gt - May be either bounding boxes or encoded masks 55 | % Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 56 | % 57 | % Finally, a note about the intersection over union (iou) computation. 58 | % The standard iou of a ground truth (gt) and detected (dt) object is 59 | % iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 60 | % For "crowd" regions, we use a modified criteria. If a gt object is 61 | % marked as "iscrowd", we allow a dt to match any subregion of the gt. 62 | % Choosing gt' in the crowd gt that best matches the dt can be done using 63 | % gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 64 | % iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 65 | % For crowd gt regions we use this modified criteria above for the iou. 66 | % 67 | % To compile use the following (some precompiled binaries are included): 68 | % mex('CFLAGS=\$CFLAGS -Wall -std=c99','-largeArrayDims',... 69 | % 'private/maskApiMex.c','../common/maskApi.c',... 70 | % '-I../common/','-outdir','private'); 71 | % Please do not contact us for help with compiling. 72 | % 73 | % Microsoft COCO Toolbox. version 2.0 74 | % Data, paper, and tutorials available at: http://mscoco.org/ 75 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
76 | % Licensed under the Simplified BSD License [see coco/license.txt] 77 | 78 | methods( Static ) 79 | function Rs = encode( masks ) 80 | Rs = maskApiMex( 'encode', masks ); 81 | end 82 | 83 | function masks = decode( Rs ) 84 | masks = maskApiMex( 'decode', Rs ); 85 | end 86 | 87 | function R = merge( Rs, varargin ) 88 | R = maskApiMex( 'merge', Rs, varargin{:} ); 89 | end 90 | 91 | function o = iou( dt, gt, varargin ) 92 | o = maskApiMex( 'iou', dt', gt', varargin{:} ); 93 | end 94 | 95 | function keep = nms( dt, thr ) 96 | keep = maskApiMex('nms',dt',thr); 97 | end 98 | 99 | function a = area( Rs ) 100 | a = maskApiMex( 'area', Rs ); 101 | end 102 | 103 | function bbs = toBbox( Rs ) 104 | bbs = maskApiMex( 'toBbox', Rs )'; 105 | end 106 | 107 | function Rs = frBbox( bbs, h, w ) 108 | Rs = maskApiMex( 'frBbox', bbs', h, w ); 109 | end 110 | 111 | function R = frPoly( poly, h, w ) 112 | R = maskApiMex( 'frPoly', poly, h , w ); 113 | end 114 | end 115 | 116 | end 117 | -------------------------------------------------------------------------------- /eval_city/cocoapi/MatlabAPI/cocoDemo.m: -------------------------------------------------------------------------------- 1 | %% Demo for the CocoApi (see CocoApi.m) 2 | 3 | %% initialize COCO api (please specify dataType/annType below) 4 | annTypes = { 'instances', 'captions', 'person_keypoints' }; 5 | dataType='val2014'; annType=annTypes{1}; % specify dataType/annType 6 | annFile=sprintf('../annotations/%s_%s.json',annType,dataType); 7 | coco=CocoApi(annFile); 8 | 9 | %% display COCO categories and supercategories 10 | if( ~strcmp(annType,'captions') ) 11 | cats = coco.loadCats(coco.getCatIds()); 12 | nms={cats.name}; fprintf('COCO categories: '); 13 | fprintf('%s, ',nms{:}); fprintf('\n'); 14 | nms=unique({cats.supercategory}); fprintf('COCO supercategories: '); 15 | fprintf('%s, ',nms{:}); fprintf('\n'); 16 | end 17 | 18 | %% get all images containing given categories, select one at random 19 | catIds = coco.getCatIds('catNms',{'person','dog','skateboard'}); 20 | imgIds = coco.getImgIds('catIds',catIds); 21 | imgId = imgIds(randi(length(imgIds))); 22 | 23 | %% load and display image 24 | img = coco.loadImgs(imgId); 25 | I = imread(sprintf('../images/%s/%s',dataType,img.file_name)); 26 | figure(1); imagesc(I); axis('image'); set(gca,'XTick',[],'YTick',[]) 27 | 28 | %% load and display annotations 29 | annIds = coco.getAnnIds('imgIds',imgId,'catIds',catIds,'iscrowd',[]); 30 | anns = coco.loadAnns(annIds); coco.showAnns(anns); 31 | -------------------------------------------------------------------------------- /eval_city/cocoapi/MatlabAPI/evalDemo.m: -------------------------------------------------------------------------------- 1 | %% Demo of the algorithm result formats for COCO 2 | 3 | %% select results type for demo (bbox, segm, or keypoints) 4 | type = {'segm','bbox','keypoints'}; type = type{1}; % specify type here 5 | fprintf('Running demo for *%s* results.\n\n',type); 6 | 7 | %% initialize COCO ground truth api 8 | dataDir='../'; prefix='instances'; dataType='val2014'; 9 | if(strcmp(type,'keypoints')), prefix='person_keypoints'; end 10 | annFile=sprintf('%s/annotations/%s_%s.json',dataDir,prefix,dataType); 11 | cocoGt=CocoApi(annFile); 12 | 13 | %% initialize COCO detections api 14 | resFile='%s/results/%s_%s_fake%s100_results.json'; 15 | resFile=sprintf(resFile,dataDir,prefix,dataType,type); 16 | cocoDt=cocoGt.loadRes(resFile); 17 | 18 | %% visualize gt and dt side by side 19 | imgIds=sort(cocoGt.getImgIds());
imgIds=imgIds(1:100); 20 | imgId = imgIds(randi(100)); img = cocoGt.loadImgs(imgId); 21 | I = imread(sprintf('%s/images/val2014/%s',dataDir,img.file_name)); 22 | figure(1); subplot(1,2,1); imagesc(I); axis('image'); axis off; 23 | annIds = cocoGt.getAnnIds('imgIds',imgId); title('ground truth') 24 | anns = cocoGt.loadAnns(annIds); cocoGt.showAnns(anns); 25 | figure(1); subplot(1,2,2); imagesc(I); axis('image'); axis off; 26 | annIds = cocoDt.getAnnIds('imgIds',imgId); title('results') 27 | anns = cocoDt.loadAnns(annIds); cocoDt.showAnns(anns); 28 | 29 | %% load raw JSON and show exact format for results 30 | fprintf('results structure has the following format:\n'); 31 | res = gason(fileread(resFile)); disp(res) 32 | 33 | %% the following command can be used to save the results back to disk 34 | if(0), f=fopen(resFile,'w'); fwrite(f,gason(res)); fclose(f); end 35 | 36 | %% run COCO evaluation code (see CocoEval.m) 37 | cocoEval=CocoEval(cocoGt,cocoDt,type); 38 | cocoEval.params.imgIds=imgIds; 39 | cocoEval.evaluate(); 40 | cocoEval.accumulate(); 41 | cocoEval.summarize(); 42 | 43 | %% generate Derek Hoiem style analysis of false positives (slow) 44 | if(0), cocoEval.analyze(); end 45 | -------------------------------------------------------------------------------- /eval_city/cocoapi/MatlabAPI/gason.m: -------------------------------------------------------------------------------- 1 | function out = gason( in ) 2 | % Convert between JSON strings and corresponding JSON objects. 3 | % 4 | % This parser is based on Gason written and maintained by Ivan Vashchaev: 5 | % https://github.com/vivkin/gason 6 | % Gason is a "lightweight and fast JSON parser for C++". Please see the 7 | % above link for license information and additional details about Gason. 8 | % 9 | % Given a JSON string, gason calls the C++ parser and converts the output 10 | % into an appropriate Matlab structure. As the parsing is performed in mex 11 | % the resulting parser is blazingly fast. Large JSON structs (100MB+) take 12 | % only a few seconds to parse (compared to hours for pure Matlab parsers). 13 | % 14 | % Given a JSON object, gason calls the C++ encoder to convert the object 15 | % back into a JSON string representation. Nearly any Matlab struct, cell 16 | % array, or numeric array represents a valid JSON object. Note that gason() 17 | % can be used to go both from JSON string to JSON object and back. 18 | % 19 | % Gason requires C++11 to compile (for GCC this requires version 4.7 or 20 | % later). The following command compiles the parser (may require tweaking): 21 | % mex('CXXFLAGS=\$CXXFLAGS -std=c++11 -Wall','-largeArrayDims',... 22 | % 'private/gasonMex.cpp','../common/gason.cpp',... 23 | % '-I../common/','-outdir','private'); 24 | % Note the use of the "-std=c++11" flag. A number of precompiled binaries 25 | % are included, please do not contact us for help with compiling. If needed 26 | % you can specify a compiler by adding the option 'CXX="/usr/bin/g++"'. 27 | % 28 | % Note that by default JSON arrays that contain only numbers are stored as 29 | % regular Matlab arrays. Likewise, JSON arrays that contain only objects of 30 | % the same type are stored as Matlab struct arrays. This is much faster and 31 | % can use considerably less memory than always using Matlab cell arrays.
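%
% For example, a sketch of the conversion rules just described:
%   gason('[1,2,3]')            % 1x3 double array [1 2 3]
%   gason('[{"x":1},{"x":2}]')  % 1x2 struct array with field 'x'
%   gason('[1,"a"]')            % 1x2 cell array {[1],'a'} (mixed types)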
32 | % 33 | % USAGE 34 | % object = gason( string ) 35 | % string = gason( object ) 36 | % 37 | % INPUTS/OUTPUTS 38 | % string - JSON string 39 | % object - JSON object 40 | % 41 | % EXAMPLE 42 | % o = struct('first',{'piotr','ty'},'last',{'dollar','lin'}) 43 | % s = gason( o ) % convert JSON object -> JSON string 44 | % p = gason( s ) % convert JSON string -> JSON object 45 | % 46 | % See also 47 | % 48 | % Microsoft COCO Toolbox. version 2.0 49 | % Data, paper, and tutorials available at: http://mscoco.org/ 50 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 51 | % Licensed under the Simplified BSD License [see coco/license.txt] 52 | 53 | out = gasonMex( 'convert', in ); 54 | -------------------------------------------------------------------------------- /eval_city/cocoapi/MatlabAPI/private/gasonMex.cpp: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "gason.h" 8 | #include "mex.h" 9 | #include "string.h" 10 | #include "math.h" 11 | #include 12 | #include 13 | #include 14 | typedef std::ostringstream ostrm; 15 | typedef unsigned long siz; 16 | typedef unsigned short ushort; 17 | 18 | siz length( const JsonValue &a ) { 19 | // get number of elements in JSON_ARRAY or JSON_OBJECT 20 | siz k=0; auto n=a.toNode(); while(n) { k++; n=n->next; } return k; 21 | } 22 | 23 | bool isRegularObjArray( const JsonValue &a ) { 24 | // check if all JSON_OBJECTs in JSON_ARRAY have the same fields 25 | JsonValue o=a.toNode()->value; siz k, n; const char **keys; 26 | n=length(o); keys=new const char*[n]; 27 | k=0; for(auto j:o) keys[k++]=j->key; 28 | for( auto i:a ) { 29 | if(length(i->value)!=n) return false; k=0; 30 | for(auto j:i->value) if(strcmp(j->key,keys[k++])) return false; 31 | } 32 | delete [] keys; return true; 33 | } 34 | 35 | mxArray* json( const JsonValue &o ) { 36 | // convert JsonValue to Matlab mxArray 37 | siz k, m, n; mxArray *M; const char **keys; 38 | switch( o.getTag() ) { 39 | case JSON_NUMBER: 40 | return mxCreateDoubleScalar(o.toNumber()); 41 | case JSON_STRING: 42 | return mxCreateString(o.toString()); 43 | case JSON_ARRAY: { 44 | if(!o.toNode()) return mxCreateDoubleMatrix(1,0,mxREAL); 45 | JsonValue o0=o.toNode()->value; JsonTag tag=o0.getTag(); 46 | n=length(o); bool isRegular=true; 47 | for(auto i:o) isRegular=isRegular && i->value.getTag()==tag; 48 | if( isRegular && tag==JSON_OBJECT && isRegularObjArray(o) ) { 49 | m=length(o0); keys=new const char*[m]; 50 | k=0; for(auto j:o0) keys[k++]=j->key; 51 | M = mxCreateStructMatrix(1,n,m,keys); 52 | k=0; for(auto i:o) { m=0; for(auto j:i->value) 53 | mxSetFieldByNumber(M,k,m++,json(j->value)); k++; } 54 | delete [] keys; return M; 55 | } else if( isRegular && tag==JSON_NUMBER ) { 56 | M = mxCreateDoubleMatrix(1,n,mxREAL); double *p=mxGetPr(M); 57 | k=0; for(auto i:o) p[k++]=i->value.toNumber(); return M; 58 | } else { 59 | M = mxCreateCellMatrix(1,n); 60 | k=0; for(auto i:o) mxSetCell(M,k++,json(i->value)); 61 | return M; 62 | } 63 | } 64 | case JSON_OBJECT: 65 | if(!o.toNode()) return mxCreateStructMatrix(1,0,0,NULL); 66 | n=length(o); keys=new const char*[n]; 67 | k=0; for(auto i:o) 
keys[k++]=i->key; 68 | M = mxCreateStructMatrix(1,1,n,keys); k=0; 69 | for(auto i:o) mxSetFieldByNumber(M,0,k++,json(i->value)); 70 | delete [] keys; return M; 71 | case JSON_TRUE: 72 | return mxCreateDoubleScalar(1); 73 | case JSON_FALSE: 74 | return mxCreateDoubleScalar(0); 75 | case JSON_NULL: 76 | return mxCreateDoubleMatrix(0,0,mxREAL); 77 | default: return NULL; 78 | } 79 | } 80 | 81 | template ostrm& json( ostrm &S, T *A, siz n ) { 82 | // convert numeric array to JSON string with casting 83 | if(n==0) { S<<"[]"; return S; } if(n==1) { S< ostrm& json( ostrm &S, T *A, siz n ) { 89 | // convert numeric array to JSON string without casting 90 | return json(S,A,n); 91 | } 92 | 93 | ostrm& json( ostrm &S, const char *A ) { 94 | // convert char array to JSON string (handle escape characters) 95 | #define RPL(a,b) case a: { S << b; A++; break; } 96 | S << "\""; while( *A>0 ) switch( *A ) { 97 | RPL('"',"\\\""); RPL('\\',"\\\\"); RPL('/',"\\/"); RPL('\b',"\\b"); 98 | RPL('\f',"\\f"); RPL('\n',"\\n"); RPL('\r',"\\r"); RPL('\t',"\\t"); 99 | default: S << *A; A++; 100 | } 101 | S << "\""; return S; 102 | } 103 | 104 | ostrm& json( ostrm& S, const JsonValue *o ) { 105 | // convert JsonValue to JSON string 106 | switch( o->getTag() ) { 107 | case JSON_NUMBER: S << o->toNumber(); return S; 108 | case JSON_TRUE: S << "true"; return S; 109 | case JSON_FALSE: S << "false"; return S; 110 | case JSON_NULL: S << "null"; return S; 111 | case JSON_STRING: return json(S,o->toString()); 112 | case JSON_ARRAY: 113 | S << "["; for(auto i:*o) { 114 | json(S,&i->value) << (i->next ? "," : ""); } 115 | S << "]"; return S; 116 | case JSON_OBJECT: 117 | S << "{"; for(auto i:*o) { 118 | json(S,i->key) << ":"; 119 | json(S,&i->value) << (i->next ? "," : ""); } 120 | S << "}"; return S; 121 | default: return S; 122 | } 123 | } 124 | 125 | ostrm& json( ostrm& S, const mxArray *M ) { 126 | // convert Matlab mxArray to JSON string 127 | siz i, j, m, n=mxGetNumberOfElements(M); 128 | void *A=mxGetData(M); ostrm *nms; 129 | switch( mxGetClassID(M) ) { 130 | case mxDOUBLE_CLASS: return json(S,(double*) A,n); 131 | case mxSINGLE_CLASS: return json(S,(float*) A,n); 132 | case mxINT64_CLASS: return json(S,(int64_t*) A,n); 133 | case mxUINT64_CLASS: return json(S,(uint64_t*) A,n); 134 | case mxINT32_CLASS: return json(S,(int32_t*) A,n); 135 | case mxUINT32_CLASS: return json(S,(uint32_t*) A,n); 136 | case mxINT16_CLASS: return json(S,(int16_t*) A,n); 137 | case mxUINT16_CLASS: return json(S,(uint16_t*) A,n); 138 | case mxINT8_CLASS: return json(S,(int8_t*) A,n); 139 | case mxUINT8_CLASS: return json(S,(uint8_t*) A,n); 140 | case mxLOGICAL_CLASS: return json(S,(uint8_t*) A,n); 141 | case mxCHAR_CLASS: return json(S,mxArrayToString(M)); 142 | case mxCELL_CLASS: 143 | S << "["; for(i=0; i0) json(S,mxGetCell(M,n-1)); S << "]"; return S; 145 | case mxSTRUCT_CLASS: 146 | if(n==0) { S<<"{}"; return S; } m=mxGetNumberOfFields(M); 147 | if(m==0) { S<<"["; for(i=0; i1) S<<"["; nms=new ostrm[m]; 149 | for(j=0; j1) S<<"]"; delete [] nms; return S; 156 | default: 157 | mexErrMsgTxt( "Unknown type." 
); return S; 158 | } 159 | } 160 | 161 | mxArray* mxCreateStringRobust( const char* str ) { 162 | // convert char* to Matlab string (robust version of mxCreateString) 163 | mxArray *M; ushort *c; mwSize n[2]={1,strlen(str)}; 164 | M=mxCreateCharArray(2,n); c=(ushort*) mxGetData(M); 165 | for( siz i=0; i1 ) mexErrMsgTxt("One output expected."); 182 | 183 | if(!strcmp(action,"convert")) { 184 | if( nr!=1 ) mexErrMsgTxt("One input expected."); 185 | if( mxGetClassID(pr[0])==mxCHAR_CLASS ) { 186 | // object = mexFunction( string ) 187 | char *str = mxArrayToStringRobust(pr[0]); 188 | int status = jsonParse(str, &endptr, &val, allocator); 189 | if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status)); 190 | pl[0] = json(val); mxFree(str); 191 | } else { 192 | // string = mexFunction( object ) 193 | ostrm S; S << std::setprecision(12); json(S,pr[0]); 194 | pl[0]=mxCreateStringRobust(S.str().c_str()); 195 | } 196 | 197 | } else if(!strcmp(action,"split")) { 198 | // strings = mexFunction( string, k ) 199 | if( nr!=2 ) mexErrMsgTxt("Two input expected."); 200 | char *str = mxArrayToStringRobust(pr[0]); 201 | int status = jsonParse(str, &endptr, &val, allocator); 202 | if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status)); 203 | if( val.getTag()!=JSON_ARRAY ) mexErrMsgTxt("Array expected"); 204 | siz i=0, t=0, n=length(val), k=(siz) mxGetScalar(pr[1]); 205 | k=(k>n)?n:(k<1)?1:k; k=ceil(n/ceil(double(n)/k)); 206 | pl[0]=mxCreateCellMatrix(1,k); ostrm S; S<value); t--; if(!o->next) t=0; S << (t ? "," : "]"); 210 | if(!t) mxSetCell(pl[0],i++,mxCreateStringRobust(S.str().c_str())); 211 | } 212 | 213 | } else if(!strcmp(action,"merge")) { 214 | // string = mexFunction( strings ) 215 | if( nr!=1 ) mexErrMsgTxt("One input expected."); 216 | if(!mxIsCell(pr[0])) mexErrMsgTxt("Cell array expected."); 217 | siz n = mxGetNumberOfElements(pr[0]); 218 | ostrm S; S << std::setprecision(12); S << "["; 219 | for( siz i=0; ivalue) << (j->next ? "," : ""); 225 | mxFree(str); if(i1) 14 | % [ param1 ... paramN ] = getPrmDflt( prm, dfs, [checkExtra] ) 15 | % 16 | % INPUTS 17 | % prm - param struct or cell of form {'name1' v1 'name2' v2 ...} 18 | % dfs - cell of form {'name1' def1 'name2' def2 ...} 19 | % checkExtra - [0] if 1 throw error if prm contains params not in dfs 20 | % if -1 if prm contains params not in dfs adds them 21 | % 22 | % OUTPUTS (nargout==1) 23 | % prm - parameter struct with fields 'name1' through 'nameN' assigned 24 | % 25 | % OUTPUTS (nargout>1) 26 | % param1 - value assigned to parameter with 'name1' 27 | % ... 28 | % paramN - value assigned to parameter with 'nameN' 29 | % 30 | % EXAMPLE 31 | % dfs = { 'x','REQ', 'y',0, 'z',[], 'eps',1e-3 }; 32 | % prm = getPrmDflt( struct('x',1,'y',1), dfs ) 33 | % [ x y z eps ] = getPrmDflt( {'x',2,'y',1}, dfs ) 34 | % 35 | % See also INPUTPARSER 36 | % 37 | % Piotr's Computer Vision Matlab Toolbox Version 2.60 38 | % Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] 39 | % Licensed under the Simplified BSD License [see external/bsd.txt] 40 | 41 | if( mod(length(dfs),2) ), error('odd number of default parameters'); end 42 | if nargin<=2, checkExtra = 0; end 43 | 44 | % get the input parameters as two cell arrays: prmVal and prmField 45 | if iscell(prm) && length(prm)==1, prm=prm{1}; end 46 | if iscell(prm) 47 | if(mod(length(prm),2)), error('odd number of parameters in prm'); end 48 | prmField = prm(1:2:end); prmVal = prm(2:2:end); 49 | else 50 | if(~isstruct(prm)), error('prm must be a struct or a cell'); end 51 | prmVal = struct2cell(prm); prmField = fieldnames(prm); 52 | end 53 | 54 | % get and update default values using quick for loop 55 | dfsField = dfs(1:2:end); dfsVal = dfs(2:2:end); 56 | if checkExtra>0 57 | for i=1:length(prmField) 58 | j = find(strcmp(prmField{i},dfsField)); 59 | if isempty(j), error('parameter %s is not valid', prmField{i}); end 60 | dfsVal(j) = prmVal(i); 61 | end 62 | elseif checkExtra<0 63 | for i=1:length(prmField) 64 | j = find(strcmp(prmField{i},dfsField)); 65 | if isempty(j), j=length(dfsVal)+1; dfsField{j}=prmField{i}; end 66 | dfsVal(j) = prmVal(i); 67 | end 68 | else 69 | for i=1:length(prmField) 70 | dfsVal(strcmp(prmField{i},dfsField)) = prmVal(i); 71 | end 72 | end 73 | 74 | % check for missing values 75 | if any(strcmp('REQ',dfsVal)) 76 | cmpArray = find(strcmp('REQ',dfsVal)); 77 | error(['Required field ''' dfsField{cmpArray(1)} ''' not specified.'] ); 78 | end 79 | 80 | % set output 81 | if nargout==1 82 | varargout{1} = cell2struct( dfsVal, dfsField, 2 ); 83 | else 84 | varargout = dfsVal; 85 | end 86 | -------------------------------------------------------------------------------- /eval_city/cocoapi/MatlabAPI/private/maskApiMex.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "mex.h" 8 | #include "maskApi.h" 9 | #include 10 | 11 | void checkType( const mxArray *M, mxClassID id ) { 12 | if(mxGetClassID(M)!=id) mexErrMsgTxt("Invalid type."); 13 | } 14 | 15 | mxArray* toMxArray( const RLE *R, siz n ) { 16 | const char *fs[] = {"size", "counts"}; 17 | mxArray *M=mxCreateStructMatrix(1,n,2,fs); 18 | for( siz i=0; i1) mexErrMsgTxt(err); 35 | for( i=0; i<*n; i++ ) { 36 | mxArray *S, *C; double *s; void *c; 37 | S=mxGetFieldByNumber(M,i,O[0]); checkType(S,mxDOUBLE_CLASS); 38 | C=mxGetFieldByNumber(M,i,O[1]); s=mxGetPr(S); c=mxGetData(C); 39 | h=(siz)s[0]; w=(siz)s[1]; m=mxGetNumberOfElements(C); 40 | if(same && i>0 && (h!=R[0].h || w!=R[0].w)) mexErrMsgTxt(err); 41 | if( mxGetClassID(C)==mxDOUBLE_CLASS ) { 42 | rleInit(R+i,h,w,m,0); 43 | for(j=0; j=2) ? (mxGetScalar(pr[1])>0) : false; 74 | rleMerge(R,&M,n,intersect); pl[0]=toMxArray(&M,1); rleFree(&M); 75 | 76 | } else if(!strcmp(action,"area")) { 77 | R=frMxArray(pr[0],&n,0); 78 | pl[0]=mxCreateNumericMatrix(1,n,mxUINT32_CLASS,mxREAL); 79 | uint *a=(uint*) mxGetPr(pl[0]); rleArea(R,n,a); 80 | 81 | } else if(!strcmp(action,"iou")) { 82 | if(nr>2) checkType(pr[2],mxUINT8_CLASS); siz nDt, nGt; 83 | byte *iscrowd = nr>2 ? 
(byte*) mxGetPr(pr[2]) : NULL; 84 | if(mxIsStruct(pr[0]) || mxIsStruct(pr[1])) { 85 | RLE *dt=frMxArray(pr[0],&nDt,1), *gt=frMxArray(pr[1],&nGt,1); 86 | pl[0]=mxCreateNumericMatrix(nDt,nGt,mxDOUBLE_CLASS,mxREAL); 87 | double *o=mxGetPr(pl[0]); rleIou(dt,gt,nDt,nGt,iscrowd,o); 88 | rlesFree(&dt,nDt); rlesFree(>,nGt); 89 | } else { 90 | checkType(pr[0],mxDOUBLE_CLASS); checkType(pr[1],mxDOUBLE_CLASS); 91 | double *dt=mxGetPr(pr[0]); nDt=mxGetN(pr[0]); 92 | double *gt=mxGetPr(pr[1]); nGt=mxGetN(pr[1]); 93 | pl[0]=mxCreateNumericMatrix(nDt,nGt,mxDOUBLE_CLASS,mxREAL); 94 | double *o=mxGetPr(pl[0]); bbIou(dt,gt,nDt,nGt,iscrowd,o); 95 | } 96 | 97 | } else if(!strcmp(action,"nms")) { 98 | siz n; uint *keep; double thr=(double) mxGetScalar(pr[1]); 99 | if(mxIsStruct(pr[0])) { 100 | RLE *dt=frMxArray(pr[0],&n,1); 101 | pl[0]=mxCreateNumericMatrix(1,n,mxUINT32_CLASS,mxREAL); 102 | keep=(uint*) mxGetPr(pl[0]); rleNms(dt,n,keep,thr); 103 | rlesFree(&dt,n); 104 | } else { 105 | checkType(pr[0],mxDOUBLE_CLASS); 106 | double *dt=mxGetPr(pr[0]); n=mxGetN(pr[0]); 107 | pl[0]=mxCreateNumericMatrix(1,n,mxUINT32_CLASS,mxREAL); 108 | keep=(uint*) mxGetPr(pl[0]); bbNms(dt,n,keep,thr); 109 | } 110 | 111 | } else if(!strcmp(action,"toBbox")) { 112 | R=frMxArray(pr[0],&n,0); 113 | pl[0]=mxCreateNumericMatrix(4,n,mxDOUBLE_CLASS,mxREAL); 114 | BB bb=mxGetPr(pl[0]); rleToBbox(R,bb,n); 115 | 116 | } else if(!strcmp(action,"frBbox")) { 117 | checkType(pr[0],mxDOUBLE_CLASS); 118 | double *bb=mxGetPr(pr[0]); n=mxGetN(pr[0]); 119 | h=(siz)mxGetScalar(pr[1]); w=(siz)mxGetScalar(pr[2]); 120 | rlesInit(&R,n); rleFrBbox(R,bb,h,w,n); pl[0]=toMxArray(R,n); 121 | 122 | } else if(!strcmp(action,"frPoly")) { 123 | checkType(pr[0],mxCELL_CLASS); n=mxGetNumberOfElements(pr[0]); 124 | h=(siz)mxGetScalar(pr[1]); w=(siz)mxGetScalar(pr[2]); rlesInit(&R,n); 125 | for(siz i=0; i malloc(h*w*n* sizeof(byte)) 85 | self._h = h 86 | self._w = w 87 | self._n = n 88 | # def __dealloc__(self): 89 | # the memory management of _mask has been passed to np.ndarray 90 | # it doesn't need to be freed here 91 | 92 | # called when passing into np.array() and return an np.ndarray in column-major order 93 | def __array__(self): 94 | cdef np.npy_intp shape[1] 95 | shape[0] = self._h*self._w*self._n 96 | # Create a 1D array, and reshape it to fortran/Matlab column-major array 97 | ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F') 98 | # The _mask allocated by Masks is now handled by ndarray 99 | PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA) 100 | return ndarray 101 | 102 | # internal conversion from Python RLEs object to compressed RLE format 103 | def _toString(RLEs Rs): 104 | cdef siz n = Rs.n 105 | cdef bytes py_string 106 | cdef char* c_string 107 | objs = [] 108 | for i in range(n): 109 | c_string = rleToString( &Rs._R[i] ) 110 | py_string = c_string 111 | objs.append({ 112 | 'size': [Rs._R[i].h, Rs._R[i].w], 113 | 'counts': py_string 114 | }) 115 | free(c_string) 116 | return objs 117 | 118 | # internal conversion from compressed RLE format to Python RLEs object 119 | def _frString(rleObjs): 120 | cdef siz n = len(rleObjs) 121 | Rs = RLEs(n) 122 | cdef bytes py_string 123 | cdef char* c_string 124 | for i, obj in enumerate(rleObjs): 125 | if PYTHON_VERSION == 2: 126 | py_string = str(obj['counts']).encode('utf8') 127 | elif PYTHON_VERSION == 3: 128 | py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts'] 129 | else: 130 | raise Exception('Python 
version must be 2 or 3') 131 | c_string = py_string 132 | rleFrString( &Rs._R[i], c_string, obj['size'][0], obj['size'][1] ) 133 | return Rs 134 | 135 | # encode mask to RLEs objects 136 | # list of RLE string can be generated by RLEs member function 137 | def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask): 138 | h, w, n = mask.shape[0], mask.shape[1], mask.shape[2] 139 | cdef RLEs Rs = RLEs(n) 140 | rleEncode(Rs._R,<byte*>mask.data,h,w,n) 141 | objs = _toString(Rs) 142 | return objs 143 | 144 | # decode mask from compressed list of RLE string or RLEs object 145 | def decode(rleObjs): 146 | cdef RLEs Rs = _frString(rleObjs) 147 | h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n 148 | masks = Masks(h, w, n) 149 | rleDecode(Rs._R, masks._mask, n); 150 | return np.array(masks) 151 | 152 | def merge(rleObjs, intersect=0): 153 | cdef RLEs Rs = _frString(rleObjs) 154 | cdef RLEs R = RLEs(1) 155 | rleMerge(Rs._R, R._R, Rs._n, intersect) 156 | obj = _toString(R)[0] 157 | return obj 158 | 159 | def area(rleObjs): 160 | cdef RLEs Rs = _frString(rleObjs) 161 | cdef uint* _a = <uint*> malloc(Rs._n* sizeof(uint)) 162 | rleArea(Rs._R, Rs._n, _a) 163 | cdef np.npy_intp shape[1] 164 | shape[0] = Rs._n 165 | a = np.array((Rs._n, ), dtype=np.uint8) 166 | a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a) 167 | PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA) 168 | return a 169 | 170 | # iou computation. support function overload (RLEs-RLEs and bbox-bbox). 171 | def iou( dt, gt, pyiscrowd ): 172 | def _preproc(objs): 173 | if len(objs) == 0: 174 | return objs 175 | if type(objs) == np.ndarray: 176 | if len(objs.shape) == 1: 177 | objs = objs.reshape((1, objs.shape[0])) # a flat 4-element box becomes 1x4 178 | # check if it's Nx4 bbox 179 | if not len(objs.shape) == 2 or not objs.shape[1] == 4: 180 | raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension') 181 | objs = objs.astype(np.double) 182 | elif type(objs) == list: 183 | # check if list is in box format and convert it to np.ndarray 184 | isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs])) 185 | isrle = np.all(np.array([type(obj) == dict for obj in objs])) 186 | if isbox: 187 | objs = np.array(objs, dtype=np.double) 188 | if len(objs.shape) == 1: 189 | objs = objs.reshape((1,objs.shape[0])) 190 | elif isrle: 191 | objs = _frString(objs) 192 | else: 193 | raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])') 194 | else: 195 | raise Exception('unrecognized type.
The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.') 196 | return objs 197 | def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 198 | rleIou( dt._R, gt._R, m, n, iscrowd.data, _iou.data ) 199 | def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 200 | bbIou( dt.data, gt.data, m, n, iscrowd.data, _iou.data ) 201 | def _len(obj): 202 | cdef siz N = 0 203 | if type(obj) == RLEs: 204 | N = obj.n 205 | elif len(obj)==0: 206 | pass 207 | elif type(obj) == np.ndarray: 208 | N = obj.shape[0] 209 | return N 210 | # convert iscrowd to numpy array 211 | cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8) 212 | # simple type checking 213 | cdef siz m, n 214 | dt = _preproc(dt) 215 | gt = _preproc(gt) 216 | m = _len(dt) 217 | n = _len(gt) 218 | if m == 0 or n == 0: 219 | return [] 220 | if not type(dt) == type(gt): 221 | raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray') 222 | 223 | # define local variables 224 | cdef double* _iou = 0 225 | cdef np.npy_intp shape[1] 226 | # check type and assign iou function 227 | if type(dt) == RLEs: 228 | _iouFun = _rleIou 229 | elif type(dt) == np.ndarray: 230 | _iouFun = _bbIou 231 | else: 232 | raise Exception('input data type not allowed.') 233 | _iou = malloc(m*n* sizeof(double)) 234 | iou = np.zeros((m*n, ), dtype=np.double) 235 | shape[0] = m*n 236 | iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou) 237 | PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA) 238 | _iouFun(dt, gt, iscrowd, m, n, iou) 239 | return iou.reshape((m,n), order='F') 240 | 241 | def toBbox( rleObjs ): 242 | cdef RLEs Rs = _frString(rleObjs) 243 | cdef siz n = Rs.n 244 | cdef BB _bb = malloc(4*n* sizeof(double)) 245 | rleToBbox( Rs._R, _bb, n ) 246 | cdef np.npy_intp shape[1] 247 | shape[0] = 4*n 248 | bb = np.array((1,4*n), dtype=np.double) 249 | bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4)) 250 | PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA) 251 | return bb 252 | 253 | def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ): 254 | cdef siz n = bb.shape[0] 255 | Rs = RLEs(n) 256 | rleFrBbox( Rs._R, bb.data, h, w, n ) 257 | objs = _toString(Rs) 258 | return objs 259 | 260 | def frPoly( poly, siz h, siz w ): 261 | cdef np.ndarray[np.double_t, ndim=1] np_poly 262 | n = len(poly) 263 | Rs = RLEs(n) 264 | for i, p in enumerate(poly): 265 | np_poly = np.array(p, dtype=np.double, order='F') 266 | rleFrPoly( &Rs._R[i], np_poly.data, int(len(p)/2), h, w ) 267 | objs = _toString(Rs) 268 | return objs 269 | 270 | def frUncompressedRLE(ucRles, siz h, siz w): 271 | cdef np.ndarray[np.uint32_t, ndim=1] cnts 272 | cdef RLE R 273 | cdef uint *data 274 | n = len(ucRles) 275 | objs = [] 276 | for i in range(n): 277 | Rs = RLEs(1) 278 | cnts = np.array(ucRles[i]['counts'], dtype=np.uint32) 279 | # time for malloc can be saved here but it's fine 280 | data = malloc(len(cnts)* sizeof(uint)) 281 | for j in range(len(cnts)): 282 | data[j] = cnts[j] 283 | R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), data) 284 | Rs._R[0] = R 285 | objs.append(_toString(Rs)[0]) 286 | return objs 287 | 288 | def frPyObjects(pyobj, h, w): 289 | # encode rle from a list of python objects 290 | if type(pyobj) == np.ndarray: 291 | objs = frBbox(pyobj, h, w) 292 | elif type(pyobj) == 
list and len(pyobj[0]) == 4: 293 | objs = frBbox(pyobj, h, w) 294 | elif type(pyobj) == list and len(pyobj[0]) > 4: 295 | objs = frPoly(pyobj, h, w) 296 | elif type(pyobj) == list and type(pyobj[0]) == dict \ 297 | and 'counts' in pyobj[0] and 'size' in pyobj[0]: 298 | objs = frUncompressedRLE(pyobj, h, w) 299 | # encode rle from single python object 300 | elif type(pyobj) == list and len(pyobj) == 4: 301 | objs = frBbox([pyobj], h, w)[0] 302 | elif type(pyobj) == list and len(pyobj) > 4: 303 | objs = frPoly([pyobj], h, w)[0] 304 | elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj: 305 | objs = frUncompressedRLE([pyobj], h, w)[0] 306 | else: 307 | raise Exception('input type is not supported.') 308 | return objs 309 | -------------------------------------------------------------------------------- /eval_city/cocoapi/PythonAPI/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 
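#
# A minimal sketch of these calls (assumes the C extension is compiled; the
# toy mask below is made up for illustration):
#   import numpy as np
#   from pycocotools import mask as maskUtils
#   m = np.zeros((10,10,1), dtype=np.uint8, order='F'); m[2:8,3:7,0] = 1
#   rle = maskUtils.encode(m)[0]   # RLE dict with 'size' and 'counts'
#   maskUtils.area(rle)            # 24 (6 rows x 4 columns)
#   maskUtils.toBbox(rle)          # [3. 2. 4. 6.] as [x y w h]
#   maskUtils.iou([[0,0,5,5]], [[0,0,10,10]], [0])   # [[0.25]]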
38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | iou = _mask.iou 77 | merge = _mask.merge 78 | frPyObjects = _mask.frPyObjects 79 | 80 | def encode(bimask): 81 | if len(bimask.shape) == 3: 82 | return _mask.encode(bimask) 83 | elif len(bimask.shape) == 2: 84 | h, w = bimask.shape 85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 86 | 87 | def decode(rleObjs): 88 | if type(rleObjs) == list: 89 | return _mask.decode(rleObjs) 90 | else: 91 | return _mask.decode([rleObjs])[:,:,0] 92 | 93 | def area(rleObjs): 94 | if type(rleObjs) == list: 95 | return _mask.area(rleObjs) 96 | else: 97 | return _mask.area([rleObjs])[0] 98 | 99 | def toBbox(rleObjs): 100 | if type(rleObjs) == list: 101 | return _mask.toBbox(rleObjs) 102 | else: 103 | return _mask.toBbox([rleObjs])[0] -------------------------------------------------------------------------------- /eval_city/cocoapi/PythonAPI/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | # To compile and install locally run "python setup.py build_ext --inplace" 7 | # To install library to Python site-packages run "python setup.py build_ext install" 8 | 9 | ext_modules = [ 10 | Extension( 11 | 'pycocotools._mask', 12 | sources=['../common/maskApi.c', 'pycocotools/_mask.pyx'], 13 | include_dirs = [np.get_include(), '../common'], 14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 15 | ) 16 | ] 17 | 18 | setup(name='pycocotools', 19 | packages=['pycocotools'], 20 | package_dir = {'pycocotools': 'pycocotools'}, 21 | version='2.0', 22 | ext_modules= 23 | cythonize(ext_modules) 24 | ) -------------------------------------------------------------------------------- /eval_city/cocoapi/README.txt: -------------------------------------------------------------------------------- 1 | COCO API - http://cocodataset.org/ 2 | 3 | COCO is a large image dataset designed for object detection, segmentation, person keypoints detection, stuff segmentation, and caption generation. This package provides Matlab, Python, and Lua APIs that assist in loading, parsing, and visualizing the annotations in COCO. Please visit http://cocodataset.org/ for more information on COCO, including the data, paper, and tutorials. The exact format of the annotations is also described on the COCO website. The Matlab and Python APIs are complete; the Lua API provides only basic functionality. 4 | 5 | In addition to this API, please download both the COCO images and annotations in order to run the demos and use the API. Both are available on the project website. 6 | -Please download, unzip, and place the images in: coco/images/ 7 | -Please download and place the annotations in: coco/annotations/ 8 | For substantially more details on the API please see http://cocodataset.org/#download. 9 | 10 | After downloading the images and annotations, run the Matlab, Python, or Lua demos for example usage.
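
As a quick sanity check of the Python API (the annotation file name below is only an example; see the install notes that follow):
  >>> from pycocotools.coco import COCO
  >>> coco = COCO('annotations/instances_val2014.json')
  >>> len(coco.getImgIds())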
11 | 12 | To install: 13 | -For Matlab, add coco/MatlabApi to the Matlab path (OSX/Linux binaries provided) 14 | -For Python, run "make" under coco/PythonAPI 15 | -For Lua, run “luarocks make LuaAPI/rocks/coco-scm-1.rockspec” under coco/ 16 | -------------------------------------------------------------------------------- /eval_city/cocoapi/common/gason.cpp: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #include "gason.h" 3 | #include 4 | 5 | #define JSON_ZONE_SIZE 4096 6 | #define JSON_STACK_SIZE 32 7 | 8 | const char *jsonStrError(int err) { 9 | switch (err) { 10 | #define XX(no, str) \ 11 | case JSON_##no: \ 12 | return str; 13 | JSON_ERRNO_MAP(XX) 14 | #undef XX 15 | default: 16 | return "unknown"; 17 | } 18 | } 19 | 20 | void *JsonAllocator::allocate(size_t size) { 21 | size = (size + 7) & ~7; 22 | 23 | if (head && head->used + size <= JSON_ZONE_SIZE) { 24 | char *p = (char *)head + head->used; 25 | head->used += size; 26 | return p; 27 | } 28 | 29 | size_t allocSize = sizeof(Zone) + size; 30 | Zone *zone = (Zone *)malloc(allocSize <= JSON_ZONE_SIZE ? JSON_ZONE_SIZE : allocSize); 31 | if (zone == nullptr) 32 | return nullptr; 33 | zone->used = allocSize; 34 | if (allocSize <= JSON_ZONE_SIZE || head == nullptr) { 35 | zone->next = head; 36 | head = zone; 37 | } else { 38 | zone->next = head->next; 39 | head->next = zone; 40 | } 41 | return (char *)zone + sizeof(Zone); 42 | } 43 | 44 | void JsonAllocator::deallocate() { 45 | while (head) { 46 | Zone *next = head->next; 47 | free(head); 48 | head = next; 49 | } 50 | } 51 | 52 | static inline bool isspace(char c) { 53 | return c == ' ' || (c >= '\t' && c <= '\r'); 54 | } 55 | 56 | static inline bool isdelim(char c) { 57 | return c == ',' || c == ':' || c == ']' || c == '}' || isspace(c) || !c; 58 | } 59 | 60 | static inline bool isdigit(char c) { 61 | return c >= '0' && c <= '9'; 62 | } 63 | 64 | static inline bool isxdigit(char c) { 65 | return (c >= '0' && c <= '9') || ((c & ~' ') >= 'A' && (c & ~' ') <= 'F'); 66 | } 67 | 68 | static inline int char2int(char c) { 69 | if (c <= '9') 70 | return c - '0'; 71 | return (c & ~' ') - 'A' + 10; 72 | } 73 | 74 | static double string2double(char *s, char **endptr) { 75 | char ch = *s; 76 | if (ch == '-') 77 | ++s; 78 | 79 | double result = 0; 80 | while (isdigit(*s)) 81 | result = (result * 10) + (*s++ - '0'); 82 | 83 | if (*s == '.') { 84 | ++s; 85 | 86 | double fraction = 1; 87 | while (isdigit(*s)) { 88 | fraction *= 0.1; 89 | result += (*s++ - '0') * fraction; 90 | } 91 | } 92 | 93 | if (*s == 'e' || *s == 'E') { 94 | ++s; 95 | 96 | double base = 10; 97 | if (*s == '+') 98 | ++s; 99 | else if (*s == '-') { 100 | ++s; 101 | base = 0.1; 102 | } 103 | 104 | unsigned int exponent = 0; 105 | while (isdigit(*s)) 106 | exponent = (exponent * 10) + (*s++ - '0'); 107 | 108 | double power = 1; 109 | for (; exponent; exponent >>= 1, base *= base) 110 | if (exponent & 1) 111 | power *= base; 112 | 113 | result *= power; 114 | } 115 | 116 | *endptr = s; 117 | return ch == '-' ? 
-result : result; 118 | } 119 | 120 | static inline JsonNode *insertAfter(JsonNode *tail, JsonNode *node) { 121 | if (!tail) 122 | return node->next = node; 123 | node->next = tail->next; 124 | tail->next = node; 125 | return node; 126 | } 127 | 128 | static inline JsonValue listToValue(JsonTag tag, JsonNode *tail) { 129 | if (tail) { 130 | auto head = tail->next; 131 | tail->next = nullptr; 132 | return JsonValue(tag, head); 133 | } 134 | return JsonValue(tag, nullptr); 135 | } 136 | 137 | int jsonParse(char *s, char **endptr, JsonValue *value, JsonAllocator &allocator) { 138 | JsonNode *tails[JSON_STACK_SIZE]; 139 | JsonTag tags[JSON_STACK_SIZE]; 140 | char *keys[JSON_STACK_SIZE]; 141 | JsonValue o; 142 | int pos = -1; 143 | bool separator = true; 144 | JsonNode *node; 145 | *endptr = s; 146 | 147 | while (*s) { 148 | while (isspace(*s)) { 149 | ++s; 150 | if (!*s) break; 151 | } 152 | *endptr = s++; 153 | switch (**endptr) { 154 | case '-': 155 | if (!isdigit(*s) && *s != '.') { 156 | *endptr = s; 157 | return JSON_BAD_NUMBER; 158 | } 159 | case '0': 160 | case '1': 161 | case '2': 162 | case '3': 163 | case '4': 164 | case '5': 165 | case '6': 166 | case '7': 167 | case '8': 168 | case '9': 169 | o = JsonValue(string2double(*endptr, &s)); 170 | if (!isdelim(*s)) { 171 | *endptr = s; 172 | return JSON_BAD_NUMBER; 173 | } 174 | break; 175 | case '"': 176 | o = JsonValue(JSON_STRING, s); 177 | for (char *it = s; *s; ++it, ++s) { 178 | int c = *it = *s; 179 | if (c == '\\') { 180 | c = *++s; 181 | switch (c) { 182 | case '\\': 183 | case '"': 184 | case '/': 185 | *it = c; 186 | break; 187 | case 'b': 188 | *it = '\b'; 189 | break; 190 | case 'f': 191 | *it = '\f'; 192 | break; 193 | case 'n': 194 | *it = '\n'; 195 | break; 196 | case 'r': 197 | *it = '\r'; 198 | break; 199 | case 't': 200 | *it = '\t'; 201 | break; 202 | case 'u': 203 | c = 0; 204 | for (int i = 0; i < 4; ++i) { 205 | if (isxdigit(*++s)) { 206 | c = c * 16 + char2int(*s); 207 | } else { 208 | *endptr = s; 209 | return JSON_BAD_STRING; 210 | } 211 | } 212 | if (c < 0x80) { 213 | *it = c; 214 | } else if (c < 0x800) { 215 | *it++ = 0xC0 | (c >> 6); 216 | *it = 0x80 | (c & 0x3F); 217 | } else { 218 | *it++ = 0xE0 | (c >> 12); 219 | *it++ = 0x80 | ((c >> 6) & 0x3F); 220 | *it = 0x80 | (c & 0x3F); 221 | } 222 | break; 223 | default: 224 | *endptr = s; 225 | return JSON_BAD_STRING; 226 | } 227 | } else if ((unsigned int)c < ' ' || c == '\x7F') { 228 | *endptr = s; 229 | return JSON_BAD_STRING; 230 | } else if (c == '"') { 231 | *it = 0; 232 | ++s; 233 | break; 234 | } 235 | } 236 | if (!isdelim(*s)) { 237 | *endptr = s; 238 | return JSON_BAD_STRING; 239 | } 240 | break; 241 | case 't': 242 | if (!(s[0] == 'r' && s[1] == 'u' && s[2] == 'e' && isdelim(s[3]))) 243 | return JSON_BAD_IDENTIFIER; 244 | o = JsonValue(JSON_TRUE); 245 | s += 3; 246 | break; 247 | case 'f': 248 | if (!(s[0] == 'a' && s[1] == 'l' && s[2] == 's' && s[3] == 'e' && isdelim(s[4]))) 249 | return JSON_BAD_IDENTIFIER; 250 | o = JsonValue(JSON_FALSE); 251 | s += 4; 252 | break; 253 | case 'n': 254 | if (!(s[0] == 'u' && s[1] == 'l' && s[2] == 'l' && isdelim(s[3]))) 255 | return JSON_BAD_IDENTIFIER; 256 | o = JsonValue(JSON_NULL); 257 | s += 3; 258 | break; 259 | case ']': 260 | if (pos == -1) 261 | return JSON_STACK_UNDERFLOW; 262 | if (tags[pos] != JSON_ARRAY) 263 | return JSON_MISMATCH_BRACKET; 264 | o = listToValue(JSON_ARRAY, tails[pos--]); 265 | break; 266 | case '}': 267 | if (pos == -1) 268 | return JSON_STACK_UNDERFLOW; 269 | if (tags[pos] != JSON_OBJECT) 
270 | return JSON_MISMATCH_BRACKET; 271 | if (keys[pos] != nullptr) 272 | return JSON_UNEXPECTED_CHARACTER; 273 | o = listToValue(JSON_OBJECT, tails[pos--]); 274 | break; 275 | case '[': 276 | if (++pos == JSON_STACK_SIZE) 277 | return JSON_STACK_OVERFLOW; 278 | tails[pos] = nullptr; 279 | tags[pos] = JSON_ARRAY; 280 | keys[pos] = nullptr; 281 | separator = true; 282 | continue; 283 | case '{': 284 | if (++pos == JSON_STACK_SIZE) 285 | return JSON_STACK_OVERFLOW; 286 | tails[pos] = nullptr; 287 | tags[pos] = JSON_OBJECT; 288 | keys[pos] = nullptr; 289 | separator = true; 290 | continue; 291 | case ':': 292 | if (separator || keys[pos] == nullptr) 293 | return JSON_UNEXPECTED_CHARACTER; 294 | separator = true; 295 | continue; 296 | case ',': 297 | if (separator || keys[pos] != nullptr) 298 | return JSON_UNEXPECTED_CHARACTER; 299 | separator = true; 300 | continue; 301 | case '\0': 302 | continue; 303 | default: 304 | return JSON_UNEXPECTED_CHARACTER; 305 | } 306 | 307 | separator = false; 308 | 309 | if (pos == -1) { 310 | *endptr = s; 311 | *value = o; 312 | return JSON_OK; 313 | } 314 | 315 | if (tags[pos] == JSON_OBJECT) { 316 | if (!keys[pos]) { 317 | if (o.getTag() != JSON_STRING) 318 | return JSON_UNQUOTED_KEY; 319 | keys[pos] = o.toString(); 320 | continue; 321 | } 322 | if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode))) == nullptr) 323 | return JSON_ALLOCATION_FAILURE; 324 | tails[pos] = insertAfter(tails[pos], node); 325 | tails[pos]->key = keys[pos]; 326 | keys[pos] = nullptr; 327 | } else { 328 | if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode) - sizeof(char *))) == nullptr) 329 | return JSON_ALLOCATION_FAILURE; 330 | tails[pos] = insertAfter(tails[pos], node); 331 | } 332 | tails[pos]->value = o; 333 | } 334 | return JSON_BREAKING_BAD; 335 | } 336 | -------------------------------------------------------------------------------- /eval_city/cocoapi/common/gason.h: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #pragma once 3 | 4 | #include <stdint.h> 5 | #include <stddef.h> 6 | #include <assert.h> 7 | 8 | enum JsonTag { 9 | JSON_NUMBER = 0, 10 | JSON_STRING, 11 | JSON_ARRAY, 12 | JSON_OBJECT, 13 | JSON_TRUE, 14 | JSON_FALSE, 15 | JSON_NULL = 0xF 16 | }; 17 | 18 | struct JsonNode; 19 | 20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL 21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL 22 | #define JSON_VALUE_TAG_MASK 0xF 23 | #define JSON_VALUE_TAG_SHIFT 47 24 | 25 | union JsonValue { 26 | uint64_t ival; 27 | double fval; 28 | 29 | JsonValue(double x) 30 | : fval(x) { 31 | } 32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { 33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); 34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; 35 | } 36 | bool isDouble() const { 37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; 38 | } 39 | JsonTag getTag() const { 40 | return isDouble() ? 
JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); 41 | } 42 | uint64_t getPayload() const { 43 | assert(!isDouble()); 44 | return ival & JSON_VALUE_PAYLOAD_MASK; 45 | } 46 | double toNumber() const { 47 | assert(getTag() == JSON_NUMBER); 48 | return fval; 49 | } 50 | char *toString() const { 51 | assert(getTag() == JSON_STRING); 52 | return (char *)getPayload(); 53 | } 54 | JsonNode *toNode() const { 55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); 56 | return (JsonNode *)getPayload(); 57 | } 58 | }; 59 | 60 | struct JsonNode { 61 | JsonValue value; 62 | JsonNode *next; 63 | char *key; 64 | }; 65 | 66 | struct JsonIterator { 67 | JsonNode *p; 68 | 69 | void operator++() { 70 | p = p->next; 71 | } 72 | bool operator!=(const JsonIterator &x) const { 73 | return p != x.p; 74 | } 75 | JsonNode *operator*() const { 76 | return p; 77 | } 78 | JsonNode *operator->() const { 79 | return p; 80 | } 81 | }; 82 | 83 | inline JsonIterator begin(JsonValue o) { 84 | return JsonIterator{o.toNode()}; 85 | } 86 | inline JsonIterator end(JsonValue) { 87 | return JsonIterator{nullptr}; 88 | } 89 | 90 | #define JSON_ERRNO_MAP(XX) \ 91 | XX(OK, "ok") \ 92 | XX(BAD_NUMBER, "bad number") \ 93 | XX(BAD_STRING, "bad string") \ 94 | XX(BAD_IDENTIFIER, "bad identifier") \ 95 | XX(STACK_OVERFLOW, "stack overflow") \ 96 | XX(STACK_UNDERFLOW, "stack underflow") \ 97 | XX(MISMATCH_BRACKET, "mismatch bracket") \ 98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \ 99 | XX(UNQUOTED_KEY, "unquoted key") \ 100 | XX(BREAKING_BAD, "breaking bad") \ 101 | XX(ALLOCATION_FAILURE, "allocation failure") 102 | 103 | enum JsonErrno { 104 | #define XX(no, str) JSON_##no, 105 | JSON_ERRNO_MAP(XX) 106 | #undef XX 107 | }; 108 | 109 | const char *jsonStrError(int err); 110 | 111 | class JsonAllocator { 112 | struct Zone { 113 | Zone *next; 114 | size_t used; 115 | } *head = nullptr; 116 | 117 | public: 118 | JsonAllocator() = default; 119 | JsonAllocator(const JsonAllocator &) = delete; 120 | JsonAllocator &operator=(const JsonAllocator &) = delete; 121 | JsonAllocator(JsonAllocator &&x) : head(x.head) { 122 | x.head = nullptr; 123 | } 124 | JsonAllocator &operator=(JsonAllocator &&x) { 125 | head = x.head; 126 | x.head = nullptr; 127 | return *this; 128 | } 129 | ~JsonAllocator() { 130 | deallocate(); 131 | } 132 | void *allocate(size_t size); 133 | void deallocate(); 134 | }; 135 | 136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); 137 | -------------------------------------------------------------------------------- /eval_city/cocoapi/common/maskApi.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "maskApi.h" 8 | #include <math.h> 9 | #include <stdlib.h> 10 | 11 | uint umin( uint a, uint b ) { return (a<b) ? a : b; } 12 | uint umax( uint a, uint b ) { return (a>b) ? 
a : b; } 13 | 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { 15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); 16 | siz j; if(cnts) for(j=0; jcnts[j]=cnts[j]; 17 | } 18 | 19 | void rleFree( RLE *R ) { 20 | free(R->cnts); R->cnts=0; 21 | } 22 | 23 | void rlesInit( RLE **R, siz n ) { 24 | siz i; *R = (RLE*) malloc(sizeof(RLE)*n); 25 | for(i=0; i0 ) { 61 | c=umin(ca,cb); cc+=c; ct=0; 62 | ca-=c; if(!ca && a0) { 83 | crowd=iscrowd!=NULL && iscrowd[g]; 84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } 85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb; 86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; 87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; 88 | while( ct>0 ) { 89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; 90 | ca-=c; if(!ca && athr) keep[j]=0; 105 | } 106 | } 107 | } 108 | 109 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) { 110 | double h, w, i, u, ga, da; siz g, d; int crowd; 111 | for( g=0; gthr) keep[j]=0; 129 | } 130 | } 131 | } 132 | 133 | void rleToBbox( const RLE *R, BB bb, siz n ) { 134 | siz i; for( i=0; id?1:c=dy && xs>xe) || (dxye); 173 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } 174 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy; 175 | if(dx>=dy) for( d=0; d<=dx; d++ ) { 176 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; 177 | } else for( d=0; d<=dy; d++ ) { 178 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; 179 | } 180 | } 181 | /* get points along y-boundary and downsample */ 182 | free(x); free(y); k=m; m=0; double xd, yd; 183 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); 184 | for( j=1; jw-1 ) continue; 187 | yd=(double)(v[j]h) yd=h; yd=ceil(yd); 189 | x[m]=(int) xd; y[m]=(int) yd; m++; 190 | } 191 | /* compute rle encoding given y-boundary points */ 192 | k=m; a=malloc(sizeof(uint)*(k+1)); 193 | for( j=0; j0) b[m++]=a[j++]; else { 199 | j++; if(jm, p=0; long x; int more; 206 | char *s=malloc(sizeof(char)*m*6); 207 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; 209 | while( more ) { 210 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0; 211 | if(more) c |= 0x20; c+=48; s[p++]=c; 212 | } 213 | } 214 | s[p]=0; return s; 215 | } 216 | 217 | void rleFrString( RLE *R, char *s, siz h, siz w ) { 218 | siz m=0, p=0, k; long x; int more; uint *cnts; 219 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; 220 | while( s[p] ) { 221 | x=0; k=0; more=1; 222 | while( more ) { 223 | char c=s[p]-48; x |= (c & 0x1f) << 5*k; 224 | more = c & 0x20; p++; k++; 225 | if(!more && (c & 0x10)) x |= -1 << 5*k; 226 | } 227 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; 228 | } 229 | rleInit(R,h,w,m,cnts); free(cnts); 230 | } 231 | -------------------------------------------------------------------------------- /eval_city/cocoapi/common/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /eval_city/cocoapi/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /eval_city/dt_txt2json.m: -------------------------------------------------------------------------------- 1 | clear 2 | addpath('./cocoapi/MatlabAPI') 3 | main_path = '../res/'; 4 | subdir = dir(main_path); 5 | for j = 3 : length(subdir) 6 | ndt=0; 7 | dt_coco = struct(); 8 | dt_path = fullfile(main_path, subdir(j).name); 9 | % res = load([dt_path,'/val_500_det.txt'],'%f'); 10 | res = load([dt_path,'/val_det.txt'],'%f'); 11 | num_imgs = max(res(:,1)); 12 | out = [dt_path,'/val_dt.json']; 13 | if exist(out,'file') 14 | continue 15 | end 16 | for i = 1:num_imgs 17 | bbs = res(res(:,1)==i,:); 18 | for ibb=1:size(bbs,1) 19 | ndt=ndt+1; 20 | bb=bbs(ibb,2:6); 21 | dt_coco(ndt).image_id=i; 22 | dt_coco(ndt).category_id=1; 23 | dt_coco(ndt).bbox=bb(1:4); 24 | dt_coco(ndt).score=bb(5); 25 | end 26 | end 27 | dt_string = gason(dt_coco); 28 | fp = fopen(out,'w'); 29 | fprintf(fp,'%s',dt_string); 30 | fclose(fp); 31 | end -------------------------------------------------------------------------------- /eval_city/eval_script/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lwpyr/CSP-pedestrian-detection-in-pytorch/f280cd08bebb5daeaba27d3ea6ac5f317b42f878/eval_city/eval_script/__init__.py -------------------------------------------------------------------------------- /eval_city/eval_script/eval_demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | from coco import COCO 3 | from eval_MR_multisetup import COCOeval 4 | 5 | 6 | def validate(annFile, dt_path): 7 | mean_MR = [] 8 | for id_setup in range(0, 4): 9 | cocoGt = COCO(annFile) 10 | cocoDt = cocoGt.loadRes(dt_path) 11 | imgIds = sorted(cocoGt.getImgIds()) 12 | cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') 13 | cocoEval.params.imgIds = imgIds 14 | cocoEval.evaluate(id_setup) 15 | cocoEval.accumulate() 16 | mean_MR.append(cocoEval.summarize_nofile(id_setup)) 17 | return mean_MR 18 | 19 | if __name__=='__main__': 20 | annType = 'bbox' #specify type here 21 | 22 | #initialize COCO ground truth api 23 | annFile = '../val_gt.json' 24 | main_path = '../../output/valresults/city/h/off' 25 | for f in sorted(os.listdir(main_path)): 26 | print f 27 | # initialize COCO detections api 28 | dt_path = os.path.join(main_path, f) 29 | resFile = os.path.join(dt_path,'val_dt.json') 30 | respath = os.path.join(dt_path,'results.txt') 31 | # if os.path.exists(respath): 32 | # continue 33 | ## running evaluation 34 | res_file = open(respath, "w") 35 | for id_setup in range(0,1): 36 | cocoGt = COCO(annFile) 37 | cocoDt = cocoGt.loadRes(resFile) 38 | imgIds = sorted(cocoGt.getImgIds()) 39 | 
cocoEval = COCOeval(cocoGt,cocoDt,annType) 40 | cocoEval.params.imgIds = imgIds 41 | cocoEval.evaluate(id_setup) 42 | cocoEval.accumulate() 43 | cocoEval.summarize(id_setup,res_file) 44 | 45 | res_file.close() -------------------------------------------------------------------------------- /eval_city/eval_script/readme.txt: -------------------------------------------------------------------------------- 1 | ################################ 2 | This script is created to produce miss rate numbers by making minor changes to the COCO Python evaluation script [1]. It is a Python re-implementation of the Caltech evaluation code [2], which is written in Matlab, and it produces exactly the same numbers as the Matlab code. 3 | [1] https://github.com/pdollar/coco/tree/master/PythonAPI 4 | [2] http://www.vision.caltech.edu/Image_Datasets/CaltechPedestrians/code/code3.2.1.zip 5 | ################################# 6 | Usage 7 | 1. Prepare detection results in COCO format, and write them in a single .json file. 8 | 2. Run eval_demo.py. 9 | 3. Detailed evaluations will be written to results.txt. 10 | ################################# 11 | 12 | -------------------------------------------------------------------------------- /eval_city/readme.txt: -------------------------------------------------------------------------------- 1 | Submitting results 2 | Please write the detection results for all test images in a single .json file (see dt_mat2json.m), and then send it to shanshan.zhang@njust.edu.cn. 3 | You'll receive the evaluation results via e-mail, and can then decide whether or not to publish them. 4 | If you would like to publish your results on the dashboard, please also specify a name for your method. 5 | 6 | Metrics 7 | We use the same protocol as in [1] for evaluation. As a numerical measure of performance, the log-average miss rate (MR) is computed by averaging miss rates over the FPPI (false positives per image) range of [10^-2, 10^0]. 8 | For detailed evaluation, we consider the following 4 subsets: 9 | 1. 'Reasonable': height [50, inf]; visibility [0.65, inf] 10 | 2. 'Reasonable_small': height [50, 75]; visibility [0.65, inf] 11 | 3. 'Reasonable_occ=heavy': height [50, inf]; visibility [0.2, 0.65] 12 | 4. 'All': height [20, inf]; visibility [0.2, inf] 13 | 14 | 15 | Reference 16 | [1] P. Dollar, C. Wojek, B. Schiele and P. Perona. Pedestrian Detection: An Evaluation of the State of the Art. TPAMI, 2012.
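For orientation, the log-average MR described under Metrics can be sketched in a few lines of Python (a hypothetical helper, not part of the evaluation scripts; eval_script/eval_MR_multisetup.py is the reference implementation; fppi and miss_rate are assumed to be one subset's curve sorted by ascending FPPI):

  import numpy as np

  def log_average_mr(fppi, miss_rate):
      # 9 reference points spaced evenly in log space over [10^-2, 10^0]
      refs = np.power(10.0, np.linspace(-2, 0, 9))
      sampled = []
      for r in refs:
          idx = np.where(fppi <= r)[0]
          # miss rate at the largest FPPI not exceeding the reference point
          sampled.append(miss_rate[idx[-1]] if len(idx) else 1.0)
      # log-average = geometric mean of the sampled miss rates
      return np.exp(np.mean(np.log(np.maximum(sampled, 1e-10))))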
17 | -------------------------------------------------------------------------------- /net/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lwpyr/CSP-pedestrian-detection-in-pytorch/f280cd08bebb5daeaba27d3ea6ac5f317b42f878/net/__init__.py -------------------------------------------------------------------------------- /net/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lwpyr/CSP-pedestrian-detection-in-pytorch/f280cd08bebb5daeaba27d3ea6ac5f317b42f878/net/__init__.pyc -------------------------------------------------------------------------------- /net/l2norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd import Variable 5 | import torch.nn.init as init 6 | 7 | 8 | class L2Norm(nn.Module): 9 | def __init__(self, n_channels, scale): 10 | super(L2Norm, self).__init__() 11 | self.n_channels = n_channels 12 | self.gamma = scale or None 13 | self.eps = 1e-10 14 | self.weight = nn.Parameter(torch.Tensor(self.n_channels)) 15 | self.reset_parameters() 16 | 17 | def reset_parameters(self): 18 | init.constant_(self.weight, self.gamma) 19 | 20 | def forward(self, x): 21 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps 22 | x = torch.div(x, norm) 23 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x 24 | return out 25 | -------------------------------------------------------------------------------- /net/l2norm.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lwpyr/CSP-pedestrian-detection-in-pytorch/f280cd08bebb5daeaba27d3ea6ac5f317b42f878/net/l2norm.pyc -------------------------------------------------------------------------------- /net/loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | 5 | class cls_pos(nn.Module): 6 | def __init__(self): 7 | super(cls_pos, self).__init__() 8 | self.bce = nn.BCELoss(reduction='none') 9 | 10 | def forward(self, pos_pred, pos_label): # 0-gauss 1-mask 2-center 11 | log_loss = self.bce(pos_pred[:, 0, :, :], pos_label[:, 2, :, :]) 12 | 13 | positives = pos_label[:, 2, :, :] 14 | negatives = pos_label[:, 1, :, :] - pos_label[:, 2, :, :] 15 | 16 | fore_weight = positives * (1.0-pos_pred[:, 0, :, :]) ** 2 17 | back_weight = negatives * ((1.0-pos_label[:, 0, :, :])**4.0) * (pos_pred[:, 0, :, :]**2.0) 18 | 19 | focal_weight = fore_weight + back_weight 20 | assigned_box = torch.sum(pos_label[:, 2, :, :]) 21 | 22 | cls_loss = 0.01 * torch.sum(focal_weight*log_loss) / max(1.0, assigned_box) 23 | 24 | return cls_loss 25 | 26 | 27 | class reg_pos(nn.Module): 28 | def __init__(self): 29 | super(reg_pos, self).__init__() 30 | self.smoothl1 = nn.SmoothL1Loss(reduction='none') 31 | 32 | def forward(self, h_pred, h_label): 33 | l1_loss = h_label[:, 1, :, :]*self.smoothl1(h_pred[:, 0, :, :]/(h_label[:, 0, :, :]+1e-10), 34 | h_label[:, 0, :, :]/(h_label[:, 0, :, :]+1e-10)) 35 | reg_loss = torch.sum(l1_loss) / max(1.0, torch.sum(h_label[:, 1, :, :])) 36 | return reg_loss 37 | 38 | 39 | class offset_pos(nn.Module): 40 | def __init__(self): 41 | super(offset_pos, self).__init__() 42 | self.smoothl1 = nn.SmoothL1Loss(reduction='none') 43 | 44 | def forward(self, offset_pred, offset_label): 45 | 
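# offset_label layout (a note mirroring the cls_pos comment above): channels
# 0-1 hold the two offset regression targets (y, x, as consumed later by
# util/functions.py), and channel 2 is the positive-center mask that both
# weights and normalizes the loss computed below.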
l1_loss = offset_label[:, 2, :, :].unsqueeze(dim=1)*self.smoothl1(offset_pred, offset_label[:, :2, :, :]) 46 | off_loss = 0.1 * torch.sum(l1_loss) / max(1.0, torch.sum(offset_label[:, 2, :, :])) 47 | return off_loss 48 | -------------------------------------------------------------------------------- /net/network.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import torch.nn as nn 4 | from resnet import * 5 | from l2norm import L2Norm 6 | 7 | 8 | class CSPNet(nn.Module): 9 | def __init__(self): 10 | super(CSPNet, self).__init__() 11 | 12 | resnet = resnet50(pretrained=True, receptive_keep=True) 13 | 14 | self.conv1 = resnet.conv1 15 | self.bn1 = resnet.bn1 16 | self.relu = resnet.relu 17 | self.maxpool = resnet.maxpool 18 | self.layer1 = resnet.layer1 19 | self.layer2 = resnet.layer2 20 | self.layer3 = resnet.layer3 21 | self.layer4 = resnet.layer4 22 | 23 | self.p3 = nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1) 24 | self.p4 = nn.ConvTranspose2d(1024, 256, kernel_size=4, stride=4, padding=0) 25 | self.p5 = nn.ConvTranspose2d(2048, 256, kernel_size=4, stride=4, padding=0) 26 | 27 | nn.init.xavier_normal_(self.p3.weight) 28 | nn.init.xavier_normal_(self.p4.weight) 29 | nn.init.xavier_normal_(self.p5.weight) 30 | nn.init.constant_(self.p3.bias, 0) 31 | nn.init.constant_(self.p4.bias, 0) 32 | nn.init.constant_(self.p5.bias, 0) 33 | 34 | self.p3_l2 = L2Norm(256, 10) 35 | self.p4_l2 = L2Norm(256, 10) 36 | self.p5_l2 = L2Norm(256, 10) 37 | 38 | self.feat = nn.Conv2d(768, 256, kernel_size=3, stride=1, padding=1, bias=False) 39 | self.feat_bn = nn.BatchNorm2d(256, momentum=0.01) 40 | self.feat_act = nn.ReLU(inplace=True) 41 | 42 | self.pos_conv = nn.Conv2d(256, 1, kernel_size=1) 43 | self.reg_conv = nn.Conv2d(256, 1, kernel_size=1) 44 | self.off_conv = nn.Conv2d(256, 2, kernel_size=1) 45 | 46 | nn.init.xavier_normal_(self.feat.weight) 47 | nn.init.xavier_normal_(self.pos_conv.weight) 48 | nn.init.xavier_normal_(self.reg_conv.weight) 49 | nn.init.xavier_normal_(self.off_conv.weight) 50 | nn.init.constant_(self.pos_conv.bias, -math.log(0.99/0.01)) 51 | nn.init.constant_(self.reg_conv.bias, 0) 52 | nn.init.constant_(self.off_conv.bias, 0) 53 | 54 | def forward(self, x): 55 | x = self.conv1(x) 56 | x = self.bn1(x) 57 | x = self.relu(x) 58 | x = self.maxpool(x) 59 | 60 | x = self.layer1(x) 61 | 62 | x = self.layer2(x) 63 | p3 = self.p3(x) 64 | p3 = self.p3_l2(p3) 65 | 66 | x = self.layer3(x) 67 | p4 = self.p4(x) 68 | p4 = self.p4_l2(p4) 69 | 70 | x = self.layer4(x) 71 | p5 = self.p5(x) 72 | p5 = self.p5_l2(p5) 73 | 74 | cat = torch.cat([p3, p4, p5], dim=1) 75 | 76 | feat = self.feat(cat) 77 | feat = self.feat_bn(feat) 78 | feat = self.feat_act(feat) 79 | 80 | x_cls = self.pos_conv(feat) 81 | x_cls = torch.sigmoid(x_cls) 82 | x_reg = self.reg_conv(feat) 83 | x_off = self.off_conv(feat) 84 | 85 | return x_cls, x_reg, x_off 86 | 87 | # def train(self, mode=True): 88 | # # Override train so that the training mode is set as we want 89 | # nn.Module.train(self, mode) 90 | # if mode: 91 | # # Set fixed blocks to be in eval mode 92 | # self.conv1.eval() 93 | # self.layer1.eval() 94 | # 95 | # # bn is trainable in CONV2 96 | # def set_bn_train(m): 97 | # class_name = m.__class__.__name__ 98 | # if class_name.find('BatchNorm') != -1: 99 | # m.train() 100 | # else: 101 | # m.eval() 102 | # self.layer1.apply(set_bn_train) 103 | 104 | 105 | # to do 106 | # 107 | # construct a bn fixed CSP 108 | # 109 | # 110 | # 111 | # 112 
| # 113 | 114 | 115 | 116 | class CSPNet_mod(nn.Module): 117 | # This is the BatchNorm-fixed version of CSP 118 | # under construction !!!!!!!!!!!!!!!!!!! 119 | def __init__(self): 120 | super(CSPNet_mod, self).__init__() 121 | 122 | resnet = resnet50(pretrained=True, receptive_keep=True) 123 | 124 | self.conv1 = resnet.conv1 125 | self.bn1 = resnet.bn1 126 | self.relu = resnet.relu 127 | self.maxpool = resnet.maxpool 128 | self.layer1 = resnet.layer1 129 | self.layer2 = resnet.layer2 130 | self.layer3 = resnet.layer3 131 | self.layer4 = resnet.layer4 132 | 133 | self.p3 = nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1) 134 | self.p4 = nn.ConvTranspose2d(1024, 256, kernel_size=4, stride=4, padding=0) 135 | self.p5 = nn.ConvTranspose2d(2048, 256, kernel_size=4, stride=4, padding=0) 136 | 137 | nn.init.xavier_normal_(self.p3.weight) 138 | nn.init.xavier_normal_(self.p4.weight) 139 | nn.init.xavier_normal_(self.p5.weight) 140 | nn.init.constant_(self.p3.bias, 0) 141 | nn.init.constant_(self.p4.bias, 0) 142 | nn.init.constant_(self.p5.bias, 0) 143 | 144 | self.p3_l2 = L2Norm(256, 10) 145 | self.p4_l2 = L2Norm(256, 10) 146 | self.p5_l2 = L2Norm(256, 10) 147 | 148 | self.feat = nn.Conv2d(768, 256, kernel_size=3, stride=1, padding=1, bias=True) 149 | self.feat_act = nn.ReLU(inplace=True) 150 | 151 | self.pos_conv = nn.Conv2d(256, 1, kernel_size=1) 152 | self.reg_conv = nn.Conv2d(256, 1, kernel_size=1) 153 | self.off_conv = nn.Conv2d(256, 2, kernel_size=1) 154 | 155 | nn.init.xavier_normal_(self.feat.weight) 156 | nn.init.xavier_normal_(self.pos_conv.weight) 157 | nn.init.xavier_normal_(self.reg_conv.weight) 158 | nn.init.xavier_normal_(self.off_conv.weight) 159 | 160 | nn.init.constant_(self.feat.bias, 0) 161 | nn.init.constant_(self.pos_conv.bias, -math.log(0.99/0.01)) 162 | nn.init.constant_(self.reg_conv.bias, 0) 163 | nn.init.constant_(self.off_conv.bias, 0) 164 | 165 | for p in self.conv1.parameters(): 166 | p.requires_grad = False 167 | for p in self.bn1.parameters(): 168 | p.requires_grad = False 169 | for p in self.layer1.parameters(): 170 | p.requires_grad = False 171 | 172 | def set_bn_fix(m): 173 | classname = m.__class__.__name__ 174 | if classname.find('BatchNorm') != -1: 175 | for p in m.parameters(): p.requires_grad = False 176 | 177 | self.layer2.apply(set_bn_fix) 178 | self.layer3.apply(set_bn_fix) 179 | self.layer4.apply(set_bn_fix) 180 | 181 | def forward(self, x): 182 | x = self.conv1(x) 183 | x = self.bn1(x) 184 | x = self.relu(x) 185 | x = self.maxpool(x) 186 | 187 | x = self.layer1(x) 188 | 189 | x = self.layer2(x) 190 | p3 = self.p3(x) 191 | p3 = self.p3_l2(p3) 192 | 193 | x = self.layer3(x) 194 | p4 = self.p4(x) 195 | p4 = self.p4_l2(p4) 196 | 197 | x = self.layer4(x) 198 | p5 = self.p5(x) 199 | p5 = self.p5_l2(p5) 200 | 201 | cat = torch.cat([p3, p4, p5], dim=1) 202 | 203 | feat = self.feat(cat) 204 | feat = self.feat_act(feat) 205 | 206 | x_cls = self.pos_conv(feat) 207 | x_cls = torch.sigmoid(x_cls) 208 | x_reg = self.reg_conv(feat) 209 | x_off = self.off_conv(feat) 210 | 211 | return x_cls, x_reg, x_off 212 | 213 | def train(self, mode=True): 214 | # Override train so that the training mode is set as we want 215 | nn.Module.train(self, mode) 216 | if mode: 217 | # Set fixed blocks to be in eval mode 218 | self.conv1.eval() 219 | self.bn1.eval() 220 | self.layer1.eval() 221 | 222 | def set_bn_eval(m): 223 | classname = m.__class__.__name__ 224 | if classname.find('BatchNorm') != -1: 225 | m.eval() 226 | 227 | self.layer2.apply(set_bn_eval) 228 
self.layer3.apply(set_bn_eval) 229 | self.layer4.apply(set_bn_eval) 230 | -------------------------------------------------------------------------------- /net/network.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lwpyr/CSP-pedestrian-detection-in-pytorch/f280cd08bebb5daeaba27d3ea6ac5f317b42f878/net/network.pyc -------------------------------------------------------------------------------- /net/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | 5 | 6 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 7 | 'resnet152'] 8 | 9 | 10 | model_urls = { 11 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 12 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 13 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 14 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 15 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 16 | } 17 | 18 | 19 | def conv3x3(in_planes, out_planes, stride=1): 20 | """3x3 convolution with padding""" 21 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 22 | padding=1, bias=False) 23 | 24 | 25 | class BasicBlock(nn.Module): 26 | expansion = 1 27 | 28 | def __init__(self, inplanes, planes, stride=1, downsample=None): 29 | super(BasicBlock, self).__init__() 30 | self.conv1 = conv3x3(inplanes, planes, stride) 31 | self.bn1 = nn.BatchNorm2d(planes) 32 | self.relu = nn.ReLU(inplace=True) 33 | self.conv2 = conv3x3(planes, planes) 34 | self.bn2 = nn.BatchNorm2d(planes) 35 | self.downsample = downsample 36 | self.stride = stride 37 | 38 | def forward(self, x): 39 | residual = x 40 | 41 | out = self.conv1(x) 42 | out = self.bn1(out) 43 | out = self.relu(out) 44 | 45 | out = self.conv2(out) 46 | out = self.bn2(out) 47 | 48 | if self.downsample is not None: 49 | residual = self.downsample(x) 50 | 51 | out += residual 52 | out = self.relu(out) 53 | 54 | return out 55 | 56 | 57 | class Bottleneck(nn.Module): 58 | expansion = 4 59 | 60 | def __init__(self, inplanes, planes, stride=1, dilate=1, downsample=None): 61 | super(Bottleneck, self).__init__() 62 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False) 63 | self.bn1 = nn.BatchNorm2d(planes, momentum=0.01) 64 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 65 | dilation=dilate, padding=dilate, bias=False) 66 | self.bn2 = nn.BatchNorm2d(planes, momentum=0.01) 67 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 68 | self.bn3 = nn.BatchNorm2d(planes * 4, momentum=0.01) 69 | self.relu = nn.ReLU(inplace=True) 70 | self.downsample = downsample 71 | self.stride = stride 72 | 73 | def forward(self, x): 74 | residual = x 75 | 76 | out = self.conv1(x) 77 | out = self.bn1(out) 78 | out = self.relu(out) 79 | 80 | out = self.conv2(out) 81 | out = self.bn2(out) 82 | out = self.relu(out) 83 | 84 | out = self.conv3(out) 85 | out = self.bn3(out) 86 | 87 | if self.downsample is not None: 88 | residual = self.downsample(x) 89 | 90 | out += residual 91 | out = self.relu(out) 92 | 93 | return out 94 | 95 | 96 | class ResNet(nn.Module): 97 | 98 | def __init__(self, block, layers, receptive_keep=True): 99 | self.inplanes = 64 100 | super(ResNet, self).__init__() 101 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, 
stride=2, padding=3, 102 | bias=False) 103 | self.bn1 = nn.BatchNorm2d(64, momentum=0.01) 104 | self.relu = nn.ReLU(inplace=True) 105 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 106 | self.layer1 = self._make_layer(block, 64, layers[0], stride=1, dilate=1) 107 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=1) 108 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=1) 109 | if receptive_keep: 110 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilate=2) 111 | else: 112 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=1) 113 | 114 | self.avgpool = nn.AvgPool2d(7, stride=1) 115 | self.fc = nn.Linear(512 * block.expansion, 1000) 116 | 117 | for m in self.modules(): 118 | if isinstance(m, nn.Conv2d): 119 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 120 | m.weight.data.normal_(0, math.sqrt(2. / n)) 121 | elif isinstance(m, nn.BatchNorm2d): 122 | m.weight.data.fill_(1) 123 | m.bias.data.zero_() 124 | 125 | def _make_layer(self, block, planes, blocks, stride=1, dilate=1): 126 | downsample = None 127 | if stride != 1 or self.inplanes != planes * block.expansion: 128 | downsample = nn.Sequential( 129 | nn.Conv2d(self.inplanes, planes * block.expansion, 130 | kernel_size=1, stride=stride, bias=False), 131 | nn.BatchNorm2d(planes * block.expansion, momentum=0.01), 132 | ) 133 | 134 | layers = [] 135 | layers.append(block(self.inplanes, planes, stride, dilate, downsample)) 136 | self.inplanes = planes * block.expansion 137 | for i in range(1, blocks): 138 | layers.append(block(self.inplanes, planes, 1, dilate)) 139 | 140 | return nn.Sequential(*layers) 141 | 142 | def forward(self, x): 143 | x = self.conv1(x) 144 | x = self.bn1(x) 145 | x = self.relu(x) 146 | x = self.maxpool(x) 147 | 148 | x = self.layer1(x) 149 | x = self.layer2(x) 150 | x = self.layer3(x) 151 | x = self.layer4(x) 152 | 153 | return x 154 | 155 | 156 | def resnet18(pretrained=False, **kwargs): 157 | """Constructs a ResNet-18 model. 158 | 159 | Args: 160 | pretrained (bool): If True, returns a model pre-trained on ImageNet 161 | """ 162 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 163 | if pretrained: 164 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 165 | return model 166 | 167 | 168 | def resnet34(pretrained=False, **kwargs): 169 | """Constructs a ResNet-34 model. 170 | 171 | Args: 172 | pretrained (bool): If True, returns a model pre-trained on ImageNet 173 | """ 174 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 175 | if pretrained: 176 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 177 | return model 178 | 179 | 180 | def resnet50(pretrained=False, **kwargs): 181 | """Constructs a ResNet-50 model. 182 | 183 | Args: 184 | pretrained (bool): If True, returns a model pre-trained on ImageNet 185 | """ 186 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 187 | if pretrained: 188 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 189 | return model 190 | 191 | 192 | def resnet101(pretrained=False, **kwargs): 193 | """Constructs a ResNet-101 model. 194 | 195 | Args: 196 | pretrained (bool): If True, returns a model pre-trained on ImageNet 197 | """ 198 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 199 | if pretrained: 200 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 201 | return model 202 | 203 | 204 | def resnet152(pretrained=False, **kwargs): 205 | """Constructs a ResNet-152 model. 
206 | 207 | Args: 208 | pretrained (bool): If True, returns a model pre-trained on ImageNet 209 | """ 210 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 211 | if pretrained: 212 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 213 | return model 214 | -------------------------------------------------------------------------------- /net/resnet.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lwpyr/CSP-pedestrian-detection-in-pytorch/f280cd08bebb5daeaba27d3ea6ac5f317b42f878/net/resnet.pyc -------------------------------------------------------------------------------- /trainval_caffestyle.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | import json 5 | import torch.optim as optim 6 | from copy import deepcopy 7 | from torch.utils.data import DataLoader 8 | 9 | from net.loss import * 10 | from net.network import CSPNet, CSPNet_mod 11 | from config import Config 12 | from dataloader.loader import * 13 | from util.functions import parse_det_offset 14 | from eval_city.eval_script.eval_demo import validate 15 | 16 | 17 | config = Config() 18 | config.train_path = './data/citypersons' 19 | config.test_path = './data/citypersons' 20 | config.gpu_ids = [0, 1] 21 | config.onegpu = 4 22 | config.size_train = (640, 1280) 23 | config.size_test = (1024, 2048) 24 | config.init_lr = 2e-4 25 | config.num_epochs = 150 26 | config.offset = True 27 | config.val = True 28 | config.val_frequency = 5 29 | config.caffemodel = True 30 | 31 | # dataset 32 | print('Dataset...') 33 | traindataset = CityPersons(path=config.train_path, type='train', config=config, 34 | caffemodel=config.caffemodel) 35 | trainloader = DataLoader(traindataset, batch_size=config.onegpu*len(config.gpu_ids)) 36 | 37 | if config.val: 38 | testdataset = CityPersons(path=config.train_path, type='val', config=config, 39 | caffemodel=config.caffemodel, preloaded=False) 40 | testloader = DataLoader(testdataset, batch_size=1) 41 | 42 | # net 43 | print('Net...') 44 | net = CSPNet().cuda() 45 | # This is important since the pretrained model is different under config.caffemodel == True 46 | pretrained_dict = torch.load('./resnet101_caffe.pth') 47 | model_dict = net.state_dict() 48 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 49 | model_dict.update(pretrained_dict) 50 | net.load_state_dict(model_dict) 51 | 52 | # position 53 | center = cls_pos().cuda() 54 | height = reg_pos().cuda() 55 | offset = offset_pos().cuda() 56 | 57 | # optimizer 58 | params = [] 59 | for n, p in net.named_parameters(): 60 | if p.requires_grad: 61 | params.append({'params': p}) 62 | else: 63 | print(n) 64 | 65 | if config.teacher: 66 | teacher_dict = net.state_dict() 67 | 68 | #net.load_state_dict(torch.load('ckpt/DataParallel-9.pth')) 69 | # if len(config.gpu_ids) > 1: 70 | net = nn.DataParallel(net, device_ids=config.gpu_ids) 71 | 72 | optimizer = optim.Adam(params, lr=config.init_lr) 73 | 74 | batchsize = config.onegpu * len(config.gpu_ids) 75 | train_batches = len(trainloader) 76 | 77 | config.print_conf() 78 | 79 | 80 | def criterion(output, label): 81 | cls_loss = center(output[0], label[0]) 82 | reg_loss = height(output[1], label[1]) 83 | off_loss = offset(output[2], label[2]) 84 | return cls_loss, reg_loss, off_loss 85 | 86 | 87 | def train(): 88 | 89 | print('Training start') 90 | if not os.path.exists('./ckpt'): 91 | os.mkdir('./ckpt') 92 | if not 
os.path.exists('./log'): 93 | os.mkdir('./log') 94 | 95 | # open log file 96 | log_file = './log/' + time.strftime('%Y%m%d', time.localtime(time.time()))+'.log' 97 | log = open(log_file, 'w') 98 | if config.val: 99 | vallog_file = log_file + '.val' 100 | vallog = open(vallog_file, 'w') 101 | 102 | best_loss = np.Inf 103 | best_loss_epoch = 0 104 | 105 | best_mr = 100 106 | best_mr_epoch = 0 107 | 108 | for epoch in range(150): 109 | print('----------') 110 | print('Epoch %d begin' % (epoch + 1)) 111 | t1 = time.time() 112 | 113 | epoch_loss = 0.0 114 | net.train() 115 | 116 | for i, data in enumerate(trainloader, 0): 117 | 118 | t3 = time.time() 119 | # get the inputs 120 | inputs, labels = data 121 | inputs = inputs.cuda() 122 | labels = [l.cuda().float() for l in labels] 123 | 124 | # zero the parameter gradients 125 | optimizer.zero_grad() 126 | 127 | # heat map 128 | outputs = net(inputs) 129 | 130 | # loss 131 | cls_loss, reg_loss, off_loss = criterion(outputs, labels) 132 | loss = cls_loss + reg_loss + off_loss 133 | 134 | # back-prop 135 | loss.backward() 136 | 137 | # update param 138 | optimizer.step() 139 | if config.teacher: 140 | for k, v in net.module.state_dict().items(): 141 | if k.find('num_batches_tracked') == -1: 142 | teacher_dict[k] = config.alpha*teacher_dict[k] + (1-config.alpha)*v 143 | else: 144 | teacher_dict[k] = 1*v 145 | 146 | # print statistics 147 | batch_loss = loss.item() 148 | batch_cls_loss = cls_loss.item() 149 | batch_reg_loss = reg_loss.item() 150 | batch_off_loss = off_loss.item() 151 | 152 | t4 = time.time() 153 | print('\r[Epoch %d/150, Batch %d/%d]$ loss: %.6f, cls: %.6f, reg: %.6f, off: %.6f, Time: %.3f sec ' % 154 | (epoch + 1, i + 1, train_batches, batch_loss, batch_cls_loss, batch_reg_loss, batch_off_loss, t4-t3)), 155 | epoch_loss += batch_loss 156 | print('') 157 | 158 | t2 = time.time() 159 | epoch_loss /= len(trainloader) 160 | print('Epoch %d end, AvgLoss is %.6f, Time used %.1f sec.' % (epoch+1, epoch_loss, int(t2-t1))) 161 | if epoch_loss < best_loss: 162 | best_loss = epoch_loss 163 | best_loss_epoch = epoch + 1 164 | print('Epoch %d has lowest loss: %.7f' % (best_loss_epoch, best_loss)) 165 | 166 | if config.val and epoch+1 > 10 and (epoch+1) % config.val_frequency == 0: 167 | cur_mr = val(vallog) 168 | if cur_mr < best_mr: 169 | best_mr = cur_mr 170 | best_mr_epoch = epoch + 1 171 | print('Epoch %d has lowest MR: %.7f' % (best_mr_epoch, best_mr)) 172 | 173 | log.write('%d %.7f\n' % (epoch + 1, epoch_loss)) 174 | 175 | print('Save checkpoint...') 176 | filename = './ckpt/%s-%d.pth' % (net.module.__class__.__name__, epoch+1) 177 | torch.save(net.module.state_dict(), filename) 178 | if config.teacher: 179 | torch.save(teacher_dict, filename+'.tea') 180 | 181 | print('%s saved.' 
% filename) 182 | 183 | log.close() 184 | if config.val: 185 | vallog.close() 186 | 187 | 188 | def val(log=None): 189 | net.eval() 190 | 191 | if config.teacher: 192 | print('Load teacher params') 193 | student_dict = net.module.state_dict() 194 | net.module.load_state_dict(teacher_dict) 195 | 196 | print('Perform validation...') 197 | res = [] 198 | t3 = time.time() 199 | for i, data in enumerate(testloader, 0): 200 | inputs = data.cuda() 201 | with torch.no_grad(): 202 | pos, height, offset = net(inputs) 203 | 204 | boxes = parse_det_offset(pos.cpu().numpy(), height.cpu().numpy(), offset.cpu().numpy(), config.size_test, score=0.1, down=4, nms_thresh=0.5) 205 | if len(boxes) > 0: 206 | boxes[:, [2, 3]] -= boxes[:, [0, 1]] 207 | 208 | for box in boxes: 209 | temp = dict() 210 | temp['image_id'] = i+1 211 | temp['category_id'] = 1 212 | temp['bbox'] = box[:4].tolist() 213 | temp['score'] = float(box[4]) 214 | res.append(temp) 215 | 216 | print('\r%d/%d' % (i + 1, len(testloader))), 217 | sys.stdout.flush() 218 | print('') 219 | 220 | if config.teacher: 221 | print('Load back student params') 222 | net.module.load_state_dict(student_dict) 223 | 224 | with open('./_temp_val.json', 'w') as f: 225 | json.dump(res, f) 226 | 227 | MRs = validate('./eval_city/val_gt.json', './_temp_val.json') 228 | t4 = time.time() 229 | print('Summary: [Reasonable: %.2f%%], [Bare: %.2f%%], [Partial: %.2f%%], [Heavy: %.2f%%]' 230 | % (MRs[0]*100, MRs[1]*100, MRs[2]*100, MRs[3]*100)) 231 | if log is not None: 232 | log.write("%.7f %.7f %.7f %.7f\n" % tuple(MRs)) 233 | print('Validation time used: %.3f' % (t4 - t3)) 234 | return MRs[0] 235 | 236 | 237 | if __name__ == '__main__': 238 | train() 239 | #val() 240 | -------------------------------------------------------------------------------- /trainval_torchstyle.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | import json 5 | import torch.optim as optim 6 | from copy import deepcopy 7 | from torch.utils.data import DataLoader 8 | from torchvision.transforms import ToTensor, Normalize, Compose, ColorJitter 9 | 10 | from net.loss import * 11 | from net.network import CSPNet, CSPNet_mod 12 | from config import Config 13 | from dataloader.loader import * 14 | from util.functions import parse_det_offset 15 | from eval_city.eval_script.eval_demo import validate 16 | 17 | 18 | config = Config() 19 | config.train_path = './data/citypersons' 20 | config.test_path = './data/citypersons' 21 | config.gpu_ids = [0, 1] 22 | config.onegpu = 4 23 | config.size_train = (640, 1280) 24 | config.size_test = (1024, 2048) 25 | config.init_lr = 2e-4 26 | config.num_epochs = 150 27 | config.offset = True 28 | config.val = True 29 | config.val_frequency = 1 30 | 31 | # dataset 32 | print('Dataset...') 33 | traintransform = Compose( 34 | [ColorJitter(brightness=0.5), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) 35 | traindataset = CityPersons(path=config.train_path, type='train', config=config, 36 | transform=traintransform) 37 | trainloader = DataLoader(traindataset, batch_size=config.onegpu*len(config.gpu_ids)) 38 | 39 | if config.val: 40 | testtransform = Compose( 41 | [ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) 42 | testdataset = CityPersons(path=config.train_path, type='val', config=config, 43 | transform=testtransform, preloaded=True) 44 | testloader = DataLoader(testdataset, batch_size=1) 45 | 46 | # net 47 | print('Net...') 48 
| net = CSPNet().cuda() 49 | # To continue training 50 | #net.load_state_dict(torch.load('./ckpt/DataParallel-9.pth')) 51 | 52 | # position 53 | center = cls_pos().cuda() 54 | height = reg_pos().cuda() 55 | offset = offset_pos().cuda() 56 | 57 | # optimizer 58 | params = [] 59 | for n, p in net.named_parameters(): 60 | if p.requires_grad: 61 | params.append({'params': p}) 62 | else: 63 | print(n) 64 | 65 | if config.teacher: 66 | print('The teacher model did not help in this setup, so this training option is disabled') 67 | exit(1) 68 | teacher_dict = net.state_dict() 69 | 70 | #if len(config.gpu_ids) > 1: 71 | net = nn.DataParallel(net, device_ids=config.gpu_ids) 72 | 73 | optimizer = optim.Adam(params, lr=config.init_lr) 74 | 75 | 76 | batchsize = config.onegpu * len(config.gpu_ids) 77 | train_batches = len(trainloader) 78 | 79 | config.print_conf() 80 | 81 | 82 | def criterion(output, label): 83 | cls_loss = center(output[0], label[0]) 84 | reg_loss = height(output[1], label[1]) 85 | off_loss = offset(output[2], label[2]) 86 | return cls_loss, reg_loss, off_loss 87 | 88 | 89 | def train(): 90 | 91 | print('Training start') 92 | if not os.path.exists('./ckpt'): 93 | os.mkdir('./ckpt') 94 | if not os.path.exists('./log'): 95 | os.mkdir('./log') 96 | 97 | # open log file 98 | log_file = './log/' + time.strftime('%Y%m%d', time.localtime(time.time()))+'.log' 99 | log = open(log_file, 'w') 100 | if config.val: 101 | vallog_file = log_file + '.val' 102 | vallog = open(vallog_file, 'w') 103 | 104 | best_loss = np.Inf 105 | best_loss_epoch = 0 106 | 107 | best_mr = 100 108 | best_mr_epoch = 0 109 | 110 | for epoch in range(150): 111 | print('----------') 112 | print('Epoch %d begin' % (epoch + 1)) 113 | t1 = time.time() 114 | 115 | epoch_loss = 0.0 116 | net.train() 117 | 118 | for i, data in enumerate(trainloader, 0): 119 | 120 | t3 = time.time() 121 | # get the inputs 122 | inputs, labels = data 123 | inputs = inputs.cuda() 124 | labels = [l.cuda().float() for l in labels] 125 | 126 | # zero the parameter gradients 127 | optimizer.zero_grad() 128 | 129 | # heat map 130 | outputs = net(inputs) 131 | 132 | # loss 133 | cls_loss, reg_loss, off_loss = criterion(outputs, labels) 134 | loss = cls_loss + reg_loss + off_loss 135 | 136 | # back-prop 137 | loss.backward() 138 | 139 | # update param 140 | optimizer.step() 141 | if config.teacher: 142 | for k, v in net.module.state_dict().items(): 143 | if k.find('num_batches_tracked') == -1: 144 | teacher_dict[k] = config.alpha * teacher_dict[k] + (1 - config.alpha) * v 145 | else: 146 | teacher_dict[k] = 1 * v 147 | 148 | # print statistics 149 | batch_loss = loss.item() 150 | batch_cls_loss = cls_loss.item() 151 | batch_reg_loss = reg_loss.item() 152 | batch_off_loss = off_loss.item() 153 | 154 | t4 = time.time() 155 | print('\r[Epoch %d/150, Batch %d/%d]$ loss: %.6f, cls: %.6f, reg: %.6f, off: %.6f, Time: %.3f sec ' % 156 | (epoch + 1, i + 1, train_batches, batch_loss, batch_cls_loss, batch_reg_loss, batch_off_loss, t4-t3)), 157 | epoch_loss += batch_loss 158 | print('') 159 | 160 | t2 = time.time() 161 | epoch_loss /= len(trainloader) 162 | print('Epoch %d end, AvgLoss is %.6f, Time used %.1f sec.' 
% (epoch+1, epoch_loss, int(t2-t1))) 163 | if epoch_loss < best_loss: 164 | best_loss = epoch_loss 165 | best_loss_epoch = epoch + 1 166 | print('Epoch %d has lowest loss: %.7f' % (best_loss_epoch, best_loss)) 167 | 168 | if config.val and epoch + 1 > 10 and (epoch + 1) % config.val_frequency == 0: 169 | cur_mr = val(vallog) 170 | if cur_mr < best_mr: 171 | best_mr = cur_mr 172 | best_mr_epoch = epoch + 1 173 | print('Epoch %d has lowest MR: %.7f' % (best_mr_epoch, best_mr)) 174 | 175 | log.write('%d %.7f\n' % (epoch+1, epoch_loss)) 176 | 177 | print('Save checkpoint...') 178 | filename = './ckpt/%s-%d.pth' % (net.module.__class__.__name__, epoch+1) 179 | 180 | torch.save(net.module.state_dict(), filename) 181 | if config.teacher: 182 | torch.save(teacher_dict, filename+'.tea') 183 | 184 | print('%s saved.' % filename) 185 | 186 | log.close() 187 | if config.val: 188 | vallog.close() 189 | 190 | 191 | def val(log=None): 192 | net.eval() 193 | 194 | if config.teacher: 195 | print('Load teacher params') 196 | student_dict = net.module.state_dict() 197 | net.module.load_state_dict(teacher_dict) 198 | 199 | print('Perform validation...') 200 | res = [] 201 | t3 = time.time() 202 | for i, data in enumerate(testloader, 0): 203 | inputs = data.cuda() 204 | with torch.no_grad(): 205 | pos, height, offset = net(inputs) 206 | 207 | boxes = parse_det_offset(pos.cpu().numpy(), height.cpu().numpy(), offset.cpu().numpy(), config.size_test, score=0.1, down=4, nms_thresh=0.5) 208 | if len(boxes) > 0: 209 | boxes[:, [2, 3]] -= boxes[:, [0, 1]] 210 | 211 | for box in boxes: 212 | temp = dict() 213 | temp['image_id'] = i+1 214 | temp['category_id'] = 1 215 | temp['bbox'] = box[:4].tolist() 216 | temp['score'] = float(box[4]) 217 | res.append(temp) 218 | 219 | print('\r%d/%d' % (i + 1, len(testloader))), 220 | sys.stdout.flush() 221 | print('') 222 | 223 | if config.teacher: 224 | print('Load back student params') 225 | net.module.load_state_dict(student_dict) 226 | 227 | with open('./_temp_val.json', 'w') as f: 228 | json.dump(res, f) 229 | 230 | MRs = validate('./eval_city/val_gt.json', './_temp_val.json') 231 | t4 = time.time() 232 | print('Summary: [Reasonable: %.2f%%], [Bare: %.2f%%], [Partial: %.2f%%], [Heavy: %.2f%%]' 233 | % (MRs[0]*100, MRs[1]*100, MRs[2]*100, MRs[3]*100)) 234 | if log is not None: 235 | log.write("%.7f %.7f %.7f %.7f\n" % tuple(MRs)) 236 | print('Validation time used: %.3f' % (t4 - t3)) 237 | return MRs[0] 238 | 239 | 240 | if __name__ == '__main__': 241 | train() 242 | #val() 243 | -------------------------------------------------------------------------------- /util/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf ./build 4 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lwpyr/CSP-pedestrian-detection-in-pytorch/f280cd08bebb5daeaba27d3ea6ac5f317b42f878/util/__init__.py -------------------------------------------------------------------------------- /util/functions.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import cv2 3 | import numpy as np 4 | from nms_wrapper import nms 5 | 6 | 7 | def resize(image, min_side=800, max_side=1400): 8 | rows, cols, cns = image.shape 9 | smallest_side = min(rows, cols) 10 | scale = 1.0 * min_side / 
smallest_side 11 | largest_side = max(rows, cols) 12 | 13 | if largest_side * scale > max_side: 14 | scale = 1.0 * max_side / largest_side 15 | image = cv2.resize(image, (int(round((cols * scale))), int(round((rows * scale))))) 16 | 17 | rows, cols, cns = image.shape 18 | 19 | pad_w = (-rows) % 32 20 | pad_h = (-cols) % 32 21 | 22 | new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32) 23 | new_image[:rows, :cols, :] = image.astype(np.float32) 24 | 25 | return new_image, scale 26 | 27 | 28 | def vis_detections(im, class_det, w=None): 29 | for det in class_det: 30 | bbox = det[:4] 31 | score = det[4] 32 | cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (127, 255, 0), 1) 33 | cv2.putText(im, '{:.3f}'.format(score), (int(bbox[0]), int(bbox[1] - 9)), cv2.FONT_HERSHEY_SIMPLEX, 34 | 0.6, (0, 0, 0), thickness=1, lineType=8) 35 | 36 | if w is not None: 37 | cv2.imwrite(w, im) 38 | 39 | 40 | def parse_det_offset(pos, height, offset, size, score=0.1, down=4, nms_thresh=0.3): 41 | pos = np.squeeze(pos) 42 | height = np.squeeze(height) 43 | offset_y = offset[0, 0, :, :] 44 | offset_x = offset[0, 1, :, :] 45 | y_c, x_c = np.where(pos > score) 46 | boxs = [] 47 | if len(y_c) > 0: 48 | for i in range(len(y_c)): 49 | h = np.exp(height[y_c[i], x_c[i]]) * down 50 | w = 0.41 * h 51 | o_y = offset_y[y_c[i], x_c[i]] 52 | o_x = offset_x[y_c[i], x_c[i]] 53 | s = pos[y_c[i], x_c[i]] 54 | x1, y1 = max(0, (x_c[i] + o_x + 0.5) * down - w / 2), max(0, (y_c[i] + o_y + 0.5) * down - h / 2) 55 | boxs.append([x1, y1, min(x1 + w, size[1]), min(y1 + h, size[0]), s]) 56 | boxs = np.asarray(boxs, dtype=np.float32) 57 | keep = nms(boxs, nms_thresh, usegpu=False, gpu_id=0) 58 | boxs = boxs[keep, :] 59 | return boxs -------------------------------------------------------------------------------- /util/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /util/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lwpyr/CSP-pedestrian-detection-in-pytorch/f280cd08bebb5daeaba27d3ea6ac5f317b42f878/util/nms/__init__.py -------------------------------------------------------------------------------- /util/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | 
    cdef int ndets = dets.shape[0]
    cdef np.ndarray[np.int_t, ndim=1] suppressed = \
        np.zeros((ndets), dtype=np.int)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep
--------------------------------------------------------------------------------
/util/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id);
--------------------------------------------------------------------------------
/util/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import numpy as np
cimport numpy as np

assert sizeof(int) == sizeof(np.int32_t)

cdef extern from "gpu_nms.hpp":
    void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)

def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
            np.int32_t device_id=0):
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out
    cdef np.ndarray[np.int32_t, ndim=1] \
        keep = np.zeros(boxes_num, dtype=np.int32)
    cdef np.ndarray[np.float32_t, ndim=1] \
        scores = dets[:, 4]
    cdef np.ndarray[np.int_t, ndim=1] \
        order = scores.argsort()[::-1]
    cdef np.ndarray[np.float32_t, ndim=2] \
        sorted_dets = dets[order, :]
    # the CUDA kernel expects score-sorted boxes; map the kept indices
    # back to the caller's original ordering before returning
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
    keep = keep[:num_out]
    return list(order[keep])
--------------------------------------------------------------------------------
/util/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
// ------------------------------------------------------------------
// Faster R-CNN
// Copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Shaoqing Ren
// ------------------------------------------------------------------

#include "gpu_nms.hpp"
#include <vector>
#include <iostream>

#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
int const threadsPerBlock = sizeof(unsigned long long) * 8;

__device__ inline float devIoU(float const * const a, float const * const b) {
  float left = max(a[0], b[0]), right = min(a[2], b[2]);
  float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
  float interS = width * height;
  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return interS / (Sa + Sb - interS);
}

// Each block compares one 64-box "row" tile against one 64-box "column"
// tile; every thread writes a 64-bit mask marking which column boxes its
// row box overlaps above the threshold.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  __syncthreads();

  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    if (row_start == col_start) {
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}

void _set_device(int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
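  // (Once the switch happens, the allocations, copies and kernel launch
  // issued by _nms below all target the requested device.)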
  CUDA_CHECK(cudaSetDevice(device_id));
}

void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id) {
  _set_device(device_id);

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);

  CUDA_CHECK(cudaMalloc(&boxes_dev,
                        boxes_num * boxes_dim * sizeof(float)));
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_num * boxes_dim * sizeof(float),
                        cudaMemcpyHostToDevice));

  CUDA_CHECK(cudaMalloc(&mask_dev,
                        boxes_num * col_blocks * sizeof(unsigned long long)));

  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
              DIVUP(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);

  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));

  std::vector<unsigned long long> remv(col_blocks);
  memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);

  // greedy pass over the score-sorted boxes: a box is kept iff no
  // higher-scoring kept box has set its bit in the suppression masks
  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }
  *num_out = num_to_keep;

  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}
--------------------------------------------------------------------------------
/util/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import numpy as np

def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline."""
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep
--------------------------------------------------------------------------------
/util/nms_wrapper.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

from nms.gpu_nms import gpu_nms
from nms.cpu_nms import cpu_nms
import numpy as np

def soft_nms(dets, sigma=0.5, Nt=0.3, threshold=0.001, method=1):
    # NOTE: cpu_soft_nms is defined in the Soft-NMS variant of cpu_nms.pyx,
    # which this repo does not ship; calling soft_nms without it raises a
    # NameError, so use nms() below instead.
    keep = cpu_soft_nms(np.ascontiguousarray(dets, dtype=np.float32),
                        np.float32(sigma), np.float32(Nt),
                        np.float32(threshold),
                        np.uint8(method))
    return keep

def nms(dets, thresh, usegpu, gpu_id):
    """Dispatch to either CPU or GPU NMS implementations."""

    if dets.shape[0] == 0:
        return []
    if usegpu:
        return gpu_nms(dets, thresh, device_id=gpu_id)
    else:
        return cpu_nms(dets, thresh)
--------------------------------------------------------------------------------
/util/setup.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import os
from os.path import join as pjoin
from setuptools import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
import subprocess
import numpy as np

def find_in_path(name, path):
    "Find a file in a search path"
    # Adapted from
    # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
    for dir in path.split(os.pathsep):
        binpath = pjoin(dir, name)
        if os.path.exists(binpath):
            return os.path.abspath(binpath)
    return None


def locate_cuda():
    """Locate the CUDA environment on the system

    Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
    and values giving the absolute path to each directory.

    Starts by looking for the CUDAHOME env variable. If not found, everything
    is based on finding 'nvcc' in the PATH.
    """

    # first check if the CUDAHOME env variable is in use
    if 'CUDAHOME' in os.environ:
        home = os.environ['CUDAHOME']
        nvcc = pjoin(home, 'bin', 'nvcc')
    else:
        # otherwise, search the PATH for NVCC
        default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
        nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
        if nvcc is None:
            raise EnvironmentError('The nvcc binary could not be '
                'located in your $PATH. Either add it to your path, or set $CUDAHOME')
        home = os.path.dirname(os.path.dirname(nvcc))

    cudaconfig = {'home':home, 'nvcc':nvcc,
                  'include': pjoin(home, 'include'),
                  'lib64': pjoin(home, 'lib64')}
    for k, v in cudaconfig.iteritems():
        if not os.path.exists(v):
            raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))

    return cudaconfig
CUDA = locate_cuda()


# Obtain the numpy include directory. This logic works across numpy versions.
try:
    numpy_include = np.get_include()
except AttributeError:
    numpy_include = np.get_numpy_include()

def customize_compiler_for_nvcc(self):
    """inject deep into distutils to customize how the dispatch
    to gcc/nvcc works.

    If you subclass UnixCCompiler, it's not trivial to get your subclass
    injected in, and still have the right customizations (i.e.
    distutils.sysconfig.customize_compiler) run on it. So instead of going
    the OO route, I have this. Note, it's kind of like a weird functional
    subclassing going on."""

    # tell the compiler it can process .cu files
    self.src_extensions.append('.cu')

    # save references to the default compiler_so and _compile methods
    default_compiler_so = self.compiler_so
    super = self._compile

    # now redefine the _compile method. This gets executed for each
    # object but distutils doesn't have the ability to change compilers
    # based on source extension: we add it.
    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
        if os.path.splitext(src)[1] == '.cu':
            # use cuda (nvcc) for .cu files
            self.set_executable('compiler_so', CUDA['nvcc'])
            # use only a subset of the extra_postargs, which are 1-1 translated
            # from the extra_compile_args in the Extension class
            postargs = extra_postargs['nvcc']
        else:
            postargs = extra_postargs['gcc']

        super(obj, src, ext, cc_args, postargs, pp_opts)
        # reset the default compiler_so, which we might have changed for cuda
        self.compiler_so = default_compiler_so

    # inject our redefined _compile method into the class
    self._compile = _compile


# run the customize_compiler
class custom_build_ext(build_ext):
    def build_extensions(self):
        customize_compiler_for_nvcc(self.compiler)
        build_ext.build_extensions(self)


ext_modules = [
    Extension(
        "nms.cpu_nms",
        ["nms/cpu_nms.pyx"],
        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
        include_dirs = [numpy_include]
    ),
    Extension('nms.gpu_nms',
        ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'],
        library_dirs=[CUDA['lib64']],
        libraries=['cudart'],
        language='c++',
        runtime_library_dirs=[CUDA['lib64']],
        # this syntax is specific to this build system
        # we're only going to use certain compiler args with nvcc and not with
        # gcc; the implementation of this trick is in customize_compiler() above
        extra_compile_args={'gcc': ["-Wno-unused-function"],
                            'nvcc': ['-arch=sm_35',
                                     '--ptxas-options=-v',
                                     '-c',
                                     '--compiler-options',
                                     "'-fPIC'"]},
        include_dirs = [numpy_include, CUDA['include']]
    ),
]

setup(
    name='fast_rcnn',
    ext_modules=ext_modules,
    # inject our custom trigger
    cmdclass={'build_ext': custom_build_ext},
)
--------------------------------------------------------------------------------
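Usage note (an addition, not a file from the tree): after compiling the extensions (`cd util && make all`, where the Makefile is assumed to invoke `python setup.py build_ext --inplace`), the `nms` dispatcher can be sanity-checked from inside `util/` roughly as follows; the box values are made up for illustration:

~~~
import numpy as np
from nms_wrapper import nms

# each row is x1, y1, x2, y2, score
dets = np.array([[10, 10, 60, 110, 0.9],
                 [12, 12, 62, 112, 0.8],   # near-duplicate of the first box
                 [200, 50, 240, 150, 0.7]], dtype=np.float32)

# CPU path; set usegpu=True (with a device id) to use the CUDA kernel instead
keep = nms(dets, 0.3, usegpu=False, gpu_id=0)
# keep -> [0, 2]: the near-duplicate is suppressed (IoU ~ 0.89 > 0.3)
~~~

The snippet must run with `util/` as the working directory (or on `sys.path`), since `nms_wrapper.py` imports `nms.cpu_nms` and `nms.gpu_nms` relative to it.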