├── README.md
├── create_coco_tf_record.py
├── dataset_util.py
└── pycocotools
    ├── __init__.py
    ├── __init__.pyc
    ├── __pycache__
    │   ├── __init__.cpython-35.pyc
    │   ├── coco.cpython-35.pyc
    │   └── mask.cpython-35.pyc
    ├── _mask.c
    ├── _mask.cpython-35m-x86_64-linux-gnu.so
    ├── _mask.pyx
    ├── coco.py
    ├── coco.pyc
    ├── cocoeval.py
    ├── mask.py
    └── mask.pyc

/README.md:
--------------------------------------------------------------------------------
1 | # tensorflow_object_detection_create_coco_tfrecord
2 | Convert the COCO dataset to TFRecord files for the TensorFlow Object Detection API.
3 | # Attention
4 | 1) For easy use of this script, your COCO dataset directory structure should look like this:
5 | ```
6 | +Your coco dataset root
7 |     +train2017
8 |     +val2017
9 |     +annotations
10 |         -instances_train2017.json
11 |         -instances_val2017.json
12 | ```
13 | 2) To use this script, download the Python COCO tools from the [coco website](http://mscoco.org/dataset/#download) and build them.
14 | After building, copy the pycocotools directory to the directory of this "create_coco_tf_record.py",
15 | or add the pycocotools path to the PYTHONPATH in your ~/.bashrc file.
16 | **For convenience, the pycocotools build from my machine is included in the project directory; you can use it with Python 3 directly. If you use Python 2, build the Python COCO tools from [coco](http://mscoco.org/dataset/#download) yourself:**
17 | ```
18 | git clone https://github.com/cocodataset/cocoapi
19 | cd cocoapi/PythonAPI/
20 | pip install Cython
21 | make
22 | ls pycocotools/
23 | cp -rf pycocotools PATH/tensorflow_object_detection_create_coco_tfrecord/
24 | ```
25 | 
26 | 
27 | # Example usage:
28 | ```
29 | python create_coco_tf_record.py --data_dir=/path/to/your/coco/root/directory \
30 |     --set=train \
31 |     --output_filepath=/where/you/want/to/save/pascal.record \
32 |     --shuffle_imgs=True
33 | ```
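34 | # Check the output (optional)
35 | A minimal sketch for sanity-checking the generated file (assumes TF 1.x; the path is just a placeholder for your own `--output_filepath`). It counts the records and prints the feature keys that create_coco_tf_record.py writes:
36 | ```python
37 | import tensorflow as tf
38 | 
39 | path = '/where/you/want/to/save/pascal.record'
40 | count = 0
41 | for serialized in tf.python_io.tf_record_iterator(path):
42 |     example = tf.train.Example.FromString(serialized)
43 |     if count == 0:
44 |         # keys written by dict_to_coco_example, e.g. image/encoded, image/height, ...
45 |         print(sorted(example.features.feature.keys()))
46 |     count += 1
47 | print('%d examples in %s' % (count, path))
48 | ```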
--------------------------------------------------------------------------------
/create_coco_tf_record.py:
--------------------------------------------------------------------------------
1 | r"""Convert raw Microsoft COCO dataset to TFRecord for object_detection.
2 | Attention Please!!!
3 | 
4 | 1) For easy use of this script, your COCO dataset directory structure should look like this:
5 |     +Your coco dataset root
6 |         +train2017
7 |         +val2017
8 |         +annotations
9 |             -instances_train2017.json
10 |             -instances_val2017.json
11 | 2) To use this script, download the Python COCO tools from "http://mscoco.org/dataset/#download" and build them.
12 | After building, copy the pycocotools directory to the directory of this "create_coco_tf_record.py",
13 | or add the pycocotools path to the PYTHONPATH in your ~/.bashrc file.
14 | 
15 | Example usage:
16 |     python create_coco_tf_record.py --data_dir=/path/to/your/coco/root/directory \
17 |         --set=train \
18 |         --output_filepath=/where/you/want/to/save/pascal.record \
19 |         --shuffle_imgs=True
20 | """
21 | 
22 | from __future__ import absolute_import
23 | from __future__ import division
24 | from __future__ import print_function
25 | from pycocotools.coco import COCO
26 | from PIL import Image
27 | from random import shuffle
28 | import os, sys
29 | import numpy as np
30 | import tensorflow as tf
31 | import logging
32 | 
33 | import dataset_util
34 | 
35 | flags = tf.app.flags
36 | flags.DEFINE_string('data_dir', '', 'Root directory of the raw Microsoft COCO dataset.')
37 | flags.DEFINE_string('set', 'train', 'Convert the training set or the validation set.')
38 | flags.DEFINE_string('output_filepath', '', 'Path to the output TFRecord.')
39 | flags.DEFINE_bool('shuffle_imgs', True, 'Whether to shuffle the coco images.')
40 | FLAGS = flags.FLAGS
41 | 
42 | 
43 | def load_coco_detection_dataset(imgs_dir, annotations_filepath, shuffle_img=True):
44 |     """Load data from the dataset with pycocotools, which can be downloaded from "http://mscoco.org/dataset/#download".
45 |     Args:
46 |         imgs_dir: directory of the coco images
47 |         annotations_filepath: file path of the coco annotations file
48 |         shuffle_img: whether to shuffle the image order
49 |     Returns:
50 |         coco_data: list of dictionaries holding the information of each image
51 |     """
52 |     coco = COCO(annotations_filepath)
53 |     img_ids = coco.getImgIds()  # all image ids in the annotation file (e.g. 82783 in train2014)
54 |     cat_ids = coco.getCatIds()  # category ids run up to 90 but are not continuous;
55 |                                 # [0,12,26,29,30,45,66,68,69,71,83] are unused ids in the coco dataset.
56 | 
57 |     if shuffle_img:
58 |         shuffle(img_ids)
59 | 
60 |     coco_data = []
61 | 
62 |     nb_imgs = len(img_ids)
63 |     for index, img_id in enumerate(img_ids):
64 |         if index % 100 == 0:
65 |             print("Reading images: %d / %d" % (index, nb_imgs))
66 |         img_info = {}
67 |         bboxes = []
68 |         labels = []
69 | 
70 |         img_detail = coco.loadImgs(img_id)[0]
71 |         pic_height = img_detail['height']
72 |         pic_width = img_detail['width']
73 | 
74 |         ann_ids = coco.getAnnIds(imgIds=img_id, catIds=cat_ids)
75 |         anns = coco.loadAnns(ann_ids)
76 |         for ann in anns:
77 |             bboxes_data = ann['bbox']
78 |             bboxes_data = [bboxes_data[0]/float(pic_width), bboxes_data[1]/float(pic_height),\
79 |                            bboxes_data[2]/float(pic_width), bboxes_data[3]/float(pic_height)]
80 |             # coco bounding boxes are [xmin, ymin, width, height]; normalize them to [0, 1] here
81 |             bboxes.append(bboxes_data)
82 |             labels.append(ann['category_id'])
83 | 
84 | 
85 |         img_path = os.path.join(imgs_dir, img_detail['file_name'])
86 |         img_bytes = tf.gfile.FastGFile(img_path, 'rb').read()
87 | 
88 |         img_info['pixel_data'] = img_bytes
89 |         img_info['height'] = pic_height
90 |         img_info['width'] = pic_width
91 |         img_info['bboxes'] = bboxes
92 |         img_info['labels'] = labels
93 | 
94 |         coco_data.append(img_info)
95 |     return coco_data
96 | 
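97 | # Worked example for the normalization above (illustrative numbers, not from the dataset):
98 | # a COCO bbox [x, y, w, h] = [120.0, 60.0, 240.0, 180.0] in a 640x480 image becomes
99 | # [120/640, 60/480, 240/640, 180/480] = [0.1875, 0.125, 0.375, 0.375];
100 | # dict_to_coco_example below then derives the corner coordinates:
101 | #   xmin = 0.1875, xmax = 0.1875 + 0.375 = 0.5625
102 | #   ymin = 0.125,  ymax = 0.125  + 0.375 = 0.5000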
103 | 
104 | def dict_to_coco_example(img_data):
105 |     """Convert the python dictionary data of one image to a tf.Example proto.
106 |     Args:
107 |         img_data: information of one image, including the bounding boxes, the labels of
108 |             the bounding boxes, height, width, and encoded pixel data.
109 |     Returns:
110 |         example: The converted tf.Example
111 |     """
112 |     bboxes = img_data['bboxes']
113 |     xmin, xmax, ymin, ymax = [], [], [], []
114 |     for bbox in bboxes:
115 |         xmin.append(bbox[0])
116 |         xmax.append(bbox[0] + bbox[2])
117 |         ymin.append(bbox[1])
118 |         ymax.append(bbox[1] + bbox[3])
119 | 
120 |     example = tf.train.Example(features=tf.train.Features(feature={
121 |         'image/height': dataset_util.int64_feature(img_data['height']),
122 |         'image/width': dataset_util.int64_feature(img_data['width']),
123 |         'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
124 |         'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
125 |         'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
126 |         'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
127 |         'image/object/class/label': dataset_util.int64_list_feature(img_data['labels']),
128 |         'image/encoded': dataset_util.bytes_feature(img_data['pixel_data']),
129 |         'image/format': dataset_util.bytes_feature('jpeg'.encode('utf-8')),
130 |     }))
131 |     return example
132 | 
133 | def main(_):
134 |     if FLAGS.set == "train":
135 |         imgs_dir = os.path.join(FLAGS.data_dir, 'train2017')
136 |         annotations_filepath = os.path.join(FLAGS.data_dir, 'annotations', 'instances_train2017.json')
137 |         print("Converting the coco train set to a tf record")
138 |     elif FLAGS.set == "val":
139 |         imgs_dir = os.path.join(FLAGS.data_dir, 'val2017')
140 |         annotations_filepath = os.path.join(FLAGS.data_dir, 'annotations', 'instances_val2017.json')
141 |         print("Converting the coco val set to a tf record")
142 |     else:
143 |         raise ValueError("you must convert either the train data or the val data")
144 |     # load all coco data
145 |     coco_data = load_coco_detection_dataset(imgs_dir, annotations_filepath, shuffle_img=FLAGS.shuffle_imgs)
146 |     total_imgs = len(coco_data)
147 |     # write coco data to the tf record
148 |     with tf.python_io.TFRecordWriter(FLAGS.output_filepath) as tfrecord_writer:
149 |         for index, img_data in enumerate(coco_data):
150 |             if index % 100 == 0:
151 |                 print("Converting images: %d / %d" % (index, total_imgs))
152 |             example = dict_to_coco_example(img_data)
153 |             tfrecord_writer.write(example.SerializeToString())
154 | 
155 | 
156 | if __name__ == "__main__":
157 |     tf.app.run()
158 | 
--------------------------------------------------------------------------------
/dataset_util.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================== 15 | 16 | """Utility functions for creating TFRecord data sets.""" 17 | 18 | import tensorflow as tf 19 | 20 | 21 | def int64_feature(value): 22 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 23 | 24 | 25 | def int64_list_feature(value): 26 | return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) 27 | 28 | 29 | def bytes_feature(value): 30 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 31 | 32 | 33 | def bytes_list_feature(value): 34 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) 35 | 36 | 37 | def float_list_feature(value): 38 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 39 | 40 | 41 | def read_examples_list(path): 42 | """Read list of training or validation examples. 43 | 44 | The file is assumed to contain a single example per line where the first 45 | token in the line is an identifier that allows us to find the image and 46 | annotation xml for that example. 47 | 48 | For example, the line: 49 | xyz 3 50 | would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored). 51 | 52 | Args: 53 | path: absolute path to examples list file. 54 | 55 | Returns: 56 | list of example identifiers (strings). 57 | """ 58 | with tf.gfile.GFile(path) as fid: 59 | lines = fid.readlines() 60 | return [line.strip().split(' ')[0] for line in lines] 61 | 62 | 63 | def recursive_parse_xml_to_dict(xml): 64 | """Recursively parses XML contents to python dict. 65 | 66 | We assume that `object` tags are the only ones that can appear 67 | multiple times at the same level of a tree. 68 | 69 | Args: 70 | xml: xml tree obtained by parsing XML file contents using lxml.etree 71 | 72 | Returns: 73 | Python dictionary holding XML contents. 
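74 | 
75 |   For example (an illustrative case), the tree parsed from
76 |   "<annotation><object><name>cat</name></object><object><name>dog</name></object></annotation>"
77 |   becomes: {'annotation': {'object': [{'name': 'cat'}, {'name': 'dog'}]}}.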
78 |   """
79 |   if not xml:
80 |     return {xml.tag: xml.text}
81 |   result = {}
82 |   for child in xml:
83 |     child_result = recursive_parse_xml_to_dict(child)
84 |     if child.tag != 'object':
85 |       result[child.tag] = child_result[child.tag]
86 |     else:
87 |       if child.tag not in result:
88 |         result[child.tag] = []
89 |       result[child.tag].append(child_result[child.tag])
90 |   return {xml.tag: result}
91 | 
--------------------------------------------------------------------------------
/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 
--------------------------------------------------------------------------------
/pycocotools/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MetaPeak/tensorflow_object_detection_create_coco_tfrecord/6f8bdedd255e0eae4767d62a1ebd670117360dd6/pycocotools/__init__.pyc
--------------------------------------------------------------------------------
/pycocotools/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MetaPeak/tensorflow_object_detection_create_coco_tfrecord/6f8bdedd255e0eae4767d62a1ebd670117360dd6/pycocotools/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/pycocotools/__pycache__/coco.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MetaPeak/tensorflow_object_detection_create_coco_tfrecord/6f8bdedd255e0eae4767d62a1ebd670117360dd6/pycocotools/__pycache__/coco.cpython-35.pyc
--------------------------------------------------------------------------------
/pycocotools/__pycache__/mask.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MetaPeak/tensorflow_object_detection_create_coco_tfrecord/6f8bdedd255e0eae4767d62a1ebd670117360dd6/pycocotools/__pycache__/mask.cpython-35.pyc
--------------------------------------------------------------------------------
/pycocotools/_mask.cpython-35m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MetaPeak/tensorflow_object_detection_create_coco_tfrecord/6f8bdedd255e0eae4767d62a1ebd670117360dd6/pycocotools/_mask.cpython-35m-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/pycocotools/_mask.pyx:
--------------------------------------------------------------------------------
1 | # distutils: language = c
2 | # distutils: sources = ../common/maskApi.c
3 | 
4 | #**************************************************************************
5 | # Microsoft COCO Toolbox. version 2.0
6 | # Data, paper, and tutorials available at: http://mscoco.org/
7 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
8 | # Licensed under the Simplified BSD License [see coco/license.txt]
9 | #**************************************************************************
10 | 
11 | __author__ = 'tsungyi'
12 | 
13 | import sys
14 | PYTHON_VERSION = sys.version_info[0]
15 | 
16 | # import both Python-level and C-level symbols of Numpy
17 | # the API uses Numpy to interface C and Python
18 | import numpy as np
19 | cimport numpy as np
20 | from libc.stdlib cimport malloc, free
21 | 
22 | # initialize Numpy. must do.
23 | np.import_array() 24 | 25 | # import numpy C function 26 | # we use PyArray_ENABLEFLAGS to make Numpy ndarray responsible to memoery management 27 | cdef extern from "numpy/arrayobject.h": 28 | void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) 29 | 30 | # Declare the prototype of the C functions in MaskApi.h 31 | cdef extern from "maskApi.h": 32 | ctypedef unsigned int uint 33 | ctypedef unsigned long siz 34 | ctypedef unsigned char byte 35 | ctypedef double* BB 36 | ctypedef struct RLE: 37 | siz h, 38 | siz w, 39 | siz m, 40 | uint* cnts, 41 | void rlesInit( RLE **R, siz n ) 42 | void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) 43 | void rleDecode( const RLE *R, byte *mask, siz n ) 44 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ) 45 | void rleArea( const RLE *R, siz n, uint *a ) 46 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) 47 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) 48 | void rleToBbox( const RLE *R, BB bb, siz n ) 49 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) 50 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) 51 | char* rleToString( const RLE *R ) 52 | void rleFrString( RLE *R, char *s, siz h, siz w ) 53 | 54 | # python class to wrap RLE array in C 55 | # the class handles the memory allocation and deallocation 56 | cdef class RLEs: 57 | cdef RLE *_R 58 | cdef siz _n 59 | 60 | def __cinit__(self, siz n =0): 61 | rlesInit(&self._R, n) 62 | self._n = n 63 | 64 | # free the RLE array here 65 | def __dealloc__(self): 66 | if self._R is not NULL: 67 | for i in range(self._n): 68 | free(self._R[i].cnts) 69 | free(self._R) 70 | def __getattr__(self, key): 71 | if key == 'n': 72 | return self._n 73 | raise AttributeError(key) 74 | 75 | # python class to wrap Mask array in C 76 | # the class handles the memory allocation and deallocation 77 | cdef class Masks: 78 | cdef byte *_mask 79 | cdef siz _h 80 | cdef siz _w 81 | cdef siz _n 82 | 83 | def __cinit__(self, h, w, n): 84 | self._mask = malloc(h*w*n* sizeof(byte)) 85 | self._h = h 86 | self._w = w 87 | self._n = n 88 | # def __dealloc__(self): 89 | # the memory management of _mask has been passed to np.ndarray 90 | # it doesn't need to be freed here 91 | 92 | # called when passing into np.array() and return an np.ndarray in column-major order 93 | def __array__(self): 94 | cdef np.npy_intp shape[1] 95 | shape[0] = self._h*self._w*self._n 96 | # Create a 1D array, and reshape it to fortran/Matlab column-major array 97 | ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F') 98 | # The _mask allocated by Masks is now handled by ndarray 99 | PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA) 100 | return ndarray 101 | 102 | # internal conversion from Python RLEs object to compressed RLE format 103 | def _toString(RLEs Rs): 104 | cdef siz n = Rs.n 105 | cdef bytes py_string 106 | cdef char* c_string 107 | objs = [] 108 | for i in range(n): 109 | c_string = rleToString( &Rs._R[i] ) 110 | py_string = c_string 111 | objs.append({ 112 | 'size': [Rs._R[i].h, Rs._R[i].w], 113 | 'counts': py_string 114 | }) 115 | free(c_string) 116 | return objs 117 | 118 | # internal conversion from compressed RLE format to Python RLEs object 119 | def _frString(rleObjs): 120 | cdef siz n = len(rleObjs) 121 | Rs = RLEs(n) 122 | cdef bytes py_string 123 | cdef char* c_string 124 | for i, obj in enumerate(rleObjs): 125 | if PYTHON_VERSION == 2: 126 | py_string = 
str(obj['counts']).encode('utf8') 127 | elif PYTHON_VERSION == 3: 128 | py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts'] 129 | else: 130 | raise Exception('Python version must be 2 or 3') 131 | c_string = py_string 132 | rleFrString( &Rs._R[i], c_string, obj['size'][0], obj['size'][1] ) 133 | return Rs 134 | 135 | # encode mask to RLEs objects 136 | # list of RLE string can be generated by RLEs member function 137 | def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask): 138 | h, w, n = mask.shape[0], mask.shape[1], mask.shape[2] 139 | cdef RLEs Rs = RLEs(n) 140 | rleEncode(Rs._R,mask.data,h,w,n) 141 | objs = _toString(Rs) 142 | return objs 143 | 144 | # decode mask from compressed list of RLE string or RLEs object 145 | def decode(rleObjs): 146 | cdef RLEs Rs = _frString(rleObjs) 147 | h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n 148 | masks = Masks(h, w, n) 149 | rleDecode(Rs._R, masks._mask, n); 150 | return np.array(masks) 151 | 152 | def merge(rleObjs, intersect=0): 153 | cdef RLEs Rs = _frString(rleObjs) 154 | cdef RLEs R = RLEs(1) 155 | rleMerge(Rs._R, R._R, Rs._n, intersect) 156 | obj = _toString(R)[0] 157 | return obj 158 | 159 | def area(rleObjs): 160 | cdef RLEs Rs = _frString(rleObjs) 161 | cdef uint* _a = malloc(Rs._n* sizeof(uint)) 162 | rleArea(Rs._R, Rs._n, _a) 163 | cdef np.npy_intp shape[1] 164 | shape[0] = Rs._n 165 | a = np.array((Rs._n, ), dtype=np.uint8) 166 | a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a) 167 | PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA) 168 | return a 169 | 170 | # iou computation. support function overload (RLEs-RLEs and bbox-bbox). 171 | def iou( dt, gt, pyiscrowd ): 172 | def _preproc(objs): 173 | if len(objs) == 0: 174 | return objs 175 | if type(objs) == np.ndarray: 176 | if len(objs.shape) == 1: 177 | objs = objs.reshape((objs[0], 1)) 178 | # check if it's Nx4 bbox 179 | if not len(objs.shape) == 2 or not objs.shape[1] == 4: 180 | raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension') 181 | objs = objs.astype(np.double) 182 | elif type(objs) == list: 183 | # check if list is in box format and convert it to np.ndarray 184 | isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs])) 185 | isrle = np.all(np.array([type(obj) == dict for obj in objs])) 186 | if isbox: 187 | objs = np.array(objs, dtype=np.double) 188 | if len(objs.shape) == 1: 189 | objs = objs.reshape((1,objs.shape[0])) 190 | elif isrle: 191 | objs = _frString(objs) 192 | else: 193 | raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])') 194 | else: 195 | raise Exception('unrecognized type. 
The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.') 196 | return objs 197 | def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 198 | rleIou( dt._R, gt._R, m, n, iscrowd.data, _iou.data ) 199 | def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 200 | bbIou( dt.data, gt.data, m, n, iscrowd.data, _iou.data ) 201 | def _len(obj): 202 | cdef siz N = 0 203 | if type(obj) == RLEs: 204 | N = obj.n 205 | elif len(obj)==0: 206 | pass 207 | elif type(obj) == np.ndarray: 208 | N = obj.shape[0] 209 | return N 210 | # convert iscrowd to numpy array 211 | cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8) 212 | # simple type checking 213 | cdef siz m, n 214 | dt = _preproc(dt) 215 | gt = _preproc(gt) 216 | m = _len(dt) 217 | n = _len(gt) 218 | if m == 0 or n == 0: 219 | return [] 220 | if not type(dt) == type(gt): 221 | raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray') 222 | 223 | # define local variables 224 | cdef double* _iou = 0 225 | cdef np.npy_intp shape[1] 226 | # check type and assign iou function 227 | if type(dt) == RLEs: 228 | _iouFun = _rleIou 229 | elif type(dt) == np.ndarray: 230 | _iouFun = _bbIou 231 | else: 232 | raise Exception('input data type not allowed.') 233 | _iou = malloc(m*n* sizeof(double)) 234 | iou = np.zeros((m*n, ), dtype=np.double) 235 | shape[0] = m*n 236 | iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou) 237 | PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA) 238 | _iouFun(dt, gt, iscrowd, m, n, iou) 239 | return iou.reshape((m,n), order='F') 240 | 241 | def toBbox( rleObjs ): 242 | cdef RLEs Rs = _frString(rleObjs) 243 | cdef siz n = Rs.n 244 | cdef BB _bb = malloc(4*n* sizeof(double)) 245 | rleToBbox( Rs._R, _bb, n ) 246 | cdef np.npy_intp shape[1] 247 | shape[0] = 4*n 248 | bb = np.array((1,4*n), dtype=np.double) 249 | bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4)) 250 | PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA) 251 | return bb 252 | 253 | def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ): 254 | cdef siz n = bb.shape[0] 255 | Rs = RLEs(n) 256 | rleFrBbox( Rs._R, bb.data, h, w, n ) 257 | objs = _toString(Rs) 258 | return objs 259 | 260 | def frPoly( poly, siz h, siz w ): 261 | cdef np.ndarray[np.double_t, ndim=1] np_poly 262 | n = len(poly) 263 | Rs = RLEs(n) 264 | for i, p in enumerate(poly): 265 | np_poly = np.array(p, dtype=np.double, order='F') 266 | rleFrPoly( &Rs._R[i], np_poly.data, int(len(p)/2), h, w ) 267 | objs = _toString(Rs) 268 | return objs 269 | 270 | def frUncompressedRLE(ucRles, siz h, siz w): 271 | cdef np.ndarray[np.uint32_t, ndim=1] cnts 272 | cdef RLE R 273 | cdef uint *data 274 | n = len(ucRles) 275 | objs = [] 276 | for i in range(n): 277 | Rs = RLEs(1) 278 | cnts = np.array(ucRles[i]['counts'], dtype=np.uint32) 279 | # time for malloc can be saved here but it's fine 280 | data = malloc(len(cnts)* sizeof(uint)) 281 | for j in range(len(cnts)): 282 | data[j] = cnts[j] 283 | R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), data) 284 | Rs._R[0] = R 285 | objs.append(_toString(Rs)[0]) 286 | return objs 287 | 288 | def frPyObjects(pyobj, h, w): 289 | # encode rle from a list of python objects 290 | if type(pyobj) == np.ndarray: 291 | objs = frBbox(pyobj, h, w) 292 | elif type(pyobj) == 
list and len(pyobj[0]) == 4: 293 | objs = frBbox(pyobj, h, w) 294 | elif type(pyobj) == list and len(pyobj[0]) > 4: 295 | objs = frPoly(pyobj, h, w) 296 | elif type(pyobj) == list and type(pyobj[0]) == dict \ 297 | and 'counts' in pyobj[0] and 'size' in pyobj[0]: 298 | objs = frUncompressedRLE(pyobj, h, w) 299 | # encode rle from single python object 300 | elif type(pyobj) == list and len(pyobj) == 4: 301 | objs = frBbox([pyobj], h, w)[0] 302 | elif type(pyobj) == list and len(pyobj) > 4: 303 | objs = frPoly([pyobj], h, w)[0] 304 | elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj: 305 | objs = frUncompressedRLE([pyobj], h, w)[0] 306 | else: 307 | raise Exception('input type is not supported.') 308 | return objs 309 | -------------------------------------------------------------------------------- /pycocotools/coco.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | __version__ = '2.0' 3 | # Interface for accessing the Microsoft COCO dataset. 4 | 5 | # Microsoft COCO is a large image dataset designed for object detection, 6 | # segmentation, and caption generation. pycocotools is a Python API that 7 | # assists in loading, parsing and visualizing the annotations in COCO. 8 | # Please visit http://mscoco.org/ for more information on COCO, including 9 | # for the data, paper, and tutorials. The exact format of the annotations 10 | # is also described on the COCO website. For example usage of the pycocotools 11 | # please see pycocotools_demo.ipynb. In addition to this API, please download both 12 | # the COCO images and annotations in order to run the demo. 13 | 14 | # An alternative to using the API is to load the annotations directly 15 | # into Python dictionary 16 | # Using the API provides additional utility functions. Note that this API 17 | # supports both *instance* and *caption* annotations. In the case of 18 | # captions not all functions are defined (e.g. categories are undefined). 19 | 20 | # The following API functions are defined: 21 | # COCO - COCO api class that loads COCO annotation file and prepare data structures. 22 | # decodeMask - Decode binary mask M encoded via run-length encoding. 23 | # encodeMask - Encode binary mask M using run-length encoding. 24 | # getAnnIds - Get ann ids that satisfy given filter conditions. 25 | # getCatIds - Get cat ids that satisfy given filter conditions. 26 | # getImgIds - Get img ids that satisfy given filter conditions. 27 | # loadAnns - Load anns with the specified ids. 28 | # loadCats - Load cats with the specified ids. 29 | # loadImgs - Load imgs with the specified ids. 30 | # annToMask - Convert segmentation in an annotation to binary mask. 31 | # showAnns - Display the specified annotations. 32 | # loadRes - Load algorithm results and create API for accessing them. 33 | # download - Download COCO images from mscoco.org server. 34 | # Throughout the API "ann"=annotation, "cat"=category, and "img"=image. 35 | # Help on each functions can be accessed by: "help COCO>function". 36 | 37 | # See also COCO>decodeMask, 38 | # COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds, 39 | # COCO>getImgIds, COCO>loadAnns, COCO>loadCats, 40 | # COCO>loadImgs, COCO>annToMask, COCO>showAnns 41 | 42 | # Microsoft COCO Toolbox. version 2.0 43 | # Data, paper, and tutorials available at: http://mscoco.org/ 44 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2014. 
45 | # Licensed under the Simplified BSD License [see bsd.txt] 46 | 47 | import json 48 | import time 49 | import matplotlib.pyplot as plt 50 | from matplotlib.collections import PatchCollection 51 | from matplotlib.patches import Polygon 52 | import numpy as np 53 | import copy 54 | import itertools 55 | from . import mask as maskUtils 56 | import os 57 | from collections import defaultdict 58 | import sys 59 | PYTHON_VERSION = sys.version_info[0] 60 | if PYTHON_VERSION == 2: 61 | from urllib import urlretrieve 62 | elif PYTHON_VERSION == 3: 63 | from urllib.request import urlretrieve 64 | 65 | class COCO: 66 | def __init__(self, annotation_file=None): 67 | """ 68 | Constructor of Microsoft COCO helper class for reading and visualizing annotations. 69 | :param annotation_file (str): location of annotation file 70 | :param image_folder (str): location to the folder that hosts images. 71 | :return: 72 | """ 73 | # load dataset 74 | self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict() 75 | self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list) 76 | if not annotation_file == None: 77 | print('loading annotations into memory...') 78 | tic = time.time() 79 | dataset = json.load(open(annotation_file, 'r')) 80 | assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset)) 81 | print('Done (t={:0.2f}s)'.format(time.time()- tic)) 82 | self.dataset = dataset 83 | self.createIndex() 84 | 85 | def createIndex(self): 86 | # create index 87 | print('creating index...') 88 | anns, cats, imgs = {}, {}, {} 89 | imgToAnns,catToImgs = defaultdict(list),defaultdict(list) 90 | if 'annotations' in self.dataset: 91 | for ann in self.dataset['annotations']: 92 | imgToAnns[ann['image_id']].append(ann) 93 | anns[ann['id']] = ann 94 | 95 | if 'images' in self.dataset: 96 | for img in self.dataset['images']: 97 | imgs[img['id']] = img 98 | 99 | if 'categories' in self.dataset: 100 | for cat in self.dataset['categories']: 101 | cats[cat['id']] = cat 102 | 103 | if 'annotations' in self.dataset and 'categories' in self.dataset: 104 | for ann in self.dataset['annotations']: 105 | catToImgs[ann['category_id']].append(ann['image_id']) 106 | 107 | print('index created!') 108 | 109 | # create class members 110 | self.anns = anns 111 | self.imgToAnns = imgToAnns 112 | self.catToImgs = catToImgs 113 | self.imgs = imgs 114 | self.cats = cats 115 | 116 | def info(self): 117 | """ 118 | Print information about the annotation file. 119 | :return: 120 | """ 121 | for key, value in self.dataset['info'].items(): 122 | print('{}: {}'.format(key, value)) 123 | 124 | def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None): 125 | """ 126 | Get ann ids that satisfy given filter conditions. default skips that filter 127 | :param imgIds (int array) : get anns for given imgs 128 | catIds (int array) : get anns for given cats 129 | areaRng (float array) : get anns for given area range (e.g. 
[0 inf]) 130 | iscrowd (boolean) : get anns for given crowd label (False or True) 131 | :return: ids (int array) : integer array of ann ids 132 | """ 133 | imgIds = imgIds if type(imgIds) == list else [imgIds] 134 | catIds = catIds if type(catIds) == list else [catIds] 135 | 136 | if len(imgIds) == len(catIds) == len(areaRng) == 0: 137 | anns = self.dataset['annotations'] 138 | else: 139 | if not len(imgIds) == 0: 140 | lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns] 141 | anns = list(itertools.chain.from_iterable(lists)) 142 | else: 143 | anns = self.dataset['annotations'] 144 | anns = anns if len(catIds) == 0 else [ann for ann in anns if ann['category_id'] in catIds] 145 | anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]] 146 | if not iscrowd == None: 147 | ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd] 148 | else: 149 | ids = [ann['id'] for ann in anns] 150 | return ids 151 | 152 | def getCatIds(self, catNms=[], supNms=[], catIds=[]): 153 | """ 154 | filtering parameters. default skips that filter. 155 | :param catNms (str array) : get cats for given cat names 156 | :param supNms (str array) : get cats for given supercategory names 157 | :param catIds (int array) : get cats for given cat ids 158 | :return: ids (int array) : integer array of cat ids 159 | """ 160 | catNms = catNms if type(catNms) == list else [catNms] 161 | supNms = supNms if type(supNms) == list else [supNms] 162 | catIds = catIds if type(catIds) == list else [catIds] 163 | 164 | if len(catNms) == len(supNms) == len(catIds) == 0: 165 | cats = self.dataset['categories'] 166 | else: 167 | cats = self.dataset['categories'] 168 | cats = cats if len(catNms) == 0 else [cat for cat in cats if cat['name'] in catNms] 169 | cats = cats if len(supNms) == 0 else [cat for cat in cats if cat['supercategory'] in supNms] 170 | cats = cats if len(catIds) == 0 else [cat for cat in cats if cat['id'] in catIds] 171 | ids = [cat['id'] for cat in cats] 172 | return ids 173 | 174 | def getImgIds(self, imgIds=[], catIds=[]): 175 | ''' 176 | Get img ids that satisfy given filter conditions. 177 | :param imgIds (int array) : get imgs for given ids 178 | :param catIds (int array) : get imgs with all given cats 179 | :return: ids (int array) : integer array of img ids 180 | ''' 181 | imgIds = imgIds if type(imgIds) == list else [imgIds] 182 | catIds = catIds if type(catIds) == list else [catIds] 183 | 184 | if len(imgIds) == len(catIds) == 0: 185 | ids = self.imgs.keys() 186 | else: 187 | ids = set(imgIds) 188 | for i, catId in enumerate(catIds): 189 | if i == 0 and len(ids) == 0: 190 | ids = set(self.catToImgs[catId]) 191 | else: 192 | ids &= set(self.catToImgs[catId]) 193 | return list(ids) 194 | 195 | def loadAnns(self, ids=[]): 196 | """ 197 | Load anns with the specified ids. 198 | :param ids (int array) : integer ids specifying anns 199 | :return: anns (object array) : loaded ann objects 200 | """ 201 | if type(ids) == list: 202 | return [self.anns[id] for id in ids] 203 | elif type(ids) == int: 204 | return [self.anns[ids]] 205 | 206 | def loadCats(self, ids=[]): 207 | """ 208 | Load cats with the specified ids. 
209 | :param ids (int array) : integer ids specifying cats 210 | :return: cats (object array) : loaded cat objects 211 | """ 212 | if type(ids) == list: 213 | return [self.cats[id] for id in ids] 214 | elif type(ids) == int: 215 | return [self.cats[ids]] 216 | 217 | def loadImgs(self, ids=[]): 218 | """ 219 | Load anns with the specified ids. 220 | :param ids (int array) : integer ids specifying img 221 | :return: imgs (object array) : loaded img objects 222 | """ 223 | if type(ids) == list: 224 | return [self.imgs[id] for id in ids] 225 | elif type(ids) == int: 226 | return [self.imgs[ids]] 227 | 228 | def showAnns(self, anns): 229 | """ 230 | Display the specified annotations. 231 | :param anns (array of object): annotations to display 232 | :return: None 233 | """ 234 | if len(anns) == 0: 235 | return 0 236 | if 'segmentation' in anns[0] or 'keypoints' in anns[0]: 237 | datasetType = 'instances' 238 | elif 'caption' in anns[0]: 239 | datasetType = 'captions' 240 | else: 241 | raise Exception('datasetType not supported') 242 | if datasetType == 'instances': 243 | ax = plt.gca() 244 | ax.set_autoscale_on(False) 245 | polygons = [] 246 | color = [] 247 | for ann in anns: 248 | c = (np.random.random((1, 3))*0.6+0.4).tolist()[0] 249 | if 'segmentation' in ann: 250 | if type(ann['segmentation']) == list: 251 | # polygon 252 | for seg in ann['segmentation']: 253 | poly = np.array(seg).reshape((int(len(seg)/2), 2)) 254 | polygons.append(Polygon(poly)) 255 | color.append(c) 256 | else: 257 | # mask 258 | t = self.imgs[ann['image_id']] 259 | if type(ann['segmentation']['counts']) == list: 260 | rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width']) 261 | else: 262 | rle = [ann['segmentation']] 263 | m = maskUtils.decode(rle) 264 | img = np.ones( (m.shape[0], m.shape[1], 3) ) 265 | if ann['iscrowd'] == 1: 266 | color_mask = np.array([2.0,166.0,101.0])/255 267 | if ann['iscrowd'] == 0: 268 | color_mask = np.random.random((1, 3)).tolist()[0] 269 | for i in range(3): 270 | img[:,:,i] = color_mask[i] 271 | ax.imshow(np.dstack( (img, m*0.5) )) 272 | if 'keypoints' in ann and type(ann['keypoints']) == list: 273 | # turn skeleton into zero-based index 274 | sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1 275 | kp = np.array(ann['keypoints']) 276 | x = kp[0::3] 277 | y = kp[1::3] 278 | v = kp[2::3] 279 | for sk in sks: 280 | if np.all(v[sk]>0): 281 | plt.plot(x[sk],y[sk], linewidth=3, color=c) 282 | plt.plot(x[v>0], y[v>0],'o',markersize=8, markerfacecolor=c, markeredgecolor='k',markeredgewidth=2) 283 | plt.plot(x[v>1], y[v>1],'o',markersize=8, markerfacecolor=c, markeredgecolor=c, markeredgewidth=2) 284 | p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4) 285 | ax.add_collection(p) 286 | p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2) 287 | ax.add_collection(p) 288 | elif datasetType == 'captions': 289 | for ann in anns: 290 | print(ann['caption']) 291 | 292 | def loadRes(self, resFile): 293 | """ 294 | Load result file and return a result api object. 
295 | :param resFile (str) : file name of result file 296 | :return: res (obj) : result api object 297 | """ 298 | res = COCO() 299 | res.dataset['images'] = [img for img in self.dataset['images']] 300 | 301 | print('Loading and preparing results...') 302 | tic = time.time() 303 | if type(resFile) == str or type(resFile) == unicode: 304 | anns = json.load(open(resFile)) 305 | elif type(resFile) == np.ndarray: 306 | anns = self.loadNumpyAnnotations(resFile) 307 | else: 308 | anns = resFile 309 | assert type(anns) == list, 'results in not an array of objects' 310 | annsImgIds = [ann['image_id'] for ann in anns] 311 | assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ 312 | 'Results do not correspond to current coco set' 313 | if 'caption' in anns[0]: 314 | imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) 315 | res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] 316 | for id, ann in enumerate(anns): 317 | ann['id'] = id+1 318 | elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: 319 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 320 | for id, ann in enumerate(anns): 321 | bb = ann['bbox'] 322 | x1, x2, y1, y2 = [bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]] 323 | if not 'segmentation' in ann: 324 | ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] 325 | ann['area'] = bb[2]*bb[3] 326 | ann['id'] = id+1 327 | ann['iscrowd'] = 0 328 | elif 'segmentation' in anns[0]: 329 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 330 | for id, ann in enumerate(anns): 331 | # now only support compressed RLE format as segmentation results 332 | ann['area'] = maskUtils.area(ann['segmentation']) 333 | if not 'bbox' in ann: 334 | ann['bbox'] = maskUtils.toBbox(ann['segmentation']) 335 | ann['id'] = id+1 336 | ann['iscrowd'] = 0 337 | elif 'keypoints' in anns[0]: 338 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 339 | for id, ann in enumerate(anns): 340 | s = ann['keypoints'] 341 | x = s[0::3] 342 | y = s[1::3] 343 | x0,x1,y0,y1 = np.min(x), np.max(x), np.min(y), np.max(y) 344 | ann['area'] = (x1-x0)*(y1-y0) 345 | ann['id'] = id + 1 346 | ann['bbox'] = [x0,y0,x1-x0,y1-y0] 347 | print('DONE (t={:0.2f}s)'.format(time.time()- tic)) 348 | 349 | res.dataset['annotations'] = anns 350 | res.createIndex() 351 | return res 352 | 353 | def download(self, tarDir = None, imgIds = [] ): 354 | ''' 355 | Download COCO images from mscoco.org server. 
356 | :param tarDir (str): COCO results directory name 357 | imgIds (list): images to be downloaded 358 | :return: 359 | ''' 360 | if tarDir is None: 361 | print('Please specify target directory') 362 | return -1 363 | if len(imgIds) == 0: 364 | imgs = self.imgs.values() 365 | else: 366 | imgs = self.loadImgs(imgIds) 367 | N = len(imgs) 368 | if not os.path.exists(tarDir): 369 | os.makedirs(tarDir) 370 | for i, img in enumerate(imgs): 371 | tic = time.time() 372 | fname = os.path.join(tarDir, img['file_name']) 373 | if not os.path.exists(fname): 374 | urlretrieve(img['coco_url'], fname) 375 | print('downloaded {}/{} images (t={:0.1f}s)'.format(i, N, time.time()- tic)) 376 | 377 | def loadNumpyAnnotations(self, data): 378 | """ 379 | Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class} 380 | :param data (numpy.ndarray) 381 | :return: annotations (python nested list) 382 | """ 383 | print('Converting ndarray to lists...') 384 | assert(type(data) == np.ndarray) 385 | print(data.shape) 386 | assert(data.shape[1] == 7) 387 | N = data.shape[0] 388 | ann = [] 389 | for i in range(N): 390 | if i % 1000000 == 0: 391 | print('{}/{}'.format(i,N)) 392 | ann += [{ 393 | 'image_id' : int(data[i, 0]), 394 | 'bbox' : [ data[i, 1], data[i, 2], data[i, 3], data[i, 4] ], 395 | 'score' : data[i, 5], 396 | 'category_id': int(data[i, 6]), 397 | }] 398 | return ann 399 | 400 | def annToRLE(self, ann): 401 | """ 402 | Convert annotation which can be polygons, uncompressed RLE to RLE. 403 | :return: binary mask (numpy 2D array) 404 | """ 405 | t = self.imgs[ann['image_id']] 406 | h, w = t['height'], t['width'] 407 | segm = ann['segmentation'] 408 | if type(segm) == list: 409 | # polygon -- a single object might consist of multiple parts 410 | # we merge all parts into one mask rle code 411 | rles = maskUtils.frPyObjects(segm, h, w) 412 | rle = maskUtils.merge(rles) 413 | elif type(segm['counts']) == list: 414 | # uncompressed RLE 415 | rle = maskUtils.frPyObjects(segm, h, w) 416 | else: 417 | # rle 418 | rle = ann['segmentation'] 419 | return rle 420 | 421 | def annToMask(self, ann): 422 | """ 423 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 424 | :return: binary mask (numpy 2D array) 425 | """ 426 | rle = self.annToRLE(ann) 427 | m = maskUtils.decode(rle) 428 | return m -------------------------------------------------------------------------------- /pycocotools/coco.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MetaPeak/tensorflow_object_detection_create_coco_tfrecord/6f8bdedd255e0eae4767d62a1ebd670117360dd6/pycocotools/coco.pyc -------------------------------------------------------------------------------- /pycocotools/cocoeval.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import numpy as np 4 | import datetime 5 | import time 6 | from collections import defaultdict 7 | from . import mask as maskUtils 8 | import copy 9 | 10 | class COCOeval: 11 | # Interface for evaluating detection on the Microsoft COCO dataset. 12 | # 13 | # The usage for CocoEval is as follows: 14 | # cocoGt=..., cocoDt=... 
# load dataset and results 15 | # E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object 16 | # E.params.recThrs = ...; # set parameters as desired 17 | # E.evaluate(); # run per image evaluation 18 | # E.accumulate(); # accumulate per image results 19 | # E.summarize(); # display summary metrics of results 20 | # For example usage see evalDemo.m and http://mscoco.org/. 21 | # 22 | # The evaluation parameters are as follows (defaults in brackets): 23 | # imgIds - [all] N img ids to use for evaluation 24 | # catIds - [all] K cat ids to use for evaluation 25 | # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation 26 | # recThrs - [0:.01:1] R=101 recall thresholds for evaluation 27 | # areaRng - [...] A=4 object area ranges for evaluation 28 | # maxDets - [1 10 100] M=3 thresholds on max detections per image 29 | # iouType - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints' 30 | # iouType replaced the now DEPRECATED useSegm parameter. 31 | # useCats - [1] if true use category labels for evaluation 32 | # Note: if useCats=0 category labels are ignored as in proposal scoring. 33 | # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified. 34 | # 35 | # evaluate(): evaluates detections on every image and every category and 36 | # concats the results into the "evalImgs" with fields: 37 | # dtIds - [1xD] id for each of the D detections (dt) 38 | # gtIds - [1xG] id for each of the G ground truths (gt) 39 | # dtMatches - [TxD] matching gt id at each IoU or 0 40 | # gtMatches - [TxG] matching dt id at each IoU or 0 41 | # dtScores - [1xD] confidence of each dt 42 | # gtIgnore - [1xG] ignore flag for each gt 43 | # dtIgnore - [TxD] ignore flag for each dt at each IoU 44 | # 45 | # accumulate(): accumulates the per-image, per-category evaluation 46 | # results in "evalImgs" into the dictionary "eval" with fields: 47 | # params - parameters used for evaluation 48 | # date - date evaluation was performed 49 | # counts - [T,R,K,A,M] parameter dimensions (see above) 50 | # precision - [TxRxKxAxM] precision for every evaluation setting 51 | # recall - [TxKxAxM] max recall for every evaluation setting 52 | # Note: precision and recall==-1 for settings with no gt objects. 53 | # 54 | # See also coco, mask, pycocoDemo, pycocoEvalDemo 55 | # 56 | # Microsoft COCO Toolbox. version 2.0 57 | # Data, paper, and tutorials available at: http://mscoco.org/ 58 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 59 | # Licensed under the Simplified BSD License [see coco/license.txt] 60 | def __init__(self, cocoGt=None, cocoDt=None, iouType='segm'): 61 | ''' 62 | Initialize CocoEval using coco APIs for gt and dt 63 | :param cocoGt: coco object with ground truth annotations 64 | :param cocoDt: coco object with detection results 65 | :return: None 66 | ''' 67 | if not iouType: 68 | print('iouType not specified. 
use default iouType segm') 69 | self.cocoGt = cocoGt # ground truth COCO API 70 | self.cocoDt = cocoDt # detections COCO API 71 | self.params = {} # evaluation parameters 72 | self.evalImgs = defaultdict(list) # per-image per-category evaluation results [KxAxI] elements 73 | self.eval = {} # accumulated evaluation results 74 | self._gts = defaultdict(list) # gt for evaluation 75 | self._dts = defaultdict(list) # dt for evaluation 76 | self.params = Params(iouType=iouType) # parameters 77 | self._paramsEval = {} # parameters for evaluation 78 | self.stats = [] # result summarization 79 | self.ious = {} # ious between all gts and dts 80 | if not cocoGt is None: 81 | self.params.imgIds = sorted(cocoGt.getImgIds()) 82 | self.params.catIds = sorted(cocoGt.getCatIds()) 83 | 84 | 85 | def _prepare(self): 86 | ''' 87 | Prepare ._gts and ._dts for evaluation based on params 88 | :return: None 89 | ''' 90 | def _toMask(anns, coco): 91 | # modify ann['segmentation'] by reference 92 | for ann in anns: 93 | rle = coco.annToRLE(ann) 94 | ann['segmentation'] = rle 95 | p = self.params 96 | if p.useCats: 97 | gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) 98 | dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) 99 | else: 100 | gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds)) 101 | dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds)) 102 | 103 | # convert ground truth to mask if iouType == 'segm' 104 | if p.iouType == 'segm': 105 | _toMask(gts, self.cocoGt) 106 | _toMask(dts, self.cocoDt) 107 | # set ignore flag 108 | for gt in gts: 109 | gt['ignore'] = gt['ignore'] if 'ignore' in gt else 0 110 | gt['ignore'] = 'iscrowd' in gt and gt['iscrowd'] 111 | if p.iouType == 'keypoints': 112 | gt['ignore'] = (gt['num_keypoints'] == 0) or gt['ignore'] 113 | self._gts = defaultdict(list) # gt for evaluation 114 | self._dts = defaultdict(list) # dt for evaluation 115 | for gt in gts: 116 | self._gts[gt['image_id'], gt['category_id']].append(gt) 117 | for dt in dts: 118 | self._dts[dt['image_id'], dt['category_id']].append(dt) 119 | self.evalImgs = defaultdict(list) # per-image per-category evaluation results 120 | self.eval = {} # accumulated evaluation results 121 | 122 | def evaluate(self): 123 | ''' 124 | Run per image evaluation on given images and store results (a list of dict) in self.evalImgs 125 | :return: None 126 | ''' 127 | tic = time.time() 128 | print('Running per image evaluation...') 129 | p = self.params 130 | # add backward compatibility if useSegm is specified in params 131 | if not p.useSegm is None: 132 | p.iouType = 'segm' if p.useSegm == 1 else 'bbox' 133 | print('useSegm (deprecated) is not None. 
Running {} evaluation'.format(p.iouType)) 134 | print('Evaluate annotation type *{}*'.format(p.iouType)) 135 | p.imgIds = list(np.unique(p.imgIds)) 136 | if p.useCats: 137 | p.catIds = list(np.unique(p.catIds)) 138 | p.maxDets = sorted(p.maxDets) 139 | self.params=p 140 | 141 | self._prepare() 142 | # loop through images, area range, max detection number 143 | catIds = p.catIds if p.useCats else [-1] 144 | 145 | if p.iouType == 'segm' or p.iouType == 'bbox': 146 | computeIoU = self.computeIoU 147 | elif p.iouType == 'keypoints': 148 | computeIoU = self.computeOks 149 | self.ious = {(imgId, catId): computeIoU(imgId, catId) \ 150 | for imgId in p.imgIds 151 | for catId in catIds} 152 | 153 | evaluateImg = self.evaluateImg 154 | maxDet = p.maxDets[-1] 155 | self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet) 156 | for catId in catIds 157 | for areaRng in p.areaRng 158 | for imgId in p.imgIds 159 | ] 160 | self._paramsEval = copy.deepcopy(self.params) 161 | toc = time.time() 162 | print('DONE (t={:0.2f}s).'.format(toc-tic)) 163 | 164 | def computeIoU(self, imgId, catId): 165 | p = self.params 166 | if p.useCats: 167 | gt = self._gts[imgId,catId] 168 | dt = self._dts[imgId,catId] 169 | else: 170 | gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]] 171 | dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]] 172 | if len(gt) == 0 and len(dt) ==0: 173 | return [] 174 | inds = np.argsort([-d['score'] for d in dt], kind='mergesort') 175 | dt = [dt[i] for i in inds] 176 | if len(dt) > p.maxDets[-1]: 177 | dt=dt[0:p.maxDets[-1]] 178 | 179 | if p.iouType == 'segm': 180 | g = [g['segmentation'] for g in gt] 181 | d = [d['segmentation'] for d in dt] 182 | elif p.iouType == 'bbox': 183 | g = [g['bbox'] for g in gt] 184 | d = [d['bbox'] for d in dt] 185 | else: 186 | raise Exception('unknown iouType for iou computation') 187 | 188 | # compute iou between each dt and gt region 189 | iscrowd = [int(o['iscrowd']) for o in gt] 190 | ious = maskUtils.iou(d,g,iscrowd) 191 | return ious 192 | 193 | def computeOks(self, imgId, catId): 194 | p = self.params 195 | # dimention here should be Nxm 196 | gts = self._gts[imgId, catId] 197 | dts = self._dts[imgId, catId] 198 | inds = np.argsort([-d['score'] for d in dts], kind='mergesort') 199 | dts = [dts[i] for i in inds] 200 | if len(dts) > p.maxDets[-1]: 201 | dts = dts[0:p.maxDets[-1]] 202 | # if len(gts) == 0 and len(dts) == 0: 203 | if len(gts) == 0 or len(dts) == 0: 204 | return [] 205 | ious = np.zeros((len(dts), len(gts))) 206 | sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89])/10.0 207 | vars = (sigmas * 2)**2 208 | k = len(sigmas) 209 | # compute oks between each detection and ground truth object 210 | for j, gt in enumerate(gts): 211 | # create bounds for ignore regions(double the gt bbox) 212 | g = np.array(gt['keypoints']) 213 | xg = g[0::3]; yg = g[1::3]; vg = g[2::3] 214 | k1 = np.count_nonzero(vg > 0) 215 | bb = gt['bbox'] 216 | x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2 217 | y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2 218 | for i, dt in enumerate(dts): 219 | d = np.array(dt['keypoints']) 220 | xd = d[0::3]; yd = d[1::3] 221 | if k1>0: 222 | # measure the per-keypoint distance if keypoints visible 223 | dx = xd - xg 224 | dy = yd - yg 225 | else: 226 | # measure minimum distance to keypoints in (x0,y0) & (x1,y1) 227 | z = np.zeros((k)) 228 | dx = np.max((z, x0-xd),axis=0)+np.max((z, xd-x1),axis=0) 229 | dy = np.max((z, y0-yd),axis=0)+np.max((z, yd-y1),axis=0) 230 | e = (dx**2 + 
dy**2) / vars / (gt['area']+np.spacing(1)) / 2 231 | if k1 > 0: 232 | e=e[vg > 0] 233 | ious[i, j] = np.sum(np.exp(-e)) / e.shape[0] 234 | return ious 235 | 236 | def evaluateImg(self, imgId, catId, aRng, maxDet): 237 | ''' 238 | perform evaluation for single category and image 239 | :return: dict (single image results) 240 | ''' 241 | p = self.params 242 | if p.useCats: 243 | gt = self._gts[imgId,catId] 244 | dt = self._dts[imgId,catId] 245 | else: 246 | gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]] 247 | dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]] 248 | if len(gt) == 0 and len(dt) ==0: 249 | return None 250 | 251 | for g in gt: 252 | if g['ignore'] or (g['area']aRng[1]): 253 | g['_ignore'] = 1 254 | else: 255 | g['_ignore'] = 0 256 | 257 | # sort dt highest score first, sort gt ignore last 258 | gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort') 259 | gt = [gt[i] for i in gtind] 260 | dtind = np.argsort([-d['score'] for d in dt], kind='mergesort') 261 | dt = [dt[i] for i in dtind[0:maxDet]] 262 | iscrowd = [int(o['iscrowd']) for o in gt] 263 | # load computed ious 264 | ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId] 265 | 266 | T = len(p.iouThrs) 267 | G = len(gt) 268 | D = len(dt) 269 | gtm = np.zeros((T,G)) 270 | dtm = np.zeros((T,D)) 271 | gtIg = np.array([g['_ignore'] for g in gt]) 272 | dtIg = np.zeros((T,D)) 273 | if not len(ious)==0: 274 | for tind, t in enumerate(p.iouThrs): 275 | for dind, d in enumerate(dt): 276 | # information about best match so far (m=-1 -> unmatched) 277 | iou = min([t,1-1e-10]) 278 | m = -1 279 | for gind, g in enumerate(gt): 280 | # if this gt already matched, and not a crowd, continue 281 | if gtm[tind,gind]>0 and not iscrowd[gind]: 282 | continue 283 | # if dt matched to reg gt, and on ignore gt, stop 284 | if m>-1 and gtIg[m]==0 and gtIg[gind]==1: 285 | break 286 | # continue to next gt unless better match made 287 | if ious[dind,gind] < iou: 288 | continue 289 | # if match successful and best so far, store appropriately 290 | iou=ious[dind,gind] 291 | m=gind 292 | # if match made store id of match for both dt and gt 293 | if m ==-1: 294 | continue 295 | dtIg[tind,dind] = gtIg[m] 296 | dtm[tind,dind] = gt[m]['id'] 297 | gtm[tind,m] = d['id'] 298 | # set unmatched detections outside of area range to ignore 299 | a = np.array([d['area']aRng[1] for d in dt]).reshape((1, len(dt))) 300 | dtIg = np.logical_or(dtIg, np.logical_and(dtm==0, np.repeat(a,T,0))) 301 | # store results for given image and category 302 | return { 303 | 'image_id': imgId, 304 | 'category_id': catId, 305 | 'aRng': aRng, 306 | 'maxDet': maxDet, 307 | 'dtIds': [d['id'] for d in dt], 308 | 'gtIds': [g['id'] for g in gt], 309 | 'dtMatches': dtm, 310 | 'gtMatches': gtm, 311 | 'dtScores': [d['score'] for d in dt], 312 | 'gtIgnore': gtIg, 313 | 'dtIgnore': dtIg, 314 | } 315 | 316 | def accumulate(self, p = None): 317 | ''' 318 | Accumulate per image evaluation results and store the result in self.eval 319 | :param p: input params for evaluation 320 | :return: None 321 | ''' 322 | print('Accumulating evaluation results...') 323 | tic = time.time() 324 | if not self.evalImgs: 325 | print('Please run evaluate() first') 326 | # allows input customized parameters 327 | if p is None: 328 | p = self.params 329 | p.catIds = p.catIds if p.useCats == 1 else [-1] 330 | T = len(p.iouThrs) 331 | R = len(p.recThrs) 332 | K = len(p.catIds) if p.useCats else 1 333 | A = len(p.areaRng) 334 | M = len(p.maxDets) 
335 | precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories 336 | recall = -np.ones((T,K,A,M)) 337 | 338 | # create dictionary for future indexing 339 | _pe = self._paramsEval 340 | catIds = _pe.catIds if _pe.useCats else [-1] 341 | setK = set(catIds) 342 | setA = set(map(tuple, _pe.areaRng)) 343 | setM = set(_pe.maxDets) 344 | setI = set(_pe.imgIds) 345 | # get inds to evaluate 346 | k_list = [n for n, k in enumerate(p.catIds) if k in setK] 347 | m_list = [m for n, m in enumerate(p.maxDets) if m in setM] 348 | a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] 349 | i_list = [n for n, i in enumerate(p.imgIds) if i in setI] 350 | I0 = len(_pe.imgIds) 351 | A0 = len(_pe.areaRng) 352 | # retrieve E at each category, area range, and max number of detections 353 | for k, k0 in enumerate(k_list): 354 | Nk = k0*A0*I0 355 | for a, a0 in enumerate(a_list): 356 | Na = a0*I0 357 | for m, maxDet in enumerate(m_list): 358 | E = [self.evalImgs[Nk + Na + i] for i in i_list] 359 | E = [e for e in E if not e is None] 360 | if len(E) == 0: 361 | continue 362 | dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) 363 | 364 | # different sorting method generates slightly different results. 365 | # mergesort is used to be consistent as Matlab implementation. 366 | inds = np.argsort(-dtScores, kind='mergesort') 367 | 368 | dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] 369 | dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] 370 | gtIg = np.concatenate([e['gtIgnore'] for e in E]) 371 | npig = np.count_nonzero(gtIg==0 ) 372 | if npig == 0: 373 | continue 374 | tps = np.logical_and( dtm, np.logical_not(dtIg) ) 375 | fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) 376 | 377 | tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float) 378 | fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float) 379 | for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): 380 | tp = np.array(tp) 381 | fp = np.array(fp) 382 | nd = len(tp) 383 | rc = tp / npig 384 | pr = tp / (fp+tp+np.spacing(1)) 385 | q = np.zeros((R,)) 386 | 387 | if nd: 388 | recall[t,k,a,m] = rc[-1] 389 | else: 390 | recall[t,k,a,m] = 0 391 | 392 | # numpy is slow without cython optimization for accessing elements 393 | # use python array gets significant speed improvement 394 | pr = pr.tolist(); q = q.tolist() 395 | 396 | for i in range(nd-1, 0, -1): 397 | if pr[i] > pr[i-1]: 398 | pr[i-1] = pr[i] 399 | 400 | inds = np.searchsorted(rc, p.recThrs, side='left') 401 | try: 402 | for ri, pi in enumerate(inds): 403 | q[ri] = pr[pi] 404 | except: 405 | pass 406 | precision[t,:,k,a,m] = np.array(q) 407 | self.eval = { 408 | 'params': p, 409 | 'counts': [T, R, K, A, M], 410 | 'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 411 | 'precision': precision, 412 | 'recall': recall, 413 | } 414 | toc = time.time() 415 | print('DONE (t={:0.2f}s).'.format( toc-tic)) 416 | 417 | def summarize(self): 418 | ''' 419 | Compute and display summary metrics for evaluation results. 
417 |     def summarize(self):
418 |         '''
419 |         Compute and display summary metrics for evaluation results.
420 |         Note this function can *only* be applied on the default parameter setting
421 |         '''
422 |         def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
423 |             p = self.params
424 |             iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
425 |             titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
426 |             typeStr = '(AP)' if ap == 1 else '(AR)'
427 |             iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
428 |                 if iouThr is None else '{:0.2f}'.format(iouThr)
429 | 
430 |             aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
431 |             mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
432 |             if ap == 1:
433 |                 # dimension of precision: [TxRxKxAxM]
434 |                 s = self.eval['precision']
435 |                 # IoU
436 |                 if iouThr is not None:
437 |                     t = np.where(iouThr == p.iouThrs)[0]
438 |                     s = s[t]
439 |                 s = s[:, :, :, aind, mind]
440 |             else:
441 |                 # dimension of recall: [TxKxAxM]
442 |                 s = self.eval['recall']
443 |                 if iouThr is not None:
444 |                     t = np.where(iouThr == p.iouThrs)[0]
445 |                     s = s[t]
446 |                 s = s[:, :, aind, mind]
447 |             if len(s[s > -1]) == 0:
448 |                 mean_s = -1
449 |             else:
450 |                 mean_s = np.mean(s[s > -1])
451 |             print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s))
452 |             return mean_s
453 |         def _summarizeDets():
454 |             stats = np.zeros((12,))
455 |             stats[0] = _summarize(1)
456 |             stats[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])
457 |             stats[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2])
458 |             stats[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2])
459 |             stats[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2])
460 |             stats[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2])
461 |             stats[6] = _summarize(0, maxDets=self.params.maxDets[0])
462 |             stats[7] = _summarize(0, maxDets=self.params.maxDets[1])
463 |             stats[8] = _summarize(0, maxDets=self.params.maxDets[2])
464 |             stats[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2])
465 |             stats[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2])
466 |             stats[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2])
467 |             return stats
468 |         def _summarizeKps():
469 |             stats = np.zeros((10,))
470 |             stats[0] = _summarize(1, maxDets=20)
471 |             stats[1] = _summarize(1, maxDets=20, iouThr=.5)
472 |             stats[2] = _summarize(1, maxDets=20, iouThr=.75)
473 |             stats[3] = _summarize(1, maxDets=20, areaRng='medium')
474 |             stats[4] = _summarize(1, maxDets=20, areaRng='large')
475 |             stats[5] = _summarize(0, maxDets=20)
476 |             stats[6] = _summarize(0, maxDets=20, iouThr=.5)
477 |             stats[7] = _summarize(0, maxDets=20, iouThr=.75)
478 |             stats[8] = _summarize(0, maxDets=20, areaRng='medium')
479 |             stats[9] = _summarize(0, maxDets=20, areaRng='large')
480 |             return stats
481 |         if not self.eval:
482 |             raise Exception('Please run accumulate() first')
483 |         iouType = self.params.iouType
484 |         if iouType == 'segm' or iouType == 'bbox':
485 |             summarize = _summarizeDets
486 |         elif iouType == 'keypoints':
487 |             summarize = _summarizeKps
488 |         self.stats = summarize()
489 | 
490 |     def __str__(self):
491 |         self.summarize(); return ''  # __str__ must return a str; summarize() only prints
492 | 
493 | class Params:
494 |     '''
495 |     Params for coco evaluation api
496 |     '''
497 |     def setDetParams(self):
498 |         self.imgIds = []
499 |         self.catIds = []
500 |         # np.arange causes trouble: the data point on arange is slightly larger than the true value
501 |         self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
502 |         self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True)
503 |         self.maxDets = [1, 10, 100]
504 |         self.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
505 |         self.areaRngLbl = ['all', 'small', 'medium', 'large']
506 |         self.useCats = 1
507 | 
508 |     def setKpParams(self):
509 |         self.imgIds = []
510 |         self.catIds = []
511 |         # np.arange causes trouble: the data point on arange is slightly larger than the true value
512 |         self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
513 |         self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True)
514 |         self.maxDets = [20]
515 |         self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
516 |         self.areaRngLbl = ['all', 'medium', 'large']
517 |         self.useCats = 1
518 | 
519 |     def __init__(self, iouType='segm'):
520 |         if iouType == 'segm' or iouType == 'bbox':
521 |             self.setDetParams()
522 |         elif iouType == 'keypoints':
523 |             self.setKpParams()
524 |         else:
525 |             raise Exception('iouType not supported')
526 |         self.iouType = iouType
527 |         # useSegm is deprecated
528 |         self.useSegm = None
--------------------------------------------------------------------------------
/pycocotools/mask.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'tsungyi'
 2 | 
 3 | import pycocotools._mask as _mask
 4 | 
 5 | # Interface for manipulating masks stored in RLE format.
 6 | #
 7 | # RLE is a simple yet efficient format for storing binary masks. RLE
 8 | # first divides a vector (or vectorized image) into a series of piecewise
 9 | # constant regions and then for each piece simply stores the length of
10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
12 | # (note that the odd counts are always the numbers of zeros). Instead of
13 | # storing the counts directly, additional compression is achieved with a
14 | # variable bitrate representation based on a common scheme called LEB128.
15 | #
16 | # Compression is greatest given large piecewise constant regions.
17 | # Specifically, the size of the RLE is proportional to the number of
18 | # *boundaries* in M (or for an image the number of boundaries in the y
19 | # direction). Assuming fairly simple shapes, the RLE representation is
20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
21 | # is substantially lower, especially for large simple objects (large n).
22 | #
23 | # Many common operations on masks can be computed directly using the RLE
24 | # (without need for decoding). This includes computations such as area,
25 | # union, intersection, etc. All of these operations are linear in the
26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
27 | # of the object. Computing these operations on the original mask is O(n).
28 | # Thus, using the RLE can result in substantial computational savings.
29 | #
30 | # The following API functions are defined:
31 | #  encode      - Encode binary masks using RLE.
32 | #  decode      - Decode binary masks encoded via RLE.
33 | #  merge       - Compute union or intersection of encoded masks.
34 | #  iou         - Compute intersection over union between masks.
35 | #  area        - Compute area of encoded masks.
36 | #  toBbox      - Get bounding boxes surrounding encoded masks.
37 | #  frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
38 | #
39 | # Usage:
40 | #  Rs     = encode( masks )
41 | #  masks  = decode( Rs )
42 | #  R      = merge( Rs, intersect=false )
43 | #  o      = iou( dt, gt, iscrowd )
44 | #  a      = area( Rs )
45 | #  bbs    = toBbox( Rs )
46 | #  Rs     = frPyObjects( [pyObjects], h, w )
47 | #
48 | # In the API the following formats are used:
49 | #  Rs      - [dict] Run-length encoding of binary masks
50 | #  R       - dict Run-length encoding of binary mask
51 | #  masks   - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
52 | #  iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
53 | #  bbs     - [nx4] Bounding box(es) stored as [x y w h]
54 | #  poly    - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
55 | #  dt,gt   - May be either bounding boxes or encoded masks
56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
57 | #
58 | # Finally, a note about the intersection over union (iou) computation.
59 | # The standard iou of a ground truth (gt) and detected (dt) object is
60 | #  iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
61 | # For "crowd" regions, we use a modified criterion. If a gt object is
62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
65 | #  iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
66 | # For crowd gt regions we use this modified criterion for the iou.
67 | #
68 | # To compile run "python setup.py build_ext --inplace"
69 | # Please do not contact us for help with compiling.
70 | #
71 | # Microsoft COCO Toolbox.      version 2.0
72 | # Data, paper, and tutorials available at:  http://mscoco.org/
73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
74 | # Licensed under the Simplified BSD License [see coco/license.txt]
75 | 
76 | iou         = _mask.iou
77 | merge       = _mask.merge
78 | frPyObjects = _mask.frPyObjects
79 | 
80 | def encode(bimask):
81 |     if len(bimask.shape) == 3:
82 |         return _mask.encode(bimask)
83 |     elif len(bimask.shape) == 2:
84 |         h, w = bimask.shape
85 |         return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0]
86 | 
87 | def decode(rleObjs):
88 |     if type(rleObjs) == list:
89 |         return _mask.decode(rleObjs)
90 |     else:
91 |         return _mask.decode([rleObjs])[:, :, 0]
92 | 
93 | def area(rleObjs):
94 |     if type(rleObjs) == list:
95 |         return _mask.area(rleObjs)
96 |     else:
97 |         return _mask.area([rleObjs])[0]
98 | 
99 | def toBbox(rleObjs):
100 |     if type(rleObjs) == list:
101 |         return _mask.toBbox(rleObjs)
102 |     else:
103 |         return _mask.toBbox([rleObjs])[0]
--------------------------------------------------------------------------------
/pycocotools/mask.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MetaPeak/tensorflow_object_detection_create_coco_tfrecord/6f8bdedd255e0eae4767d62a1ebd670117360dd6/pycocotools/mask.pyc
--------------------------------------------------------------------------------
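As a quick check that the build works, the mask API documented in the comments above can be exercised with a minimal sketch like the following (illustrative only; it assumes the `_mask` extension has been compiled as described in the README and that numpy is installed):
```
import numpy as np
from pycocotools import mask as maskUtils

# A toy 4x4 binary mask; encode() expects uint8 data in column-major
# (Fortran) order, hence the np.asfortranarray call.
m = np.zeros((4, 4), dtype=np.uint8)
m[1:3, 1:3] = 1

rle = maskUtils.encode(np.asfortranarray(m))  # 2D mask -> single RLE dict
print(maskUtils.area(rle))                    # 4 (pixels inside the mask)
print(maskUtils.toBbox(rle))                  # [1. 1. 2. 2.] as [x y w h]

decoded = maskUtils.decode(rle)               # back to an HxW uint8 array
assert (decoded == m).all()

# iou() takes lists of RLEs plus one iscrowd flag per ground-truth object.
print(maskUtils.iou([rle], [rle], [0]))       # [[1.]]
```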