├── README.md
├── create_coco_tf_record.py
├── dataset_util.py
└── pycocotools
    ├── __init__.py
    ├── __init__.pyc
    ├── __pycache__
    │   ├── __init__.cpython-35.pyc
    │   ├── coco.cpython-35.pyc
    │   └── mask.cpython-35.pyc
    ├── _mask.c
    ├── _mask.cpython-35m-x86_64-linux-gnu.so
    ├── _mask.pyx
    ├── coco.py
    ├── coco.pyc
    ├── cocoeval.py
    ├── mask.py
    └── mask.pyc

/README.md:
--------------------------------------------------------------------------------
1 | # tensorflow_object_detection_create_coco_tfrecord
2 | Convert the COCO dataset to TFRecord files for the TensorFlow Object Detection API.
3 | # Attention
4 | 1) For easy use of this script, your COCO dataset directory structure should look like this:
5 | ```
6 | +Your coco dataset root
7 |     +train2017
8 |     +val2017
9 |     +annotations
10 |         -instances_train2017.json
11 |         -instances_val2017.json
12 | ```
13 | 2) To use this script, download the Python COCO tools from the [coco website](http://mscoco.org/dataset/#download) and build them.
14 | After building, copy the pycocotools directory to the directory of this "create_coco_tf_record.py",
15 | or add the pycocotools path to the PYTHONPATH in your ~/.bashrc file.
16 | **For convenience, the pycocotools build from my machine is included in the project directory; you can use it with Python 3 directly. If you use Python 2, build the Python COCO tools from [coco](http://mscoco.org/dataset/#download) yourself:**
17 | ```
18 | git clone https://github.com/cocodataset/cocoapi
19 | cd cocoapi/PythonAPI/
20 | pip install Cython
21 | make
22 | ls pycocotools/
23 | cp -rf pycocotools PATH/tensorflow_object_detection_create_coco_tfrecord/
24 | ```
25 | 
26 | 
27 | # Example usage:
28 | ```
29 | python create_coco_tf_record.py --data_dir=/path/to/your/coco/root/directory \
30 |     --set=train \
31 |     --output_filepath=/where/you/want/to/save/pascal.record \
32 |     --shuffle_imgs=True
33 | ```
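34 | # Check the output (optional)
35 | A minimal sketch for sanity-checking the generated file (assumes TF 1.x; the path is just a placeholder for your own `--output_filepath`). It counts the records and prints the feature keys that create_coco_tf_record.py writes:
36 | ```python
37 | import tensorflow as tf
38 | 
39 | path = '/where/you/want/to/save/pascal.record'
40 | count = 0
41 | for serialized in tf.python_io.tf_record_iterator(path):
42 |     example = tf.train.Example.FromString(serialized)
43 |     if count == 0:
44 |         # keys written by dict_to_coco_example, e.g. image/encoded, image/height, ...
45 |         print(sorted(example.features.feature.keys()))
46 |     count += 1
47 | print('%d examples in %s' % (count, path))
48 | ```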
--------------------------------------------------------------------------------
/create_coco_tf_record.py:
--------------------------------------------------------------------------------
1 | r"""Convert raw Microsoft COCO dataset to TFRecord for object_detection.
2 | Attention Please!!!
3 | 
4 | 1) For easy use of this script, your COCO dataset directory structure should look like this:
5 |     +Your coco dataset root
6 |         +train2017
7 |         +val2017
8 |         +annotations
9 |             -instances_train2017.json
10 |             -instances_val2017.json
11 | 2) To use this script, download the Python COCO tools from "http://mscoco.org/dataset/#download" and build them.
12 | After building, copy the pycocotools directory to the directory of this "create_coco_tf_record.py",
13 | or add the pycocotools path to the PYTHONPATH in your ~/.bashrc file.
14 | 
15 | Example usage:
16 |     python create_coco_tf_record.py --data_dir=/path/to/your/coco/root/directory \
17 |         --set=train \
18 |         --output_filepath=/where/you/want/to/save/pascal.record \
19 |         --shuffle_imgs=True
20 | """
21 | 
22 | from __future__ import absolute_import
23 | from __future__ import division
24 | from __future__ import print_function
25 | from pycocotools.coco import COCO
26 | from PIL import Image
27 | from random import shuffle
28 | import os, sys
29 | import numpy as np
30 | import tensorflow as tf
31 | import logging
32 | 
33 | import dataset_util
34 | 
35 | flags = tf.app.flags
36 | flags.DEFINE_string('data_dir', '', 'Root directory of the raw Microsoft COCO dataset.')
37 | flags.DEFINE_string('set', 'train', 'Convert the training set or the validation set.')
38 | flags.DEFINE_string('output_filepath', '', 'Path to the output TFRecord.')
39 | flags.DEFINE_bool('shuffle_imgs', True, 'Whether to shuffle the coco images.')
40 | FLAGS = flags.FLAGS
41 | 
42 | 
43 | def load_coco_detection_dataset(imgs_dir, annotations_filepath, shuffle_img=True):
44 |     """Load data from the dataset with pycocotools, which can be downloaded from "http://mscoco.org/dataset/#download".
45 |     Args:
46 |         imgs_dir: directory of the coco images
47 |         annotations_filepath: file path of the coco annotations file
48 |         shuffle_img: whether to shuffle the image order
49 |     Returns:
50 |         coco_data: list of dictionaries holding the information of each image
51 |     """
52 |     coco = COCO(annotations_filepath)
53 |     img_ids = coco.getImgIds()  # all image ids in the annotation file (e.g. 82783 in train2014)
54 |     cat_ids = coco.getCatIds()  # category ids run up to 90 but are not continuous;
55 |                                 # [0,12,26,29,30,45,66,68,69,71,83] are unused ids in the coco dataset.
56 | 
57 |     if shuffle_img:
58 |         shuffle(img_ids)
59 | 
60 |     coco_data = []
61 | 
62 |     nb_imgs = len(img_ids)
63 |     for index, img_id in enumerate(img_ids):
64 |         if index % 100 == 0:
65 |             print("Reading images: %d / %d" % (index, nb_imgs))
66 |         img_info = {}
67 |         bboxes = []
68 |         labels = []
69 | 
70 |         img_detail = coco.loadImgs(img_id)[0]
71 |         pic_height = img_detail['height']
72 |         pic_width = img_detail['width']
73 | 
74 |         ann_ids = coco.getAnnIds(imgIds=img_id, catIds=cat_ids)
75 |         anns = coco.loadAnns(ann_ids)
76 |         for ann in anns:
77 |             bboxes_data = ann['bbox']
78 |             bboxes_data = [bboxes_data[0]/float(pic_width), bboxes_data[1]/float(pic_height),\
79 |                            bboxes_data[2]/float(pic_width), bboxes_data[3]/float(pic_height)]
80 |             # coco bounding boxes are [xmin, ymin, width, height]; normalize them to [0, 1] here
81 |             bboxes.append(bboxes_data)
82 |             labels.append(ann['category_id'])
83 | 
84 | 
85 |         img_path = os.path.join(imgs_dir, img_detail['file_name'])
86 |         img_bytes = tf.gfile.FastGFile(img_path, 'rb').read()
87 | 
88 |         img_info['pixel_data'] = img_bytes
89 |         img_info['height'] = pic_height
90 |         img_info['width'] = pic_width
91 |         img_info['bboxes'] = bboxes
92 |         img_info['labels'] = labels
93 | 
94 |         coco_data.append(img_info)
95 |     return coco_data
96 | 
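97 | # Worked example for the normalization above (illustrative numbers, not from the dataset):
98 | # a COCO bbox [x, y, w, h] = [120.0, 60.0, 240.0, 180.0] in a 640x480 image becomes
99 | # [120/640, 60/480, 240/640, 180/480] = [0.1875, 0.125, 0.375, 0.375];
100 | # dict_to_coco_example below then derives the corner coordinates:
101 | #   xmin = 0.1875, xmax = 0.1875 + 0.375 = 0.5625
102 | #   ymin = 0.125,  ymax = 0.125  + 0.375 = 0.5000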
103 | 
104 | def dict_to_coco_example(img_data):
105 |     """Convert the python dictionary data of one image to a tf.Example proto.
106 |     Args:
107 |         img_data: information of one image, including the bounding boxes, the labels of
108 |             the bounding boxes, height, width, and encoded pixel data.
109 |     Returns:
110 |         example: The converted tf.Example
111 |     """
112 |     bboxes = img_data['bboxes']
113 |     xmin, xmax, ymin, ymax = [], [], [], []
114 |     for bbox in bboxes:
115 |         xmin.append(bbox[0])
116 |         xmax.append(bbox[0] + bbox[2])
117 |         ymin.append(bbox[1])
118 |         ymax.append(bbox[1] + bbox[3])
119 | 
120 |     example = tf.train.Example(features=tf.train.Features(feature={
121 |         'image/height': dataset_util.int64_feature(img_data['height']),
122 |         'image/width': dataset_util.int64_feature(img_data['width']),
123 |         'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
124 |         'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
125 |         'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
126 |         'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
127 |         'image/object/class/label': dataset_util.int64_list_feature(img_data['labels']),
128 |         'image/encoded': dataset_util.bytes_feature(img_data['pixel_data']),
129 |         'image/format': dataset_util.bytes_feature('jpeg'.encode('utf-8')),
130 |     }))
131 |     return example
132 | 
133 | def main(_):
134 |     if FLAGS.set == "train":
135 |         imgs_dir = os.path.join(FLAGS.data_dir, 'train2017')
136 |         annotations_filepath = os.path.join(FLAGS.data_dir, 'annotations', 'instances_train2017.json')
137 |         print("Converting the coco train set to a tf record")
138 |     elif FLAGS.set == "val":
139 |         imgs_dir = os.path.join(FLAGS.data_dir, 'val2017')
140 |         annotations_filepath = os.path.join(FLAGS.data_dir, 'annotations', 'instances_val2017.json')
141 |         print("Converting the coco val set to a tf record")
142 |     else:
143 |         raise ValueError("you must convert either the train data or the val data")
144 |     # load all coco data
145 |     coco_data = load_coco_detection_dataset(imgs_dir, annotations_filepath, shuffle_img=FLAGS.shuffle_imgs)
146 |     total_imgs = len(coco_data)
147 |     # write coco data to the tf record
148 |     with tf.python_io.TFRecordWriter(FLAGS.output_filepath) as tfrecord_writer:
149 |         for index, img_data in enumerate(coco_data):
150 |             if index % 100 == 0:
151 |                 print("Converting images: %d / %d" % (index, total_imgs))
152 |             example = dict_to_coco_example(img_data)
153 |             tfrecord_writer.write(example.SerializeToString())
154 | 
155 | 
156 | if __name__ == "__main__":
157 |     tf.app.run()
158 | 
--------------------------------------------------------------------------------
/dataset_util.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================== 15 | 16 | """Utility functions for creating TFRecord data sets.""" 17 | 18 | import tensorflow as tf 19 | 20 | 21 | def int64_feature(value): 22 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 23 | 24 | 25 | def int64_list_feature(value): 26 | return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) 27 | 28 | 29 | def bytes_feature(value): 30 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 31 | 32 | 33 | def bytes_list_feature(value): 34 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) 35 | 36 | 37 | def float_list_feature(value): 38 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 39 | 40 | 41 | def read_examples_list(path): 42 | """Read list of training or validation examples. 43 | 44 | The file is assumed to contain a single example per line where the first 45 | token in the line is an identifier that allows us to find the image and 46 | annotation xml for that example. 47 | 48 | For example, the line: 49 | xyz 3 50 | would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored). 51 | 52 | Args: 53 | path: absolute path to examples list file. 54 | 55 | Returns: 56 | list of example identifiers (strings). 57 | """ 58 | with tf.gfile.GFile(path) as fid: 59 | lines = fid.readlines() 60 | return [line.strip().split(' ')[0] for line in lines] 61 | 62 | 63 | def recursive_parse_xml_to_dict(xml): 64 | """Recursively parses XML contents to python dict. 65 | 66 | We assume that `object` tags are the only ones that can appear 67 | multiple times at the same level of a tree. 68 | 69 | Args: 70 | xml: xml tree obtained by parsing XML file contents using lxml.etree 71 | 72 | Returns: 73 | Python dictionary holding XML contents. 
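74 | 
75 |   For example (an illustrative case), the tree parsed from
76 |   "<annotation><object><name>cat</name></object><object><name>dog</name></object></annotation>"
77 |   becomes: {'annotation': {'object': [{'name': 'cat'}, {'name': 'dog'}]}}.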
78 |   """
79 |   if not xml:
80 |     return {xml.tag: xml.text}
81 |   result = {}
82 |   for child in xml:
83 |     child_result = recursive_parse_xml_to_dict(child)
84 |     if child.tag != 'object':
85 |       result[child.tag] = child_result[child.tag]
86 |     else:
87 |       if child.tag not in result:
88 |         result[child.tag] = []
89 |       result[child.tag].append(child_result[child.tag])
90 |   return {xml.tag: result}
91 | 
--------------------------------------------------------------------------------
/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 
--------------------------------------------------------------------------------
/pycocotools/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MetaPeak/tensorflow_object_detection_create_coco_tfrecord/6f8bdedd255e0eae4767d62a1ebd670117360dd6/pycocotools/__init__.pyc
--------------------------------------------------------------------------------
/pycocotools/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MetaPeak/tensorflow_object_detection_create_coco_tfrecord/6f8bdedd255e0eae4767d62a1ebd670117360dd6/pycocotools/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/pycocotools/__pycache__/coco.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MetaPeak/tensorflow_object_detection_create_coco_tfrecord/6f8bdedd255e0eae4767d62a1ebd670117360dd6/pycocotools/__pycache__/coco.cpython-35.pyc
--------------------------------------------------------------------------------
/pycocotools/__pycache__/mask.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MetaPeak/tensorflow_object_detection_create_coco_tfrecord/6f8bdedd255e0eae4767d62a1ebd670117360dd6/pycocotools/__pycache__/mask.cpython-35.pyc
--------------------------------------------------------------------------------
/pycocotools/_mask.cpython-35m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MetaPeak/tensorflow_object_detection_create_coco_tfrecord/6f8bdedd255e0eae4767d62a1ebd670117360dd6/pycocotools/_mask.cpython-35m-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/pycocotools/_mask.pyx:
--------------------------------------------------------------------------------
1 | # distutils: language = c
2 | # distutils: sources = ../common/maskApi.c
3 | 
4 | #**************************************************************************
5 | # Microsoft COCO Toolbox. version 2.0
6 | # Data, paper, and tutorials available at: http://mscoco.org/
7 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
8 | # Licensed under the Simplified BSD License [see coco/license.txt]
9 | #**************************************************************************
10 | 
11 | __author__ = 'tsungyi'
12 | 
13 | import sys
14 | PYTHON_VERSION = sys.version_info[0]
15 | 
16 | # import both Python-level and C-level symbols of Numpy
17 | # the API uses Numpy to interface C and Python
18 | import numpy as np
19 | cimport numpy as np
20 | from libc.stdlib cimport malloc, free
21 | 
22 | # initialize Numpy. must do.
23 | np.import_array() 24 | 25 | # import numpy C function 26 | # we use PyArray_ENABLEFLAGS to make Numpy ndarray responsible to memoery management 27 | cdef extern from "numpy/arrayobject.h": 28 | void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) 29 | 30 | # Declare the prototype of the C functions in MaskApi.h 31 | cdef extern from "maskApi.h": 32 | ctypedef unsigned int uint 33 | ctypedef unsigned long siz 34 | ctypedef unsigned char byte 35 | ctypedef double* BB 36 | ctypedef struct RLE: 37 | siz h, 38 | siz w, 39 | siz m, 40 | uint* cnts, 41 | void rlesInit( RLE **R, siz n ) 42 | void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) 43 | void rleDecode( const RLE *R, byte *mask, siz n ) 44 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ) 45 | void rleArea( const RLE *R, siz n, uint *a ) 46 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) 47 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) 48 | void rleToBbox( const RLE *R, BB bb, siz n ) 49 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) 50 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) 51 | char* rleToString( const RLE *R ) 52 | void rleFrString( RLE *R, char *s, siz h, siz w ) 53 | 54 | # python class to wrap RLE array in C 55 | # the class handles the memory allocation and deallocation 56 | cdef class RLEs: 57 | cdef RLE *_R 58 | cdef siz _n 59 | 60 | def __cinit__(self, siz n =0): 61 | rlesInit(&self._R, n) 62 | self._n = n 63 | 64 | # free the RLE array here 65 | def __dealloc__(self): 66 | if self._R is not NULL: 67 | for i in range(self._n): 68 | free(self._R[i].cnts) 69 | free(self._R) 70 | def __getattr__(self, key): 71 | if key == 'n': 72 | return self._n 73 | raise AttributeError(key) 74 | 75 | # python class to wrap Mask array in C 76 | # the class handles the memory allocation and deallocation 77 | cdef class Masks: 78 | cdef byte *_mask 79 | cdef siz _h 80 | cdef siz _w 81 | cdef siz _n 82 | 83 | def __cinit__(self, h, w, n): 84 | self._mask = malloc(h*w*n* sizeof(byte)) 85 | self._h = h 86 | self._w = w 87 | self._n = n 88 | # def __dealloc__(self): 89 | # the memory management of _mask has been passed to np.ndarray 90 | # it doesn't need to be freed here 91 | 92 | # called when passing into np.array() and return an np.ndarray in column-major order 93 | def __array__(self): 94 | cdef np.npy_intp shape[1] 95 | shape[0] = self._h*self._w*self._n 96 | # Create a 1D array, and reshape it to fortran/Matlab column-major array 97 | ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F') 98 | # The _mask allocated by Masks is now handled by ndarray 99 | PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA) 100 | return ndarray 101 | 102 | # internal conversion from Python RLEs object to compressed RLE format 103 | def _toString(RLEs Rs): 104 | cdef siz n = Rs.n 105 | cdef bytes py_string 106 | cdef char* c_string 107 | objs = [] 108 | for i in range(n): 109 | c_string = rleToString( &Rs._R[i] ) 110 | py_string = c_string 111 | objs.append({ 112 | 'size': [Rs._R[i].h, Rs._R[i].w], 113 | 'counts': py_string 114 | }) 115 | free(c_string) 116 | return objs 117 | 118 | # internal conversion from compressed RLE format to Python RLEs object 119 | def _frString(rleObjs): 120 | cdef siz n = len(rleObjs) 121 | Rs = RLEs(n) 122 | cdef bytes py_string 123 | cdef char* c_string 124 | for i, obj in enumerate(rleObjs): 125 | if PYTHON_VERSION == 2: 126 | py_string = 
str(obj['counts']).encode('utf8') 127 | elif PYTHON_VERSION == 3: 128 | py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts'] 129 | else: 130 | raise Exception('Python version must be 2 or 3') 131 | c_string = py_string 132 | rleFrString( &Rs._R[i], c_string, obj['size'][0], obj['size'][1] ) 133 | return Rs 134 | 135 | # encode mask to RLEs objects 136 | # list of RLE string can be generated by RLEs member function 137 | def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask): 138 | h, w, n = mask.shape[0], mask.shape[1], mask.shape[2] 139 | cdef RLEs Rs = RLEs(n) 140 | rleEncode(Rs._R,mask.data,h,w,n) 141 | objs = _toString(Rs) 142 | return objs 143 | 144 | # decode mask from compressed list of RLE string or RLEs object 145 | def decode(rleObjs): 146 | cdef RLEs Rs = _frString(rleObjs) 147 | h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n 148 | masks = Masks(h, w, n) 149 | rleDecode(Rs._R, masks._mask, n); 150 | return np.array(masks) 151 | 152 | def merge(rleObjs, intersect=0): 153 | cdef RLEs Rs = _frString(rleObjs) 154 | cdef RLEs R = RLEs(1) 155 | rleMerge(Rs._R, R._R, Rs._n, intersect) 156 | obj = _toString(R)[0] 157 | return obj 158 | 159 | def area(rleObjs): 160 | cdef RLEs Rs = _frString(rleObjs) 161 | cdef uint* _a = malloc(Rs._n* sizeof(uint)) 162 | rleArea(Rs._R, Rs._n, _a) 163 | cdef np.npy_intp shape[1] 164 | shape[0] = Rs._n 165 | a = np.array((Rs._n, ), dtype=np.uint8) 166 | a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a) 167 | PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA) 168 | return a 169 | 170 | # iou computation. support function overload (RLEs-RLEs and bbox-bbox). 171 | def iou( dt, gt, pyiscrowd ): 172 | def _preproc(objs): 173 | if len(objs) == 0: 174 | return objs 175 | if type(objs) == np.ndarray: 176 | if len(objs.shape) == 1: 177 | objs = objs.reshape((objs[0], 1)) 178 | # check if it's Nx4 bbox 179 | if not len(objs.shape) == 2 or not objs.shape[1] == 4: 180 | raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension') 181 | objs = objs.astype(np.double) 182 | elif type(objs) == list: 183 | # check if list is in box format and convert it to np.ndarray 184 | isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs])) 185 | isrle = np.all(np.array([type(obj) == dict for obj in objs])) 186 | if isbox: 187 | objs = np.array(objs, dtype=np.double) 188 | if len(objs.shape) == 1: 189 | objs = objs.reshape((1,objs.shape[0])) 190 | elif isrle: 191 | objs = _frString(objs) 192 | else: 193 | raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])') 194 | else: 195 | raise Exception('unrecognized type. 
The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.') 196 | return objs 197 | def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 198 | rleIou( dt._R, gt._R, m, n, iscrowd.data, _iou.data ) 199 | def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 200 | bbIou( dt.data, gt.data, m, n, iscrowd.data, _iou.data ) 201 | def _len(obj): 202 | cdef siz N = 0 203 | if type(obj) == RLEs: 204 | N = obj.n 205 | elif len(obj)==0: 206 | pass 207 | elif type(obj) == np.ndarray: 208 | N = obj.shape[0] 209 | return N 210 | # convert iscrowd to numpy array 211 | cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8) 212 | # simple type checking 213 | cdef siz m, n 214 | dt = _preproc(dt) 215 | gt = _preproc(gt) 216 | m = _len(dt) 217 | n = _len(gt) 218 | if m == 0 or n == 0: 219 | return [] 220 | if not type(dt) == type(gt): 221 | raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray') 222 | 223 | # define local variables 224 | cdef double* _iou = 0 225 | cdef np.npy_intp shape[1] 226 | # check type and assign iou function 227 | if type(dt) == RLEs: 228 | _iouFun = _rleIou 229 | elif type(dt) == np.ndarray: 230 | _iouFun = _bbIou 231 | else: 232 | raise Exception('input data type not allowed.') 233 | _iou = malloc(m*n* sizeof(double)) 234 | iou = np.zeros((m*n, ), dtype=np.double) 235 | shape[0] = m*n 236 | iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou) 237 | PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA) 238 | _iouFun(dt, gt, iscrowd, m, n, iou) 239 | return iou.reshape((m,n), order='F') 240 | 241 | def toBbox( rleObjs ): 242 | cdef RLEs Rs = _frString(rleObjs) 243 | cdef siz n = Rs.n 244 | cdef BB _bb = malloc(4*n* sizeof(double)) 245 | rleToBbox( Rs._R, _bb, n ) 246 | cdef np.npy_intp shape[1] 247 | shape[0] = 4*n 248 | bb = np.array((1,4*n), dtype=np.double) 249 | bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4)) 250 | PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA) 251 | return bb 252 | 253 | def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ): 254 | cdef siz n = bb.shape[0] 255 | Rs = RLEs(n) 256 | rleFrBbox( Rs._R, bb.data, h, w, n ) 257 | objs = _toString(Rs) 258 | return objs 259 | 260 | def frPoly( poly, siz h, siz w ): 261 | cdef np.ndarray[np.double_t, ndim=1] np_poly 262 | n = len(poly) 263 | Rs = RLEs(n) 264 | for i, p in enumerate(poly): 265 | np_poly = np.array(p, dtype=np.double, order='F') 266 | rleFrPoly( &Rs._R[i], np_poly.data, int(len(p)/2), h, w ) 267 | objs = _toString(Rs) 268 | return objs 269 | 270 | def frUncompressedRLE(ucRles, siz h, siz w): 271 | cdef np.ndarray[np.uint32_t, ndim=1] cnts 272 | cdef RLE R 273 | cdef uint *data 274 | n = len(ucRles) 275 | objs = [] 276 | for i in range(n): 277 | Rs = RLEs(1) 278 | cnts = np.array(ucRles[i]['counts'], dtype=np.uint32) 279 | # time for malloc can be saved here but it's fine 280 | data = malloc(len(cnts)* sizeof(uint)) 281 | for j in range(len(cnts)): 282 | data[j] = cnts[j] 283 | R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), data) 284 | Rs._R[0] = R 285 | objs.append(_toString(Rs)[0]) 286 | return objs 287 | 288 | def frPyObjects(pyobj, h, w): 289 | # encode rle from a list of python objects 290 | if type(pyobj) == np.ndarray: 291 | objs = frBbox(pyobj, h, w) 292 | elif type(pyobj) == 
list and len(pyobj[0]) == 4: 293 | objs = frBbox(pyobj, h, w) 294 | elif type(pyobj) == list and len(pyobj[0]) > 4: 295 | objs = frPoly(pyobj, h, w) 296 | elif type(pyobj) == list and type(pyobj[0]) == dict \ 297 | and 'counts' in pyobj[0] and 'size' in pyobj[0]: 298 | objs = frUncompressedRLE(pyobj, h, w) 299 | # encode rle from single python object 300 | elif type(pyobj) == list and len(pyobj) == 4: 301 | objs = frBbox([pyobj], h, w)[0] 302 | elif type(pyobj) == list and len(pyobj) > 4: 303 | objs = frPoly([pyobj], h, w)[0] 304 | elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj: 305 | objs = frUncompressedRLE([pyobj], h, w)[0] 306 | else: 307 | raise Exception('input type is not supported.') 308 | return objs 309 | -------------------------------------------------------------------------------- /pycocotools/coco.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | __version__ = '2.0' 3 | # Interface for accessing the Microsoft COCO dataset. 4 | 5 | # Microsoft COCO is a large image dataset designed for object detection, 6 | # segmentation, and caption generation. pycocotools is a Python API that 7 | # assists in loading, parsing and visualizing the annotations in COCO. 8 | # Please visit http://mscoco.org/ for more information on COCO, including 9 | # for the data, paper, and tutorials. The exact format of the annotations 10 | # is also described on the COCO website. For example usage of the pycocotools 11 | # please see pycocotools_demo.ipynb. In addition to this API, please download both 12 | # the COCO images and annotations in order to run the demo. 13 | 14 | # An alternative to using the API is to load the annotations directly 15 | # into Python dictionary 16 | # Using the API provides additional utility functions. Note that this API 17 | # supports both *instance* and *caption* annotations. In the case of 18 | # captions not all functions are defined (e.g. categories are undefined). 19 | 20 | # The following API functions are defined: 21 | # COCO - COCO api class that loads COCO annotation file and prepare data structures. 22 | # decodeMask - Decode binary mask M encoded via run-length encoding. 23 | # encodeMask - Encode binary mask M using run-length encoding. 24 | # getAnnIds - Get ann ids that satisfy given filter conditions. 25 | # getCatIds - Get cat ids that satisfy given filter conditions. 26 | # getImgIds - Get img ids that satisfy given filter conditions. 27 | # loadAnns - Load anns with the specified ids. 28 | # loadCats - Load cats with the specified ids. 29 | # loadImgs - Load imgs with the specified ids. 30 | # annToMask - Convert segmentation in an annotation to binary mask. 31 | # showAnns - Display the specified annotations. 32 | # loadRes - Load algorithm results and create API for accessing them. 33 | # download - Download COCO images from mscoco.org server. 34 | # Throughout the API "ann"=annotation, "cat"=category, and "img"=image. 35 | # Help on each functions can be accessed by: "help COCO>function". 36 | 37 | # See also COCO>decodeMask, 38 | # COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds, 39 | # COCO>getImgIds, COCO>loadAnns, COCO>loadCats, 40 | # COCO>loadImgs, COCO>annToMask, COCO>showAnns 41 | 42 | # Microsoft COCO Toolbox. version 2.0 43 | # Data, paper, and tutorials available at: http://mscoco.org/ 44 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2014. 
45 | # Licensed under the Simplified BSD License [see bsd.txt] 46 | 47 | import json 48 | import time 49 | import matplotlib.pyplot as plt 50 | from matplotlib.collections import PatchCollection 51 | from matplotlib.patches import Polygon 52 | import numpy as np 53 | import copy 54 | import itertools 55 | from . import mask as maskUtils 56 | import os 57 | from collections import defaultdict 58 | import sys 59 | PYTHON_VERSION = sys.version_info[0] 60 | if PYTHON_VERSION == 2: 61 | from urllib import urlretrieve 62 | elif PYTHON_VERSION == 3: 63 | from urllib.request import urlretrieve 64 | 65 | class COCO: 66 | def __init__(self, annotation_file=None): 67 | """ 68 | Constructor of Microsoft COCO helper class for reading and visualizing annotations. 69 | :param annotation_file (str): location of annotation file 70 | :param image_folder (str): location to the folder that hosts images. 71 | :return: 72 | """ 73 | # load dataset 74 | self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict() 75 | self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list) 76 | if not annotation_file == None: 77 | print('loading annotations into memory...') 78 | tic = time.time() 79 | dataset = json.load(open(annotation_file, 'r')) 80 | assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset)) 81 | print('Done (t={:0.2f}s)'.format(time.time()- tic)) 82 | self.dataset = dataset 83 | self.createIndex() 84 | 85 | def createIndex(self): 86 | # create index 87 | print('creating index...') 88 | anns, cats, imgs = {}, {}, {} 89 | imgToAnns,catToImgs = defaultdict(list),defaultdict(list) 90 | if 'annotations' in self.dataset: 91 | for ann in self.dataset['annotations']: 92 | imgToAnns[ann['image_id']].append(ann) 93 | anns[ann['id']] = ann 94 | 95 | if 'images' in self.dataset: 96 | for img in self.dataset['images']: 97 | imgs[img['id']] = img 98 | 99 | if 'categories' in self.dataset: 100 | for cat in self.dataset['categories']: 101 | cats[cat['id']] = cat 102 | 103 | if 'annotations' in self.dataset and 'categories' in self.dataset: 104 | for ann in self.dataset['annotations']: 105 | catToImgs[ann['category_id']].append(ann['image_id']) 106 | 107 | print('index created!') 108 | 109 | # create class members 110 | self.anns = anns 111 | self.imgToAnns = imgToAnns 112 | self.catToImgs = catToImgs 113 | self.imgs = imgs 114 | self.cats = cats 115 | 116 | def info(self): 117 | """ 118 | Print information about the annotation file. 119 | :return: 120 | """ 121 | for key, value in self.dataset['info'].items(): 122 | print('{}: {}'.format(key, value)) 123 | 124 | def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None): 125 | """ 126 | Get ann ids that satisfy given filter conditions. default skips that filter 127 | :param imgIds (int array) : get anns for given imgs 128 | catIds (int array) : get anns for given cats 129 | areaRng (float array) : get anns for given area range (e.g. 
[0 inf]) 130 | iscrowd (boolean) : get anns for given crowd label (False or True) 131 | :return: ids (int array) : integer array of ann ids 132 | """ 133 | imgIds = imgIds if type(imgIds) == list else [imgIds] 134 | catIds = catIds if type(catIds) == list else [catIds] 135 | 136 | if len(imgIds) == len(catIds) == len(areaRng) == 0: 137 | anns = self.dataset['annotations'] 138 | else: 139 | if not len(imgIds) == 0: 140 | lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns] 141 | anns = list(itertools.chain.from_iterable(lists)) 142 | else: 143 | anns = self.dataset['annotations'] 144 | anns = anns if len(catIds) == 0 else [ann for ann in anns if ann['category_id'] in catIds] 145 | anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]] 146 | if not iscrowd == None: 147 | ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd] 148 | else: 149 | ids = [ann['id'] for ann in anns] 150 | return ids 151 | 152 | def getCatIds(self, catNms=[], supNms=[], catIds=[]): 153 | """ 154 | filtering parameters. default skips that filter. 155 | :param catNms (str array) : get cats for given cat names 156 | :param supNms (str array) : get cats for given supercategory names 157 | :param catIds (int array) : get cats for given cat ids 158 | :return: ids (int array) : integer array of cat ids 159 | """ 160 | catNms = catNms if type(catNms) == list else [catNms] 161 | supNms = supNms if type(supNms) == list else [supNms] 162 | catIds = catIds if type(catIds) == list else [catIds] 163 | 164 | if len(catNms) == len(supNms) == len(catIds) == 0: 165 | cats = self.dataset['categories'] 166 | else: 167 | cats = self.dataset['categories'] 168 | cats = cats if len(catNms) == 0 else [cat for cat in cats if cat['name'] in catNms] 169 | cats = cats if len(supNms) == 0 else [cat for cat in cats if cat['supercategory'] in supNms] 170 | cats = cats if len(catIds) == 0 else [cat for cat in cats if cat['id'] in catIds] 171 | ids = [cat['id'] for cat in cats] 172 | return ids 173 | 174 | def getImgIds(self, imgIds=[], catIds=[]): 175 | ''' 176 | Get img ids that satisfy given filter conditions. 177 | :param imgIds (int array) : get imgs for given ids 178 | :param catIds (int array) : get imgs with all given cats 179 | :return: ids (int array) : integer array of img ids 180 | ''' 181 | imgIds = imgIds if type(imgIds) == list else [imgIds] 182 | catIds = catIds if type(catIds) == list else [catIds] 183 | 184 | if len(imgIds) == len(catIds) == 0: 185 | ids = self.imgs.keys() 186 | else: 187 | ids = set(imgIds) 188 | for i, catId in enumerate(catIds): 189 | if i == 0 and len(ids) == 0: 190 | ids = set(self.catToImgs[catId]) 191 | else: 192 | ids &= set(self.catToImgs[catId]) 193 | return list(ids) 194 | 195 | def loadAnns(self, ids=[]): 196 | """ 197 | Load anns with the specified ids. 198 | :param ids (int array) : integer ids specifying anns 199 | :return: anns (object array) : loaded ann objects 200 | """ 201 | if type(ids) == list: 202 | return [self.anns[id] for id in ids] 203 | elif type(ids) == int: 204 | return [self.anns[ids]] 205 | 206 | def loadCats(self, ids=[]): 207 | """ 208 | Load cats with the specified ids. 
209 | :param ids (int array) : integer ids specifying cats 210 | :return: cats (object array) : loaded cat objects 211 | """ 212 | if type(ids) == list: 213 | return [self.cats[id] for id in ids] 214 | elif type(ids) == int: 215 | return [self.cats[ids]] 216 | 217 | def loadImgs(self, ids=[]): 218 | """ 219 | Load anns with the specified ids. 220 | :param ids (int array) : integer ids specifying img 221 | :return: imgs (object array) : loaded img objects 222 | """ 223 | if type(ids) == list: 224 | return [self.imgs[id] for id in ids] 225 | elif type(ids) == int: 226 | return [self.imgs[ids]] 227 | 228 | def showAnns(self, anns): 229 | """ 230 | Display the specified annotations. 231 | :param anns (array of object): annotations to display 232 | :return: None 233 | """ 234 | if len(anns) == 0: 235 | return 0 236 | if 'segmentation' in anns[0] or 'keypoints' in anns[0]: 237 | datasetType = 'instances' 238 | elif 'caption' in anns[0]: 239 | datasetType = 'captions' 240 | else: 241 | raise Exception('datasetType not supported') 242 | if datasetType == 'instances': 243 | ax = plt.gca() 244 | ax.set_autoscale_on(False) 245 | polygons = [] 246 | color = [] 247 | for ann in anns: 248 | c = (np.random.random((1, 3))*0.6+0.4).tolist()[0] 249 | if 'segmentation' in ann: 250 | if type(ann['segmentation']) == list: 251 | # polygon 252 | for seg in ann['segmentation']: 253 | poly = np.array(seg).reshape((int(len(seg)/2), 2)) 254 | polygons.append(Polygon(poly)) 255 | color.append(c) 256 | else: 257 | # mask 258 | t = self.imgs[ann['image_id']] 259 | if type(ann['segmentation']['counts']) == list: 260 | rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width']) 261 | else: 262 | rle = [ann['segmentation']] 263 | m = maskUtils.decode(rle) 264 | img = np.ones( (m.shape[0], m.shape[1], 3) ) 265 | if ann['iscrowd'] == 1: 266 | color_mask = np.array([2.0,166.0,101.0])/255 267 | if ann['iscrowd'] == 0: 268 | color_mask = np.random.random((1, 3)).tolist()[0] 269 | for i in range(3): 270 | img[:,:,i] = color_mask[i] 271 | ax.imshow(np.dstack( (img, m*0.5) )) 272 | if 'keypoints' in ann and type(ann['keypoints']) == list: 273 | # turn skeleton into zero-based index 274 | sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1 275 | kp = np.array(ann['keypoints']) 276 | x = kp[0::3] 277 | y = kp[1::3] 278 | v = kp[2::3] 279 | for sk in sks: 280 | if np.all(v[sk]>0): 281 | plt.plot(x[sk],y[sk], linewidth=3, color=c) 282 | plt.plot(x[v>0], y[v>0],'o',markersize=8, markerfacecolor=c, markeredgecolor='k',markeredgewidth=2) 283 | plt.plot(x[v>1], y[v>1],'o',markersize=8, markerfacecolor=c, markeredgecolor=c, markeredgewidth=2) 284 | p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4) 285 | ax.add_collection(p) 286 | p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2) 287 | ax.add_collection(p) 288 | elif datasetType == 'captions': 289 | for ann in anns: 290 | print(ann['caption']) 291 | 292 | def loadRes(self, resFile): 293 | """ 294 | Load result file and return a result api object. 
295 | :param resFile (str) : file name of result file 296 | :return: res (obj) : result api object 297 | """ 298 | res = COCO() 299 | res.dataset['images'] = [img for img in self.dataset['images']] 300 | 301 | print('Loading and preparing results...') 302 | tic = time.time() 303 | if type(resFile) == str or type(resFile) == unicode: 304 | anns = json.load(open(resFile)) 305 | elif type(resFile) == np.ndarray: 306 | anns = self.loadNumpyAnnotations(resFile) 307 | else: 308 | anns = resFile 309 | assert type(anns) == list, 'results in not an array of objects' 310 | annsImgIds = [ann['image_id'] for ann in anns] 311 | assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ 312 | 'Results do not correspond to current coco set' 313 | if 'caption' in anns[0]: 314 | imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) 315 | res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] 316 | for id, ann in enumerate(anns): 317 | ann['id'] = id+1 318 | elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: 319 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 320 | for id, ann in enumerate(anns): 321 | bb = ann['bbox'] 322 | x1, x2, y1, y2 = [bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]] 323 | if not 'segmentation' in ann: 324 | ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] 325 | ann['area'] = bb[2]*bb[3] 326 | ann['id'] = id+1 327 | ann['iscrowd'] = 0 328 | elif 'segmentation' in anns[0]: 329 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 330 | for id, ann in enumerate(anns): 331 | # now only support compressed RLE format as segmentation results 332 | ann['area'] = maskUtils.area(ann['segmentation']) 333 | if not 'bbox' in ann: 334 | ann['bbox'] = maskUtils.toBbox(ann['segmentation']) 335 | ann['id'] = id+1 336 | ann['iscrowd'] = 0 337 | elif 'keypoints' in anns[0]: 338 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 339 | for id, ann in enumerate(anns): 340 | s = ann['keypoints'] 341 | x = s[0::3] 342 | y = s[1::3] 343 | x0,x1,y0,y1 = np.min(x), np.max(x), np.min(y), np.max(y) 344 | ann['area'] = (x1-x0)*(y1-y0) 345 | ann['id'] = id + 1 346 | ann['bbox'] = [x0,y0,x1-x0,y1-y0] 347 | print('DONE (t={:0.2f}s)'.format(time.time()- tic)) 348 | 349 | res.dataset['annotations'] = anns 350 | res.createIndex() 351 | return res 352 | 353 | def download(self, tarDir = None, imgIds = [] ): 354 | ''' 355 | Download COCO images from mscoco.org server. 
356 | :param tarDir (str): COCO results directory name 357 | imgIds (list): images to be downloaded 358 | :return: 359 | ''' 360 | if tarDir is None: 361 | print('Please specify target directory') 362 | return -1 363 | if len(imgIds) == 0: 364 | imgs = self.imgs.values() 365 | else: 366 | imgs = self.loadImgs(imgIds) 367 | N = len(imgs) 368 | if not os.path.exists(tarDir): 369 | os.makedirs(tarDir) 370 | for i, img in enumerate(imgs): 371 | tic = time.time() 372 | fname = os.path.join(tarDir, img['file_name']) 373 | if not os.path.exists(fname): 374 | urlretrieve(img['coco_url'], fname) 375 | print('downloaded {}/{} images (t={:0.1f}s)'.format(i, N, time.time()- tic)) 376 | 377 | def loadNumpyAnnotations(self, data): 378 | """ 379 | Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class} 380 | :param data (numpy.ndarray) 381 | :return: annotations (python nested list) 382 | """ 383 | print('Converting ndarray to lists...') 384 | assert(type(data) == np.ndarray) 385 | print(data.shape) 386 | assert(data.shape[1] == 7) 387 | N = data.shape[0] 388 | ann = [] 389 | for i in range(N): 390 | if i % 1000000 == 0: 391 | print('{}/{}'.format(i,N)) 392 | ann += [{ 393 | 'image_id' : int(data[i, 0]), 394 | 'bbox' : [ data[i, 1], data[i, 2], data[i, 3], data[i, 4] ], 395 | 'score' : data[i, 5], 396 | 'category_id': int(data[i, 6]), 397 | }] 398 | return ann 399 | 400 | def annToRLE(self, ann): 401 | """ 402 | Convert annotation which can be polygons, uncompressed RLE to RLE. 403 | :return: binary mask (numpy 2D array) 404 | """ 405 | t = self.imgs[ann['image_id']] 406 | h, w = t['height'], t['width'] 407 | segm = ann['segmentation'] 408 | if type(segm) == list: 409 | # polygon -- a single object might consist of multiple parts 410 | # we merge all parts into one mask rle code 411 | rles = maskUtils.frPyObjects(segm, h, w) 412 | rle = maskUtils.merge(rles) 413 | elif type(segm['counts']) == list: 414 | # uncompressed RLE 415 | rle = maskUtils.frPyObjects(segm, h, w) 416 | else: 417 | # rle 418 | rle = ann['segmentation'] 419 | return rle 420 | 421 | def annToMask(self, ann): 422 | """ 423 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 424 | :return: binary mask (numpy 2D array) 425 | """ 426 | rle = self.annToRLE(ann) 427 | m = maskUtils.decode(rle) 428 | return m -------------------------------------------------------------------------------- /pycocotools/coco.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MetaPeak/tensorflow_object_detection_create_coco_tfrecord/6f8bdedd255e0eae4767d62a1ebd670117360dd6/pycocotools/coco.pyc -------------------------------------------------------------------------------- /pycocotools/cocoeval.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import numpy as np 4 | import datetime 5 | import time 6 | from collections import defaultdict 7 | from . import mask as maskUtils 8 | import copy 9 | 10 | class COCOeval: 11 | # Interface for evaluating detection on the Microsoft COCO dataset. 12 | # 13 | # The usage for CocoEval is as follows: 14 | # cocoGt=..., cocoDt=... 
# load dataset and results 15 | # E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object 16 | # E.params.recThrs = ...; # set parameters as desired 17 | # E.evaluate(); # run per image evaluation 18 | # E.accumulate(); # accumulate per image results 19 | # E.summarize(); # display summary metrics of results 20 | # For example usage see evalDemo.m and http://mscoco.org/. 21 | # 22 | # The evaluation parameters are as follows (defaults in brackets): 23 | # imgIds - [all] N img ids to use for evaluation 24 | # catIds - [all] K cat ids to use for evaluation 25 | # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation 26 | # recThrs - [0:.01:1] R=101 recall thresholds for evaluation 27 | # areaRng - [...] A=4 object area ranges for evaluation 28 | # maxDets - [1 10 100] M=3 thresholds on max detections per image 29 | # iouType - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints' 30 | # iouType replaced the now DEPRECATED useSegm parameter. 31 | # useCats - [1] if true use category labels for evaluation 32 | # Note: if useCats=0 category labels are ignored as in proposal scoring. 33 | # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified. 34 | # 35 | # evaluate(): evaluates detections on every image and every category and 36 | # concats the results into the "evalImgs" with fields: 37 | # dtIds - [1xD] id for each of the D detections (dt) 38 | # gtIds - [1xG] id for each of the G ground truths (gt) 39 | # dtMatches - [TxD] matching gt id at each IoU or 0 40 | # gtMatches - [TxG] matching dt id at each IoU or 0 41 | # dtScores - [1xD] confidence of each dt 42 | # gtIgnore - [1xG] ignore flag for each gt 43 | # dtIgnore - [TxD] ignore flag for each dt at each IoU 44 | # 45 | # accumulate(): accumulates the per-image, per-category evaluation 46 | # results in "evalImgs" into the dictionary "eval" with fields: 47 | # params - parameters used for evaluation 48 | # date - date evaluation was performed 49 | # counts - [T,R,K,A,M] parameter dimensions (see above) 50 | # precision - [TxRxKxAxM] precision for every evaluation setting 51 | # recall - [TxKxAxM] max recall for every evaluation setting 52 | # Note: precision and recall==-1 for settings with no gt objects. 53 | # 54 | # See also coco, mask, pycocoDemo, pycocoEvalDemo 55 | # 56 | # Microsoft COCO Toolbox. version 2.0 57 | # Data, paper, and tutorials available at: http://mscoco.org/ 58 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 59 | # Licensed under the Simplified BSD License [see coco/license.txt] 60 | def __init__(self, cocoGt=None, cocoDt=None, iouType='segm'): 61 | ''' 62 | Initialize CocoEval using coco APIs for gt and dt 63 | :param cocoGt: coco object with ground truth annotations 64 | :param cocoDt: coco object with detection results 65 | :return: None 66 | ''' 67 | if not iouType: 68 | print('iouType not specified. 
use default iouType segm') 69 | self.cocoGt = cocoGt # ground truth COCO API 70 | self.cocoDt = cocoDt # detections COCO API 71 | self.params = {} # evaluation parameters 72 | self.evalImgs = defaultdict(list) # per-image per-category evaluation results [KxAxI] elements 73 | self.eval = {} # accumulated evaluation results 74 | self._gts = defaultdict(list) # gt for evaluation 75 | self._dts = defaultdict(list) # dt for evaluation 76 | self.params = Params(iouType=iouType) # parameters 77 | self._paramsEval = {} # parameters for evaluation 78 | self.stats = [] # result summarization 79 | self.ious = {} # ious between all gts and dts 80 | if not cocoGt is None: 81 | self.params.imgIds = sorted(cocoGt.getImgIds()) 82 | self.params.catIds = sorted(cocoGt.getCatIds()) 83 | 84 | 85 | def _prepare(self): 86 | ''' 87 | Prepare ._gts and ._dts for evaluation based on params 88 | :return: None 89 | ''' 90 | def _toMask(anns, coco): 91 | # modify ann['segmentation'] by reference 92 | for ann in anns: 93 | rle = coco.annToRLE(ann) 94 | ann['segmentation'] = rle 95 | p = self.params 96 | if p.useCats: 97 | gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) 98 | dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) 99 | else: 100 | gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds)) 101 | dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds)) 102 | 103 | # convert ground truth to mask if iouType == 'segm' 104 | if p.iouType == 'segm': 105 | _toMask(gts, self.cocoGt) 106 | _toMask(dts, self.cocoDt) 107 | # set ignore flag 108 | for gt in gts: 109 | gt['ignore'] = gt['ignore'] if 'ignore' in gt else 0 110 | gt['ignore'] = 'iscrowd' in gt and gt['iscrowd'] 111 | if p.iouType == 'keypoints': 112 | gt['ignore'] = (gt['num_keypoints'] == 0) or gt['ignore'] 113 | self._gts = defaultdict(list) # gt for evaluation 114 | self._dts = defaultdict(list) # dt for evaluation 115 | for gt in gts: 116 | self._gts[gt['image_id'], gt['category_id']].append(gt) 117 | for dt in dts: 118 | self._dts[dt['image_id'], dt['category_id']].append(dt) 119 | self.evalImgs = defaultdict(list) # per-image per-category evaluation results 120 | self.eval = {} # accumulated evaluation results 121 | 122 | def evaluate(self): 123 | ''' 124 | Run per image evaluation on given images and store results (a list of dict) in self.evalImgs 125 | :return: None 126 | ''' 127 | tic = time.time() 128 | print('Running per image evaluation...') 129 | p = self.params 130 | # add backward compatibility if useSegm is specified in params 131 | if not p.useSegm is None: 132 | p.iouType = 'segm' if p.useSegm == 1 else 'bbox' 133 | print('useSegm (deprecated) is not None. 
Running {} evaluation'.format(p.iouType)) 134 | print('Evaluate annotation type *{}*'.format(p.iouType)) 135 | p.imgIds = list(np.unique(p.imgIds)) 136 | if p.useCats: 137 | p.catIds = list(np.unique(p.catIds)) 138 | p.maxDets = sorted(p.maxDets) 139 | self.params=p 140 | 141 | self._prepare() 142 | # loop through images, area range, max detection number 143 | catIds = p.catIds if p.useCats else [-1] 144 | 145 | if p.iouType == 'segm' or p.iouType == 'bbox': 146 | computeIoU = self.computeIoU 147 | elif p.iouType == 'keypoints': 148 | computeIoU = self.computeOks 149 | self.ious = {(imgId, catId): computeIoU(imgId, catId) \ 150 | for imgId in p.imgIds 151 | for catId in catIds} 152 | 153 | evaluateImg = self.evaluateImg 154 | maxDet = p.maxDets[-1] 155 | self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet) 156 | for catId in catIds 157 | for areaRng in p.areaRng 158 | for imgId in p.imgIds 159 | ] 160 | self._paramsEval = copy.deepcopy(self.params) 161 | toc = time.time() 162 | print('DONE (t={:0.2f}s).'.format(toc-tic)) 163 | 164 | def computeIoU(self, imgId, catId): 165 | p = self.params 166 | if p.useCats: 167 | gt = self._gts[imgId,catId] 168 | dt = self._dts[imgId,catId] 169 | else: 170 | gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]] 171 | dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]] 172 | if len(gt) == 0 and len(dt) ==0: 173 | return [] 174 | inds = np.argsort([-d['score'] for d in dt], kind='mergesort') 175 | dt = [dt[i] for i in inds] 176 | if len(dt) > p.maxDets[-1]: 177 | dt=dt[0:p.maxDets[-1]] 178 | 179 | if p.iouType == 'segm': 180 | g = [g['segmentation'] for g in gt] 181 | d = [d['segmentation'] for d in dt] 182 | elif p.iouType == 'bbox': 183 | g = [g['bbox'] for g in gt] 184 | d = [d['bbox'] for d in dt] 185 | else: 186 | raise Exception('unknown iouType for iou computation') 187 | 188 | # compute iou between each dt and gt region 189 | iscrowd = [int(o['iscrowd']) for o in gt] 190 | ious = maskUtils.iou(d,g,iscrowd) 191 | return ious 192 | 193 | def computeOks(self, imgId, catId): 194 | p = self.params 195 | # dimention here should be Nxm 196 | gts = self._gts[imgId, catId] 197 | dts = self._dts[imgId, catId] 198 | inds = np.argsort([-d['score'] for d in dts], kind='mergesort') 199 | dts = [dts[i] for i in inds] 200 | if len(dts) > p.maxDets[-1]: 201 | dts = dts[0:p.maxDets[-1]] 202 | # if len(gts) == 0 and len(dts) == 0: 203 | if len(gts) == 0 or len(dts) == 0: 204 | return [] 205 | ious = np.zeros((len(dts), len(gts))) 206 | sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89])/10.0 207 | vars = (sigmas * 2)**2 208 | k = len(sigmas) 209 | # compute oks between each detection and ground truth object 210 | for j, gt in enumerate(gts): 211 | # create bounds for ignore regions(double the gt bbox) 212 | g = np.array(gt['keypoints']) 213 | xg = g[0::3]; yg = g[1::3]; vg = g[2::3] 214 | k1 = np.count_nonzero(vg > 0) 215 | bb = gt['bbox'] 216 | x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2 217 | y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2 218 | for i, dt in enumerate(dts): 219 | d = np.array(dt['keypoints']) 220 | xd = d[0::3]; yd = d[1::3] 221 | if k1>0: 222 | # measure the per-keypoint distance if keypoints visible 223 | dx = xd - xg 224 | dy = yd - yg 225 | else: 226 | # measure minimum distance to keypoints in (x0,y0) & (x1,y1) 227 | z = np.zeros((k)) 228 | dx = np.max((z, x0-xd),axis=0)+np.max((z, xd-x1),axis=0) 229 | dy = np.max((z, y0-yd),axis=0)+np.max((z, yd-y1),axis=0) 230 | e = (dx**2 + 
dy**2) / vars / (gt['area']+np.spacing(1)) / 2 231 | if k1 > 0: 232 | e=e[vg > 0] 233 | ious[i, j] = np.sum(np.exp(-e)) / e.shape[0] 234 | return ious 235 | 236 | def evaluateImg(self, imgId, catId, aRng, maxDet): 237 | ''' 238 | perform evaluation for single category and image 239 | :return: dict (single image results) 240 | ''' 241 | p = self.params 242 | if p.useCats: 243 | gt = self._gts[imgId,catId] 244 | dt = self._dts[imgId,catId] 245 | else: 246 | gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]] 247 | dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]] 248 | if len(gt) == 0 and len(dt) ==0: 249 | return None 250 | 251 | for g in gt: 252 | if g['ignore'] or (g['area']aRng[1]): 253 | g['_ignore'] = 1 254 | else: 255 | g['_ignore'] = 0 256 | 257 | # sort dt highest score first, sort gt ignore last 258 | gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort') 259 | gt = [gt[i] for i in gtind] 260 | dtind = np.argsort([-d['score'] for d in dt], kind='mergesort') 261 | dt = [dt[i] for i in dtind[0:maxDet]] 262 | iscrowd = [int(o['iscrowd']) for o in gt] 263 | # load computed ious 264 | ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId] 265 | 266 | T = len(p.iouThrs) 267 | G = len(gt) 268 | D = len(dt) 269 | gtm = np.zeros((T,G)) 270 | dtm = np.zeros((T,D)) 271 | gtIg = np.array([g['_ignore'] for g in gt]) 272 | dtIg = np.zeros((T,D)) 273 | if not len(ious)==0: 274 | for tind, t in enumerate(p.iouThrs): 275 | for dind, d in enumerate(dt): 276 | # information about best match so far (m=-1 -> unmatched) 277 | iou = min([t,1-1e-10]) 278 | m = -1 279 | for gind, g in enumerate(gt): 280 | # if this gt already matched, and not a crowd, continue 281 | if gtm[tind,gind]>0 and not iscrowd[gind]: 282 | continue 283 | # if dt matched to reg gt, and on ignore gt, stop 284 | if m>-1 and gtIg[m]==0 and gtIg[gind]==1: 285 | break 286 | # continue to next gt unless better match made 287 | if ious[dind,gind] < iou: 288 | continue 289 | # if match successful and best so far, store appropriately 290 | iou=ious[dind,gind] 291 | m=gind 292 | # if match made store id of match for both dt and gt 293 | if m ==-1: 294 | continue 295 | dtIg[tind,dind] = gtIg[m] 296 | dtm[tind,dind] = gt[m]['id'] 297 | gtm[tind,m] = d['id'] 298 | # set unmatched detections outside of area range to ignore 299 | a = np.array([d['area']aRng[1] for d in dt]).reshape((1, len(dt))) 300 | dtIg = np.logical_or(dtIg, np.logical_and(dtm==0, np.repeat(a,T,0))) 301 | # store results for given image and category 302 | return { 303 | 'image_id': imgId, 304 | 'category_id': catId, 305 | 'aRng': aRng, 306 | 'maxDet': maxDet, 307 | 'dtIds': [d['id'] for d in dt], 308 | 'gtIds': [g['id'] for g in gt], 309 | 'dtMatches': dtm, 310 | 'gtMatches': gtm, 311 | 'dtScores': [d['score'] for d in dt], 312 | 'gtIgnore': gtIg, 313 | 'dtIgnore': dtIg, 314 | } 315 | 316 | def accumulate(self, p = None): 317 | ''' 318 | Accumulate per image evaluation results and store the result in self.eval 319 | :param p: input params for evaluation 320 | :return: None 321 | ''' 322 | print('Accumulating evaluation results...') 323 | tic = time.time() 324 | if not self.evalImgs: 325 | print('Please run evaluate() first') 326 | # allows input customized parameters 327 | if p is None: 328 | p = self.params 329 | p.catIds = p.catIds if p.useCats == 1 else [-1] 330 | T = len(p.iouThrs) 331 | R = len(p.recThrs) 332 | K = len(p.catIds) if p.useCats else 1 333 | A = len(p.areaRng) 334 | M = len(p.maxDets) 
335 | precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories 336 | recall = -np.ones((T,K,A,M)) 337 | 338 | # create dictionary for future indexing 339 | _pe = self._paramsEval 340 | catIds = _pe.catIds if _pe.useCats else [-1] 341 | setK = set(catIds) 342 | setA = set(map(tuple, _pe.areaRng)) 343 | setM = set(_pe.maxDets) 344 | setI = set(_pe.imgIds) 345 | # get inds to evaluate 346 | k_list = [n for n, k in enumerate(p.catIds) if k in setK] 347 | m_list = [m for n, m in enumerate(p.maxDets) if m in setM] 348 | a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] 349 | i_list = [n for n, i in enumerate(p.imgIds) if i in setI] 350 | I0 = len(_pe.imgIds) 351 | A0 = len(_pe.areaRng) 352 | # retrieve E at each category, area range, and max number of detections 353 | for k, k0 in enumerate(k_list): 354 | Nk = k0*A0*I0 355 | for a, a0 in enumerate(a_list): 356 | Na = a0*I0 357 | for m, maxDet in enumerate(m_list): 358 | E = [self.evalImgs[Nk + Na + i] for i in i_list] 359 | E = [e for e in E if not e is None] 360 | if len(E) == 0: 361 | continue 362 | dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) 363 | 364 | # different sorting method generates slightly different results. 365 | # mergesort is used to be consistent as Matlab implementation. 366 | inds = np.argsort(-dtScores, kind='mergesort') 367 | 368 | dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] 369 | dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] 370 | gtIg = np.concatenate([e['gtIgnore'] for e in E]) 371 | npig = np.count_nonzero(gtIg==0 ) 372 | if npig == 0: 373 | continue 374 | tps = np.logical_and( dtm, np.logical_not(dtIg) ) 375 | fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) 376 | 377 | tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float) 378 | fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float) 379 | for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): 380 | tp = np.array(tp) 381 | fp = np.array(fp) 382 | nd = len(tp) 383 | rc = tp / npig 384 | pr = tp / (fp+tp+np.spacing(1)) 385 | q = np.zeros((R,)) 386 | 387 | if nd: 388 | recall[t,k,a,m] = rc[-1] 389 | else: 390 | recall[t,k,a,m] = 0 391 | 392 | # numpy is slow without cython optimization for accessing elements 393 | # use python array gets significant speed improvement 394 | pr = pr.tolist(); q = q.tolist() 395 | 396 | for i in range(nd-1, 0, -1): 397 | if pr[i] > pr[i-1]: 398 | pr[i-1] = pr[i] 399 | 400 | inds = np.searchsorted(rc, p.recThrs, side='left') 401 | try: 402 | for ri, pi in enumerate(inds): 403 | q[ri] = pr[pi] 404 | except: 405 | pass 406 | precision[t,:,k,a,m] = np.array(q) 407 | self.eval = { 408 | 'params': p, 409 | 'counts': [T, R, K, A, M], 410 | 'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 411 | 'precision': precision, 412 | 'recall': recall, 413 | } 414 | toc = time.time() 415 | print('DONE (t={:0.2f}s).'.format( toc-tic)) 416 | 417 | def summarize(self): 418 | ''' 419 | Compute and display summary metrics for evaluation results. 
417 |     def summarize(self):
418 |         '''
419 |         Compute and display summary metrics for evaluation results.
420 |         Note this function can *only* be applied on the default parameter setting
421 |         '''
422 |         def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
423 |             p = self.params
424 |             iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
425 |             titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
426 |             typeStr = '(AP)' if ap == 1 else '(AR)'
427 |             iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
428 |                 if iouThr is None else '{:0.2f}'.format(iouThr)
429 | 
430 |             aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
431 |             mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
432 |             if ap == 1:
433 |                 # dimension of precision: [TxRxKxAxM]
434 |                 s = self.eval['precision']
435 |                 # IoU
436 |                 if iouThr is not None:
437 |                     t = np.where(iouThr == p.iouThrs)[0]
438 |                     s = s[t]
439 |                 s = s[:, :, :, aind, mind]
440 |             else:
441 |                 # dimension of recall: [TxKxAxM]
442 |                 s = self.eval['recall']
443 |                 if iouThr is not None:
444 |                     t = np.where(iouThr == p.iouThrs)[0]
445 |                     s = s[t]
446 |                 s = s[:, :, aind, mind]
447 |             if len(s[s > -1]) == 0:
448 |                 mean_s = -1
449 |             else:
450 |                 mean_s = np.mean(s[s > -1])
451 |             print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s))
452 |             return mean_s
453 |         def _summarizeDets():
454 |             stats = np.zeros((12,))
455 |             stats[0] = _summarize(1)
456 |             stats[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])
457 |             stats[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2])
458 |             stats[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2])
459 |             stats[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2])
460 |             stats[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2])
461 |             stats[6] = _summarize(0, maxDets=self.params.maxDets[0])
462 |             stats[7] = _summarize(0, maxDets=self.params.maxDets[1])
463 |             stats[8] = _summarize(0, maxDets=self.params.maxDets[2])
464 |             stats[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2])
465 |             stats[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2])
466 |             stats[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2])
467 |             return stats
468 |         def _summarizeKps():
469 |             stats = np.zeros((10,))
470 |             stats[0] = _summarize(1, maxDets=20)
471 |             stats[1] = _summarize(1, maxDets=20, iouThr=.5)
472 |             stats[2] = _summarize(1, maxDets=20, iouThr=.75)
473 |             stats[3] = _summarize(1, maxDets=20, areaRng='medium')
474 |             stats[4] = _summarize(1, maxDets=20, areaRng='large')
475 |             stats[5] = _summarize(0, maxDets=20)
476 |             stats[6] = _summarize(0, maxDets=20, iouThr=.5)
477 |             stats[7] = _summarize(0, maxDets=20, iouThr=.75)
478 |             stats[8] = _summarize(0, maxDets=20, areaRng='medium')
479 |             stats[9] = _summarize(0, maxDets=20, areaRng='large')
480 |             return stats
481 |         if not self.eval:
482 |             raise Exception('Please run accumulate() first')
483 |         iouType = self.params.iouType
484 |         if iouType == 'segm' or iouType == 'bbox':
485 |             summarize = _summarizeDets
486 |         elif iouType == 'keypoints':
487 |             summarize = _summarizeKps
488 |         self.stats = summarize()
489 | 
490 |     def __str__(self):
491 |         self.summarize(); return ''  # __str__ must return a str; summarize() only prints
492 | 
493 | class Params:
494 |     '''
495 |     Params for coco evaluation api
496 |     '''
497 |     def setDetParams(self):
498 |         self.imgIds = []
499 |         self.catIds = []
500 |         # np.arange causes trouble: the data point on arange is slightly larger than the true value
501 |         self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
502 |         self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True)
503 |         self.maxDets = [1, 10, 100]
504 |         self.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
505 |         self.areaRngLbl = ['all', 'small', 'medium', 'large']
506 |         self.useCats = 1
507 | 
508 |     def setKpParams(self):
509 |         self.imgIds = []
510 |         self.catIds = []
511 |         # np.arange causes trouble: the data point on arange is slightly larger than the true value
512 |         self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
513 |         self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True)
514 |         self.maxDets = [20]
515 |         self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
516 |         self.areaRngLbl = ['all', 'medium', 'large']
517 |         self.useCats = 1
518 | 
519 |     def __init__(self, iouType='segm'):
520 |         if iouType == 'segm' or iouType == 'bbox':
521 |             self.setDetParams()
522 |         elif iouType == 'keypoints':
523 |             self.setKpParams()
524 |         else:
525 |             raise Exception('iouType not supported')
526 |         self.iouType = iouType
527 |         # useSegm is deprecated
528 |         self.useSegm = None
--------------------------------------------------------------------------------
/pycocotools/mask.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'tsungyi'
 2 | 
 3 | import pycocotools._mask as _mask
 4 | 
 5 | # Interface for manipulating masks stored in RLE format.
 6 | #
 7 | # RLE is a simple yet efficient format for storing binary masks. RLE
 8 | # first divides a vector (or vectorized image) into a series of piecewise
 9 | # constant regions and then for each piece simply stores the length of
10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
12 | # (note that the odd counts are always the numbers of zeros). Instead of
13 | # storing the counts directly, additional compression is achieved with a
14 | # variable bitrate representation based on a common scheme called LEB128.
15 | #
16 | # Compression is greatest given large piecewise constant regions.
17 | # Specifically, the size of the RLE is proportional to the number of
18 | # *boundaries* in M (or for an image the number of boundaries in the y
19 | # direction). Assuming fairly simple shapes, the RLE representation is
20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
21 | # is substantially lower, especially for large simple objects (large n).
22 | #
23 | # Many common operations on masks can be computed directly using the RLE
24 | # (without need for decoding). This includes computations such as area,
25 | # union, intersection, etc. All of these operations are linear in the
26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
27 | # of the object. Computing these operations on the original mask is O(n).
28 | # Thus, using the RLE can result in substantial computational savings.
29 | #
30 | # The following API functions are defined:
31 | #  encode      - Encode binary masks using RLE.
32 | #  decode      - Decode binary masks encoded via RLE.
33 | #  merge       - Compute union or intersection of encoded masks.
34 | #  iou         - Compute intersection over union between masks.
35 | #  area        - Compute area of encoded masks.
36 | #  toBbox      - Get bounding boxes surrounding encoded masks.
37 | #  frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
38 | #
39 | # Usage:
40 | #  Rs     = encode( masks )
41 | #  masks  = decode( Rs )
42 | #  R      = merge( Rs, intersect=false )
43 | #  o      = iou( dt, gt, iscrowd )
44 | #  a      = area( Rs )
45 | #  bbs    = toBbox( Rs )
46 | #  Rs     = frPyObjects( [pyObjects], h, w )
47 | #
48 | # In the API the following formats are used:
49 | #  Rs      - [dict] Run-length encoding of binary masks
50 | #  R       - dict Run-length encoding of binary mask
51 | #  masks   - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
52 | #  iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
53 | #  bbs     - [nx4] Bounding box(es) stored as [x y w h]
54 | #  poly    - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
55 | #  dt,gt   - May be either bounding boxes or encoded masks
56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
57 | #
58 | # Finally, a note about the intersection over union (iou) computation.
59 | # The standard iou of a ground truth (gt) and detected (dt) object is
60 | #  iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
61 | # For "crowd" regions, we use a modified criterion. If a gt object is
62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
65 | #  iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
66 | # For crowd gt regions we use this modified criterion for the iou.
67 | #
68 | # To compile run "python setup.py build_ext --inplace"
69 | # Please do not contact us for help with compiling.
70 | #
71 | # Microsoft COCO Toolbox.      version 2.0
72 | # Data, paper, and tutorials available at:  http://mscoco.org/
73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
74 | # Licensed under the Simplified BSD License [see coco/license.txt]
75 | 
76 | iou         = _mask.iou
77 | merge       = _mask.merge
78 | frPyObjects = _mask.frPyObjects
79 | 
80 | def encode(bimask):
81 |     if len(bimask.shape) == 3:
82 |         return _mask.encode(bimask)
83 |     elif len(bimask.shape) == 2:
84 |         h, w = bimask.shape
85 |         return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0]
86 | 
87 | def decode(rleObjs):
88 |     if type(rleObjs) == list:
89 |         return _mask.decode(rleObjs)
90 |     else:
91 |         return _mask.decode([rleObjs])[:, :, 0]
92 | 
93 | def area(rleObjs):
94 |     if type(rleObjs) == list:
95 |         return _mask.area(rleObjs)
96 |     else:
97 |         return _mask.area([rleObjs])[0]
98 | 
99 | def toBbox(rleObjs):
100 |     if type(rleObjs) == list:
101 |         return _mask.toBbox(rleObjs)
102 |     else:
103 |         return _mask.toBbox([rleObjs])[0]
--------------------------------------------------------------------------------
/pycocotools/mask.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MetaPeak/tensorflow_object_detection_create_coco_tfrecord/6f8bdedd255e0eae4767d62a1ebd670117360dd6/pycocotools/mask.pyc
--------------------------------------------------------------------------------
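As a quick check that the build works, the mask API documented in the comments above can be exercised with a minimal sketch like the following (illustrative only; it assumes the `_mask` extension has been compiled as described in the README and that numpy is installed):
```
import numpy as np
from pycocotools import mask as maskUtils

# A toy 4x4 binary mask; encode() expects uint8 data in column-major
# (Fortran) order, hence the np.asfortranarray call.
m = np.zeros((4, 4), dtype=np.uint8)
m[1:3, 1:3] = 1

rle = maskUtils.encode(np.asfortranarray(m))  # 2D mask -> single RLE dict
print(maskUtils.area(rle))                    # 4 (pixels inside the mask)
print(maskUtils.toBbox(rle))                  # [1. 1. 2. 2.] as [x y w h]

decoded = maskUtils.decode(rle)               # back to an HxW uint8 array
assert (decoded == m).all()

# iou() takes lists of RLEs plus one iscrowd flag per ground-truth object.
print(maskUtils.iou([rle], [rle], [0]))       # [[1.]]
```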