├── .gitignore ├── Algorithm_DetEva.py ├── Algorithm_IoU.py ├── README.md ├── gt.zip ├── requirements.txt ├── rrc_evaluation_funcs.py ├── script.py └── submit.zip /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea 3 | *.pyc 4 | 5 | -------------------------------------------------------------------------------- /Algorithm_DetEva.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | Algorithm named DetEval 5 | It is slightly different from original algorithm(see https://perso.liris.cnrs.fr/christian.wolf/software/deteval/index.html) 6 | Please read《 Object Count / Area Graphs for the Evaluation of Object Detection and Segmentation Algorithms 》for details 7 | ''' 8 | from collections import namedtuple 9 | import rrc_evaluation_funcs 10 | import importlib 11 | 12 | def evaluation_imports(): 13 | """ 14 | evaluation_imports: Dictionary ( key = module name , value = alias ) with python modules used in the evaluation. 15 | """ 16 | return { 17 | 'math': 'math', 18 | 'numpy': 'np' 19 | } 20 | 21 | 22 | def default_evaluation_params(): 23 | """ 24 | default_evaluation_params: Default parameters to use for the validation and evaluation. 25 | """ 26 | return { 27 | 'AREA_RECALL_CONSTRAINT': 0.8, 28 | 'AREA_PRECISION_CONSTRAINT': 0.4, 29 | 'EV_PARAM_IND_CENTER_DIFF_THR': 1, 30 | 'MTYPE_OO_O': 1., 31 | 'MTYPE_OM_O': 0.8, 32 | 'MTYPE_OM_M': 1., 33 | 'GT_SAMPLE_NAME_2_ID': 'gt_img_([0-9]+).txt', 34 | 'DET_SAMPLE_NAME_2_ID': 'res_img_([0-9]+).txt', 35 | 'CRLF': False # Lines are delimited by Windows CRLF format 36 | } 37 | 38 | 39 | def validate_data(gtFilePath, submFilePath, evaluationParams): 40 | """ 41 | Method validate_data: validates that all files in the results folder are correct (have the correct name contents). 42 | Validates also that there are no missing files in the folder. 
43 | If some error detected, the method raises the error 44 | """ 45 | gt = rrc_evaluation_funcs.load_zip_file(gtFilePath, evaluationParams['GT_SAMPLE_NAME_2_ID']) 46 | 47 | subm = rrc_evaluation_funcs.load_zip_file(submFilePath, evaluationParams['DET_SAMPLE_NAME_2_ID'], True) 48 | 49 | # Validate format of GroundTruth 50 | for k in gt: 51 | rrc_evaluation_funcs.validate_lines_in_file(k, gt[k], evaluationParams['CRLF'], True, True) 52 | 53 | # Validate format of results 54 | for k in subm: 55 | if (k in gt) == False: 56 | raise Exception("The sample %s not present in GT" % k) 57 | 58 | rrc_evaluation_funcs.validate_lines_in_file(k, subm[k], evaluationParams['CRLF'], True, True) 59 | 60 | 61 | def evaluate_method(gtFilePath, submFilePath, evaluationParams): 62 | """ 63 | Method evaluate_method: evaluate method and returns the results 64 | Results. Dictionary with the following values: 65 | - method (required) Global method metrics. Ex: { 'Precision':0.8,'Recall':0.9 } 66 | - samples (optional) Per sample metrics. 
Ex: {'sample1' : { 'Precision':0.8,'Recall':0.9 } , 'sample2' : { 'Precision':0.8,'Recall':0.9 } 67 | """ 68 | 69 | for module, alias in evaluation_imports().items(): 70 | globals()[alias] = importlib.import_module(module) 71 | 72 | def one_to_one_match(row, col): 73 | cont = 0 74 | for j in range(len(recallMat[0])): 75 | if recallMat[row, j] >= evaluationParams['AREA_RECALL_CONSTRAINT'] and precisionMat[row, j] >= \ 76 | evaluationParams['AREA_PRECISION_CONSTRAINT']: 77 | cont = cont + 1 78 | if (cont != 1): 79 | return False 80 | cont = 0 81 | for i in range(len(recallMat)): 82 | if recallMat[i, col] >= evaluationParams['AREA_RECALL_CONSTRAINT'] and precisionMat[i, col] >= \ 83 | evaluationParams['AREA_PRECISION_CONSTRAINT']: 84 | cont = cont + 1 85 | if (cont != 1): 86 | return False 87 | 88 | if recallMat[row, col] >= evaluationParams['AREA_RECALL_CONSTRAINT'] and precisionMat[row, col] >= \ 89 | evaluationParams['AREA_PRECISION_CONSTRAINT']: 90 | return True 91 | return False 92 | 93 | def one_to_many_match(gtNum): 94 | many_sum = 0 95 | detRects = [] 96 | for detNum in range(len(recallMat[0])): 97 | if gtRectMat[gtNum] == 0 and detRectMat[detNum] == 0 and detNum not in detDontCareRectsNum: 98 | if precisionMat[gtNum, detNum] >= evaluationParams['AREA_PRECISION_CONSTRAINT']: 99 | many_sum += recallMat[gtNum, detNum] 100 | detRects.append(detNum) 101 | if many_sum >= evaluationParams['AREA_RECALL_CONSTRAINT']: 102 | return True, detRects 103 | else: 104 | return False, [] 105 | 106 | def many_to_one_match(detNum): 107 | many_sum = 0 108 | gtRects = [] 109 | for gtNum in range(len(recallMat)): 110 | if gtRectMat[gtNum] == 0 and detRectMat[detNum] == 0 and gtNum not in gtDontCareRectsNum: 111 | if recallMat[gtNum, detNum] >= evaluationParams['AREA_RECALL_CONSTRAINT']: 112 | many_sum += precisionMat[gtNum, detNum] 113 | gtRects.append(gtNum) 114 | if many_sum >= evaluationParams['AREA_PRECISION_CONSTRAINT']: 115 | return True, gtRects 116 | else: 117 | return 
False, [] 118 | 119 | def area(a, b): 120 | dx = min(a.xmax, b.xmax) - max(a.xmin, b.xmin) + 1 121 | dy = min(a.ymax, b.ymax) - max(a.ymin, b.ymin) + 1 122 | if (dx >= 0) and (dy >= 0): 123 | return dx * dy 124 | else: 125 | return 0. 126 | 127 | def center(r): 128 | x = float(r.xmin) + float(r.xmax - r.xmin + 1) / 2. 129 | y = float(r.ymin) + float(r.ymax - r.ymin + 1) / 2. 130 | return Point(x, y) 131 | 132 | def point_distance(r1, r2): 133 | distx = math.fabs(r1.x - r2.x) 134 | disty = math.fabs(r1.y - r2.y) 135 | return math.sqrt(distx * distx + disty * disty) 136 | 137 | def center_distance(r1, r2): 138 | return point_distance(center(r1), center(r2)) 139 | 140 | def diag(r): 141 | w = (r.xmax - r.xmin + 1) 142 | h = (r.ymax - r.ymin + 1) 143 | return math.sqrt(h * h + w * w) 144 | 145 | perSampleMetrics = {} 146 | 147 | methodRecallSum = 0 148 | methodPrecisionSum = 0 149 | 150 | Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax') 151 | Point = namedtuple('Point', 'x y') 152 | 153 | gt = rrc_evaluation_funcs.load_zip_file(gtFilePath, evaluationParams['GT_SAMPLE_NAME_2_ID']) 154 | subm = rrc_evaluation_funcs.load_zip_file(submFilePath, evaluationParams['DET_SAMPLE_NAME_2_ID'], True) 155 | 156 | numGt = 0 157 | numDet = 0 158 | 159 | for resFile in gt: 160 | 161 | gtFile = rrc_evaluation_funcs.decode_utf8(gt[resFile]) 162 | recall = 0 163 | precision = 0 164 | hmean = 0 165 | recallAccum = 0. 166 | precisionAccum = 0. 
167 | gtRects = [] 168 | detRects = [] 169 | gtPolPoints = [] 170 | detPolPoints = [] 171 | gtDontCareRectsNum = [] # Array of Ground Truth Rectangles' keys marked as don't Care 172 | detDontCareRectsNum = [] # Array of Detected Rectangles' matched with a don't Care GT 173 | pairs = [] 174 | evaluationLog = "" 175 | 176 | recallMat = np.empty([1, 1]) 177 | precisionMat = np.empty([1, 1]) 178 | 179 | pointsList, _, transcriptionsList = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(gtFile, 180 | evaluationParams[ 181 | 'CRLF'], 182 | True, True, 183 | False) 184 | for n in range(len(pointsList)): 185 | points = pointsList[n] 186 | transcription = transcriptionsList[n] 187 | dontCare = transcription == "###" 188 | 189 | # convert x1,y1,x2,y2,x3,y3,x4,y4 to xmin,ymin,xmax,ymax 190 | if len(points) == 8: 191 | points_tmp = np.array(points).reshape(4, 2) 192 | points_x = points_tmp[:, 0] 193 | points_y = points_tmp[:, 1] 194 | xmin = points_x[np.argmin(points_x)] 195 | xmax = points_x[np.argmax(points_x)] 196 | ymin = points_y[np.argmin(points_y)] 197 | ymax = points_y[np.argmax(points_y)] 198 | points = [xmin, ymin, xmax, ymax] 199 | gtRect = Rectangle(*points) 200 | gtRects.append(gtRect) 201 | gtPolPoints.append(points) 202 | if dontCare: 203 | gtDontCareRectsNum.append(len(gtRects) - 1) 204 | 205 | evaluationLog += "GT rectangles: " + str(len(gtRects)) + ( 206 | " (" + str(len(gtDontCareRectsNum)) + " don't care)\n" if len(gtDontCareRectsNum) > 0 else "\n") 207 | 208 | if resFile in subm: 209 | detFile = rrc_evaluation_funcs.decode_utf8(subm[resFile]) 210 | pointsList, _, _ = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(detFile, 211 | evaluationParams['CRLF'], 212 | True, True, False) 213 | for n in range(len(pointsList)): 214 | points = pointsList[n] 215 | # print points 216 | detRect = Rectangle(*points) 217 | detRects.append(detRect) 218 | detPolPoints.append(points) 219 | if len(gtDontCareRectsNum) > 0: 220 | for dontCareRectNum in 
gtDontCareRectsNum: 221 | dontCareRect = gtRects[dontCareRectNum] 222 | intersected_area = area(dontCareRect, detRect) 223 | rdDimensions = ((detRect.xmax - detRect.xmin + 1) * (detRect.ymax - detRect.ymin + 1)) 224 | if (rdDimensions == 0): 225 | precision = 0 226 | else: 227 | precision = intersected_area / rdDimensions 228 | if (precision > evaluationParams['AREA_PRECISION_CONSTRAINT']): 229 | detDontCareRectsNum.append(len(detRects) - 1) 230 | break 231 | 232 | evaluationLog += "DET rectangles: " + str(len(detRects)) + ( 233 | " (" + str(len(detDontCareRectsNum)) + " don't care)\n" if len(detDontCareRectsNum) > 0 else "\n") 234 | 235 | if len(gtRects) == 0: 236 | recall = 1 237 | precision = 0 if len(detRects) > 0 else 1 238 | 239 | if len(detRects) > 0: 240 | # Calculate recall and precision matrixs 241 | outputShape = [len(gtRects), len(detRects)] 242 | recallMat = np.empty(outputShape) 243 | precisionMat = np.empty(outputShape) 244 | gtRectMat = np.zeros(len(gtRects), np.int8) 245 | detRectMat = np.zeros(len(detRects), np.int8) 246 | for gtNum in range(len(gtRects)): 247 | for detNum in range(len(detRects)): 248 | rG = gtRects[gtNum] 249 | rD = detRects[detNum] 250 | intersected_area = area(rG, rD) 251 | rgDimensions = ((rG.xmax - rG.xmin + 1) * (rG.ymax - rG.ymin + 1)) 252 | rdDimensions = ((rD.xmax - rD.xmin + 1) * (rD.ymax - rD.ymin + 1)) 253 | recallMat[gtNum, detNum] = 0 if rgDimensions == 0 else intersected_area / rgDimensions 254 | precisionMat[gtNum, detNum] = 0 if rdDimensions == 0 else intersected_area / rdDimensions 255 | 256 | # Find one-to-one matches 257 | evaluationLog += "Find one-to-one matches\n" 258 | for gtNum in range(len(gtRects)): 259 | for detNum in range(len(detRects)): 260 | if gtRectMat[gtNum] == 0 and detRectMat[ 261 | detNum] == 0 and gtNum not in gtDontCareRectsNum and detNum not in detDontCareRectsNum: 262 | match = one_to_one_match(gtNum, detNum) 263 | if match is True: 264 | rG = gtRects[gtNum] 265 | rD = detRects[detNum] 266 
| normDist = center_distance(rG, rD) 267 | normDist /= diag(rG) + diag(rD) 268 | normDist *= 2.0 269 | if normDist < evaluationParams['EV_PARAM_IND_CENTER_DIFF_THR']: 270 | gtRectMat[gtNum] = 1 271 | detRectMat[detNum] = 1 272 | recallAccum += evaluationParams['MTYPE_OO_O'] 273 | precisionAccum += evaluationParams['MTYPE_OO_O'] 274 | pairs.append({'gt': gtNum, 'det': detNum, 'type': 'OO'}) 275 | evaluationLog += "Match GT #" + str(gtNum) + " with Det #" + str(detNum) + "\n" 276 | else: 277 | evaluationLog += "Match Discarded GT #" + str(gtNum) + " with Det #" + str( 278 | detNum) + " normDist: " + str(normDist) + " \n" 279 | # Find one-to-many matches 280 | evaluationLog += "Find one-to-many matches\n" 281 | for gtNum in range(len(gtRects)): 282 | if gtNum not in gtDontCareRectsNum: 283 | match, matchesDet = one_to_many_match(gtNum) 284 | if match is True: 285 | gtRectMat[gtNum] = 1 286 | recallAccum += evaluationParams['MTYPE_OM_O'] 287 | precisionAccum += evaluationParams['MTYPE_OM_O'] * len(matchesDet) 288 | pairs.append({'gt': gtNum, 'det': matchesDet, 'type': 'OM'}) 289 | for detNum in matchesDet: 290 | detRectMat[detNum] = 1 291 | evaluationLog += "Match GT #" + str(gtNum) + " with Det #" + str(matchesDet) + "\n" 292 | 293 | # Find many-to-one matches 294 | evaluationLog += "Find many-to-one matches\n" 295 | for detNum in range(len(detRects)): 296 | if detNum not in detDontCareRectsNum: 297 | match, matchesGt = many_to_one_match(detNum) 298 | if match is True: 299 | detRectMat[detNum] = 1 300 | recallAccum += evaluationParams['MTYPE_OM_M'] * len(matchesGt) 301 | precisionAccum += evaluationParams['MTYPE_OM_M'] 302 | pairs.append({'gt': matchesGt, 'det': detNum, 'type': 'MO'}) 303 | for gtNum in matchesGt: 304 | gtRectMat[gtNum] = 1 305 | evaluationLog += "Match GT #" + str(matchesGt) + " with Det #" + str(detNum) + "\n" 306 | 307 | numGtCare = (len(gtRects) - len(gtDontCareRectsNum)) 308 | if numGtCare == 0: 309 | recall = float(1) 310 | precision = float(0) 
if len(detRects) > 0 else float(1) 311 | else: 312 | recall = float(recallAccum) / numGtCare 313 | precision = float(0) if (len(detRects) - len(detDontCareRectsNum)) == 0 else float( 314 | precisionAccum) / (len(detRects) - len(detDontCareRectsNum)) 315 | hmean = 0 if (precision + recall) == 0 else 2.0 * precision * recall / (precision + recall) 316 | 317 | evaluationLog += "Recall = " + str(recall) + "\n" 318 | evaluationLog += "Precision = " + str(precision) + "\n" 319 | 320 | methodRecallSum += recallAccum 321 | methodPrecisionSum += precisionAccum 322 | numGt += len(gtRects) - len(gtDontCareRectsNum) 323 | numDet += len(detRects) - len(detDontCareRectsNum) 324 | 325 | perSampleMetrics[resFile] = { 326 | 'precision': precision, 327 | 'recall': recall, 328 | 'hmean': hmean, 329 | 'pairs': pairs, 330 | 'recallMat': [] if len(detRects) > 100 else recallMat.tolist(), 331 | 'precisionMat': [] if len(detRects) > 100 else precisionMat.tolist(), 332 | 'gtPolPoints': gtPolPoints, 333 | 'detPolPoints': detPolPoints, 334 | 'gtDontCare': gtDontCareRectsNum, 335 | 'detDontCare': detDontCareRectsNum, 336 | 'evaluationParams': evaluationParams, 337 | 'evaluationLog': evaluationLog 338 | } 339 | 340 | methodRecall = 0 if numGt == 0 else methodRecallSum / numGt 341 | methodPrecision = 0 if numDet == 0 else methodPrecisionSum / numDet 342 | methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * methodRecall * methodPrecision / ( 343 | methodRecall + methodPrecision) 344 | 345 | methodMetrics = {'precision': methodPrecision, 'recall': methodRecall, 'hmean': methodHmean} 346 | 347 | resDict = {'calculated': True, 'Message': '', 'method': methodMetrics, 'per_sample': perSampleMetrics} 348 | 349 | return resDict 350 | -------------------------------------------------------------------------------- /Algorithm_IoU.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from collections 
import namedtuple 4 | import rrc_evaluation_funcs 5 | from rrc_evaluation_funcs import logger 6 | import importlib 7 | import re 8 | from shapely.geometry import Polygon 9 | import numpy as np 10 | 11 | 12 | def default_evaluation_params(): 13 | """ 14 | default_evaluation_params: Default parameters to use for the validation and evaluation. 15 | """ 16 | return { 17 | 'IOU_CONSTRAINT': 0.5, 18 | 'AREA_PRECISION_CONSTRAINT': 0.5, 19 | 'GT_SAMPLE_NAME_2_ID': 'gt_img_([0-9]+).txt', 20 | 'DET_SAMPLE_NAME_2_ID': 'res_img_([0-9]+).txt', 21 | 'LTRB': False, # LTRB:2points(left,top,right,bottom) or 4 points(x1,y1,x2,y2,x3,y3,x4,y4) 22 | 'CRLF': False, # Lines are delimited by Windows CRLF format 23 | 'CONFIDENCES': False, # Detections must include confidence value. AP will be calculated 24 | 'PER_SAMPLE_RESULTS': True, # Generate per sample results and produce data for visualization 25 | 'E2E': False #compute average edit distance for end to end evaluation 26 | } 27 | 28 | 29 | def validate_data(gtFilePath, submFilePath, evaluationParams): 30 | """ 31 | Method validate_data: validates that all files in the results folder are correct (have the correct name contents). 32 | Validates also that there are no missing files in the folder. 
33 | If some error detected, the method raises the error 34 | """ 35 | gt = rrc_evaluation_funcs.load_zip_file(gtFilePath, evaluationParams['GT_SAMPLE_NAME_2_ID']) 36 | 37 | subm = rrc_evaluation_funcs.load_zip_file(submFilePath, evaluationParams['DET_SAMPLE_NAME_2_ID'], True) 38 | 39 | # Validate format of GroundTruth 40 | for k in gt: 41 | rrc_evaluation_funcs.validate_lines_in_file(k, gt[k], evaluationParams['CRLF'], evaluationParams['LTRB'], True) 42 | 43 | # Validate format of results 44 | for k in subm: 45 | if (k in gt) == False: 46 | raise Exception("The sample %s not present in GT" % k) 47 | 48 | rrc_evaluation_funcs.validate_lines_in_file(k, subm[k], evaluationParams['CRLF'], evaluationParams['LTRB'], 49 | evaluationParams['E2E'], evaluationParams['CONFIDENCES']) 50 | 51 | 52 | def evaluate_method(gtFilePath, submFilePath, evaluationParams): 53 | """ 54 | Method evaluate_method: evaluate method and returns the results 55 | Results. Dictionary with the following values: 56 | - method (required) Global method metrics. Ex: { 'Precision':0.8,'Recall':0.9 } 57 | - samples (optional) Per sample metrics. 
Ex: {'sample1' : { 'Precision':0.8,'Recall':0.9 } , 'sample2' : { 'Precision':0.8,'Recall':0.9 } 58 | """ 59 | if evaluationParams['E2E']: 60 | from hanziconv import HanziConv 61 | import editdistance 62 | 63 | def polygon_from_points(points): 64 | """ 65 | Returns a Polygon object to use with the Polygon2 class from a list of 8 points: x1,y1,x2,y2,x3,y3,x4,y4 66 | """ 67 | resBoxes = np.empty([1, 8], dtype='int32') 68 | resBoxes[0, 0] = int(points[0]) 69 | resBoxes[0, 4] = int(points[1]) 70 | resBoxes[0, 1] = int(points[2]) 71 | resBoxes[0, 5] = int(points[3]) 72 | resBoxes[0, 2] = int(points[4]) 73 | resBoxes[0, 6] = int(points[5]) 74 | resBoxes[0, 3] = int(points[6]) 75 | resBoxes[0, 7] = int(points[7]) 76 | pointMat = resBoxes[0].reshape([2, 4]).T 77 | return Polygon(pointMat) 78 | 79 | def rectangle_to_polygon(rect): 80 | resBoxes = np.empty([1, 8], dtype='int32') 81 | resBoxes[0, 0] = int(rect.xmin) 82 | resBoxes[0, 4] = int(rect.ymax) 83 | resBoxes[0, 1] = int(rect.xmin) 84 | resBoxes[0, 5] = int(rect.ymin) 85 | resBoxes[0, 2] = int(rect.xmax) 86 | resBoxes[0, 6] = int(rect.ymin) 87 | resBoxes[0, 3] = int(rect.xmax) 88 | resBoxes[0, 7] = int(rect.ymax) 89 | 90 | pointMat = resBoxes[0].reshape([2, 4]).T 91 | 92 | return Polygon(pointMat) 93 | 94 | def rectangle_to_points(rect): 95 | points = [int(rect.xmin), int(rect.ymax), int(rect.xmax), int(rect.ymax), int(rect.xmax), int(rect.ymin), 96 | int(rect.xmin), int(rect.ymin)] 97 | return points 98 | 99 | def get_union(pD, pG): 100 | areaA = pD.area 101 | areaB = pG.area 102 | return areaA + areaB - get_intersection(pD, pG) 103 | 104 | def get_intersection_over_union(pD, pG): 105 | try: 106 | return get_intersection(pD, pG) / get_union(pD, pG) 107 | except: 108 | return 0 109 | 110 | def get_intersection(pD, pG): 111 | pInt = pD & pG 112 | if pInt.is_empty: 113 | return 0 114 | return pInt.area 115 | 116 | def compute_ap(confList, matchList, numGtCare): 117 | correct = 0 118 | AP = 0 119 | if len(confList) > 0: 
120 | confList = np.array(confList) 121 | matchList = np.array(matchList) 122 | sorted_ind = np.argsort(-confList) 123 | confList = confList[sorted_ind] 124 | matchList = matchList[sorted_ind] 125 | for n in range(len(confList)): 126 | match = matchList[n] 127 | if match: 128 | correct += 1 129 | AP += float(correct) / (n + 1) 130 | 131 | if numGtCare > 0: 132 | AP /= numGtCare 133 | 134 | return AP 135 | 136 | #from RTWC17 137 | def normalize_txt(st): 138 | """ 139 | Normalize Chinese text strings by: 140 | - remove puncutations and other symbols 141 | - convert traditional Chinese to simplified 142 | - convert English characters to lower cases 143 | """ 144 | st = ''.join(st.split(' ')) 145 | st = re.sub("\"", "", st) 146 | # remove any this not one of Chinese character, ascii 0-9, and ascii a-z and A-Z 147 | new_st = re.sub(r'[^\u4e00-\u9fa5\u0041-\u005a\u0061-\u007a0-9]+', '', st) 148 | # convert Traditional Chinese to Simplified Chinese 149 | new_st = HanziConv.toSimplified(new_st) 150 | # convert uppercase English letters to lowercase 151 | new_st = new_st.lower() 152 | return new_st 153 | 154 | def text_distance(str1, str2): 155 | str1 = normalize_txt(str1) 156 | str2 = normalize_txt(str2) 157 | return editdistance.eval(str1, str2) 158 | 159 | perSampleMetrics = {} 160 | 161 | matchedSum = 0 162 | 163 | Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax') 164 | 165 | gt = rrc_evaluation_funcs.load_zip_file(gtFilePath, evaluationParams['GT_SAMPLE_NAME_2_ID']) 166 | subm = rrc_evaluation_funcs.load_zip_file(submFilePath, evaluationParams['DET_SAMPLE_NAME_2_ID'], True) 167 | 168 | numGlobalCareGt = 0 169 | numGlobalCareDet = 0 170 | 171 | arrGlobalConfidences = [] 172 | arrGlobalMatches = [] 173 | 174 | #total edit distance 175 | total_dist = 0 176 | 177 | for resFile in gt: 178 | gtFile = rrc_evaluation_funcs.decode_utf8(gt[resFile]) 179 | 180 | detMatched = 0 181 | 182 | iouMat = np.empty([1, 1]) 183 | 184 | gtPols = [] 185 | detPols = [] 186 | 187 | 
gtTrans = [] 188 | detTrans = [] 189 | 190 | gtPolPoints = [] 191 | detPolPoints = [] 192 | 193 | # Array of Ground Truth Polygons' keys marked as don't Care 194 | gtDontCarePolsNum = [] 195 | # Array of Detected Polygons' matched with a don't Care GT 196 | detDontCarePolsNum = [] 197 | 198 | pairs = [] 199 | detMatchedNums = [] 200 | 201 | arrSampleConfidences = [] 202 | arrSampleMatch = [] 203 | sampleAP = 0 204 | 205 | example_dist = 0 206 | match_tuples = [] 207 | 208 | evaluationLog = "" 209 | 210 | pointsList, _, transcriptionsList = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(gtFile,evaluationParams['CRLF'],evaluationParams['LTRB'],True, False) 211 | for n in range(len(pointsList)): 212 | points = pointsList[n] 213 | transcription = transcriptionsList[n] 214 | dontCare = (transcription == "###") or (transcription=="?") 215 | if evaluationParams['LTRB']: 216 | gtRect = Rectangle(*points) 217 | gtPol = rectangle_to_polygon(gtRect) 218 | else: 219 | gtPol = polygon_from_points(points) 220 | gtPols.append(gtPol) 221 | gtPolPoints.append(points) 222 | gtTrans.append(transcription) 223 | if dontCare: 224 | gtDontCarePolsNum.append(len(gtPols) - 1) 225 | 226 | evaluationLog += "GT polygons: " + str(len(gtPols)) + ( 227 | " (" + str(len(gtDontCarePolsNum)) + " don't care)\n" if len(gtDontCarePolsNum) > 0 else "\n") 228 | 229 | if resFile in subm: 230 | 231 | detFile = rrc_evaluation_funcs.decode_utf8(subm[resFile]) 232 | 233 | pointsList, confidencesList, transcriptionsList = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(detFile,evaluationParams['CRLF'],evaluationParams['LTRB'],evaluationParams['E2E'],evaluationParams['CONFIDENCES']) 234 | for n in range(len(pointsList)): 235 | points = pointsList[n] 236 | 237 | if evaluationParams['LTRB']: 238 | detRect = Rectangle(*points) 239 | detPol = rectangle_to_polygon(detRect) 240 | else: 241 | detPol = polygon_from_points(points) 242 | 243 | detPols.append(detPol) 244 | 
detPolPoints.append(points) 245 | if evaluationParams['E2E']: 246 | transcription = transcriptionsList[n] 247 | detTrans.append(transcription) 248 | if len(gtDontCarePolsNum) > 0: 249 | for dontCarePol in gtDontCarePolsNum: 250 | dontCarePol = gtPols[dontCarePol] 251 | intersected_area = get_intersection(dontCarePol, detPol) 252 | pdDimensions = detPol.area 253 | precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions 254 | if (precision > evaluationParams['AREA_PRECISION_CONSTRAINT']): 255 | detDontCarePolsNum.append(len(detPols) - 1) 256 | break 257 | 258 | evaluationLog += "DET polygons: " + str(len(detPols)) + ( 259 | " (" + str(len(detDontCarePolsNum)) + " don't care)\n" if len(detDontCarePolsNum) > 0 else "\n") 260 | 261 | if len(gtPols) > 0 and len(detPols) > 0: 262 | # Calculate IoU and precision matrixs 263 | outputShape = [len(gtPols), len(detPols)] 264 | iouMat = np.empty(outputShape) 265 | gtRectMat = np.zeros(len(gtPols), np.int8) 266 | detRectMat = np.zeros(len(detPols), np.int8) 267 | for gtNum in range(len(gtPols)): 268 | for detNum in range(len(detPols)): 269 | pG = gtPols[gtNum] 270 | pD = detPols[detNum] 271 | iouMat[gtNum, detNum] = get_intersection_over_union(pD, pG) 272 | 273 | # match dt index of every gt 274 | gtMatch = np.empty(len(gtPols), np.int8) 275 | gtMatch.fill(-1) 276 | # match gt index of every dt 277 | dtMatch = np.empty(len(detPols), dtype=np.int8) 278 | dtMatch.fill(-1) 279 | 280 | for gtNum in range(len(gtPols)): 281 | max_iou = 0 282 | match_dt_idx = -1 283 | 284 | for detNum in range(len(detPols)): 285 | if gtRectMat[gtNum] == 0 and detRectMat[detNum] == 0\ 286 | and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum: 287 | if iouMat[gtNum, detNum] > evaluationParams['IOU_CONSTRAINT']: 288 | gtRectMat[gtNum] = 1 289 | detRectMat[detNum] = 1 290 | detMatched += 1 291 | pairs.append({'gt': gtNum, 'det': detNum}) 292 | detMatchedNums.append(detNum) 293 | evaluationLog += "Match GT #" + str(gtNum) 
+ " with Det #" + str(detNum) + "\n" 294 | 295 | if evaluationParams['E2E'] and gtMatch[gtNum] == -1 and dtMatch[detNum] == -1\ 296 | and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum: 297 | if iouMat[gtNum, detNum] > evaluationParams['IOU_CONSTRAINT'] and iouMat[gtNum, detNum] > max_iou: 298 | max_iou = iouMat[gtNum, detNum] 299 | match_dt_idx = detNum 300 | 301 | if evaluationParams['E2E'] and match_dt_idx >= 0: 302 | gtMatch[gtNum] = match_dt_idx 303 | dtMatch[match_dt_idx] = gtNum 304 | 305 | if evaluationParams['E2E']: 306 | for gtNum in range(len(gtPols)): 307 | if gtNum in gtDontCarePolsNum: 308 | continue 309 | gt_text = gtTrans[gtNum] 310 | if gtMatch[gtNum] >= 0: 311 | dt_text = detTrans[gtMatch[gtNum]] 312 | else: 313 | dt_text = u'' 314 | dist = text_distance(gt_text, dt_text) 315 | example_dist += dist 316 | match_tuples.append((gt_text, dt_text, dist)) 317 | match_tuples.append(("===============","==============", -1)) 318 | for detNum in range(len(detPols)): 319 | if detNum in detDontCarePolsNum: 320 | continue 321 | if dtMatch[detNum] == -1: 322 | gt_text = u'' 323 | dt_text = detTrans[detNum] 324 | dist = text_distance(gt_text, dt_text) 325 | example_dist += dist 326 | match_tuples.append((gt_text, dt_text, dist)) 327 | 328 | if evaluationParams['CONFIDENCES']: 329 | for detNum in range(len(detPols)): 330 | if detNum not in detDontCarePolsNum: 331 | # we exclude the don't care detections 332 | match = detNum in detMatchedNums 333 | 334 | arrSampleConfidences.append(confidencesList[detNum]) 335 | arrSampleMatch.append(match) 336 | 337 | arrGlobalConfidences.append(confidencesList[detNum]) 338 | arrGlobalMatches.append(match) 339 | #avoid when det file don't exist, example_dist=0 340 | elif evaluationParams['E2E']: 341 | match_tuples.append(("===============", "==============", -1)) 342 | dt_text = u'' 343 | for gtNum in range(len(gtPols)): 344 | if gtNum in gtDontCarePolsNum: 345 | continue 346 | gt_text = gtTrans[gtNum] 347 | 
dist = text_distance(gt_text, dt_text) 348 | example_dist += dist 349 | match_tuples.append((gt_text, dt_text, dist)) 350 | total_dist += example_dist 351 | 352 | if evaluationParams['E2E']: 353 | logger.debug('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>') 354 | logger.debug("file:{}".format(resFile)) 355 | for tp in match_tuples: 356 | gt_text, dt_text, dist = tp 357 | logger.debug(u'GT: "{}" matched to DT: "{}", distance = {}'.format(gt_text, dt_text, dist)) 358 | logger.debug('Distance = {:f}'.format(example_dist)) 359 | logger.debug('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<') 360 | 361 | numGtCare = (len(gtPols) - len(gtDontCarePolsNum)) 362 | numDetCare = (len(detPols) - len(detDontCarePolsNum)) 363 | if numGtCare == 0: 364 | recall = float(1) 365 | precision = float(0) if numDetCare > 0 else float(1) 366 | sampleAP = precision 367 | else: 368 | recall = float(detMatched) / numGtCare 369 | precision = 0 if numDetCare == 0 else float(detMatched) / numDetCare 370 | if evaluationParams['CONFIDENCES'] and evaluationParams['PER_SAMPLE_RESULTS']: 371 | sampleAP = compute_ap(arrSampleConfidences, arrSampleMatch, numGtCare) 372 | 373 | hmean = 0 if (precision + recall) == 0 else 2.0 * precision * recall / (precision + recall) 374 | matchedSum += detMatched 375 | numGlobalCareGt += numGtCare 376 | numGlobalCareDet += numDetCare 377 | if evaluationParams['PER_SAMPLE_RESULTS']: 378 | perSampleMetrics[resFile] = { 379 | 'precision': precision, 380 | 'recall': recall, 381 | 'hmean': hmean, 382 | 'pairs': pairs, 383 | 'AP': sampleAP, 384 | 'iouMat': [] if len(detPols) > 100 else iouMat.tolist(), 385 | 'gtPolPoints': gtPolPoints, 386 | 'detPolPoints': detPolPoints, 387 | 'gtDontCare': gtDontCarePolsNum, 388 | 'detDontCare': detDontCarePolsNum, 389 | 'evaluationParams': evaluationParams, 390 | 'evaluationLog': evaluationLog 391 | } 392 | if evaluationParams['E2E']: 393 | perSampleMetrics[resFile]['exampleDistance'] = example_dist 394 | # print("file:{} 
exampleDistance:{}".format(resFile,example_dist)) 395 | 396 | # Compute MAP and MAR 397 | AP = 0 398 | if evaluationParams['CONFIDENCES']: 399 | AP = compute_ap(arrGlobalConfidences, arrGlobalMatches, numGlobalCareGt) 400 | 401 | methodRecall = 0 if numGlobalCareGt == 0 else float(matchedSum) / numGlobalCareGt 402 | methodPrecision = 0 if numGlobalCareDet == 0 else float(matchedSum) / numGlobalCareDet 403 | methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * methodRecall * methodPrecision / ( 404 | methodRecall + methodPrecision) 405 | methodDistance = 0 if len(gt) == 0 else float(total_dist)/len(gt) 406 | 407 | methodMetrics = {'precision': methodPrecision, 'recall': methodRecall, 'hmean': methodHmean, 'AP': AP, 'distance': methodDistance} 408 | 409 | resDict = {'calculated': True, 'Message': '', 'method': methodMetrics, 'per_sample': perSampleMetrics} 410 | 411 | return resDict 412 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | MODIFIED BY [ICDAR CODES](http://rrc.cvc.uab.es/?com=introduction) 2 | INSTRUCTIONS FOR THE STANDALONE SCRIPTS 3 | 4 | ### HOW TO USE 5 | Requirements: 6 | - Python version 3.6+ 7 | - pip install -r requirements.txt 8 | - Each Task requires different Python modules. When running the script, if some module is not installed you will see a notification and installation instructions. 9 | 10 | Procedure: 11 | Download the ZIP file for the requested script and unzip it to a directory. 12 | 13 | Open a terminal in the directory and run the command: 14 | 15 | **python script.py –g gt.zip –s submit.zip -p '{\"LTRB\":true,\"E2E\":true}'** 16 | 17 | Pycharm IDE use parameters 18 | 19 | **-g gt.zip -s submit.zip -p {\\"LTRB\\":true,\\"E2E\\":true}** 20 | 21 | If you have already installed all the required modules, then you will see the method’s results or an error message if the submitted file is not correct. 
22 | 23 | parameters: 24 | 25 | -g: Path of the Ground Truth file. In most cases, the Ground Truth will be included in the same Zip file named 'gt.zip', 'gt.txt' or 'gt.json'. If not, you will be able to get it on the Downloads page of the Task. 26 | 27 | -s: Path of your method's results file. 28 | 29 | Optional parameters: 30 | 31 | -o: Path to a directory where to copy the file ‘results.zip’ that contains per-sample results. 32 | 33 | -p: JSON string parameters to override the script default parameters. The parameters that can be overridden are inside the function 'default_evaluation_params' located at the beginning of the evaluation script. 34 | 35 | 36 | ``` 37 | when using Algorithm_IoU -p 38 | 'IOU_CONSTRAINT': 0.5, 39 | 'AREA_PRECISION_CONSTRAINT': 0.5, 40 | 'GT_SAMPLE_NAME_2_ID': 'gt_img_([0-9]+).txt', 41 | 'DET_SAMPLE_NAME_2_ID': 'res_img_([0-9]+).txt', 42 | 'LTRB': False, # LTRB:2points(left,top,right,bottom) or 4 points(x1,y1,x2,y2,x3,y3,x4,y4) 43 | 'CRLF': False, # Lines are delimited by Windows CRLF format 44 | 'CONFIDENCES': False, # Detections must include confidence value. 
AP will be calculated 45 | 'PER_SAMPLE_RESULTS': True, # Generate per sample results and produce data for visualization 46 | 'E2E': False #compute average edit distance for end to end evaluation 47 | ``` 48 | ``` 49 | when using Algorithm_DetEva -p 50 | 'AREA_RECALL_CONSTRAINT': 0.8, 51 | 'AREA_PRECISION_CONSTRAINT': 0.4, 52 | 'EV_PARAM_IND_CENTER_DIFF_THR': 1, 53 | 'MTYPE_OO_O': 1., 54 | 'MTYPE_OM_O': 0.8, 55 | 'MTYPE_OM_M': 1., 56 | 'GT_SAMPLE_NAME_2_ID': 'gt_img_([0-9]+).txt', 57 | 'DET_SAMPLE_NAME_2_ID': 'res_img_([0-9]+).txt', 58 | 'CRLF': False # Lines are delimited by Windows CRLF format 59 | ``` 60 | 61 | -c: choose algorithm for different tasks.(Challenges 1、2 use 'DetEva' Challenges 4 use 'IoU', default 'IoU') 62 | 63 | **Example: python script.py –g gt.zip –s submit.zip –o ./ -p '{\"CRLF\":true}' -c DetEva** 64 | 65 | 66 | ### THEORY 67 | see [my blog](https://blog.csdn.net/liuxiaoheng1992/article/details/82632594) 68 | 69 | ***If this repository helps you, please star it. Thanks.*** 70 | -------------------------------------------------------------------------------- /gt.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MichaelHL-ai/OCR_EVALUATION/5dfb9f4e507a44994bec0c67e54d81970683db0b/gt.zip -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | hanziconv 2 | editdistance 3 | numpy 4 | shapely -------------------------------------------------------------------------------- /rrc_evaluation_funcs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # encoding: UTF-8 3 | import json 4 | import sys 5 | 6 | sys.path.append('./') 7 | import zipfile 8 | import re 9 | import sys 10 | import os 11 | import codecs 12 | import logging 13 | 14 | logging.basicConfig() 15 | logger = logging.getLogger() 16 | 
# getLogger() here is a harmless re-fetch of the root logger configured above.
logger = logging.getLogger()
logger.setLevel(logging.INFO)


def _zip_entry_key(name, fileNameRegExp):
    """Return the lookup key for a ZIP entry, or None when it does not match.

    With an empty regex every entry matches and the full name is the key;
    otherwise the first capture group (when present) becomes the key.
    """
    if fileNameRegExp == "":
        return name
    m = re.match(fileNameRegExp, name)
    if m is None:
        return None
    return m.group(1) if m.groups() else name


def load_zip_file_keys(file, fileNameRegExp=''):
    """
    Returns an array with the entries of the ZIP file that match the regular expression.
    The keys are the file names or the capturing group defined in fileNameRegExp.
    """
    try:
        archive = zipfile.ZipFile(file, mode='r', allowZip64=True)
    except Exception:
        raise Exception('Error loading the ZIP archive.')

    pairs = []
    for name in archive.namelist():
        keyName = _zip_entry_key(name, fileNameRegExp)
        if keyName is not None:
            pairs.append(keyName)
    return pairs


def load_zip_file(file, fileNameRegExp='', allEntries=False):
    """
    Returns a dict {key: raw bytes} with the contents (filtered by fileNameRegExp) of a ZIP file.
    The keys are the file names or the capturing group defined in fileNameRegExp.
    allEntries=True makes a non-matching entry an error instead of silently skipping it.
    """
    try:
        archive = zipfile.ZipFile(file, mode='r', allowZip64=True)
    except Exception:
        raise Exception('Error loading the ZIP archive')

    contents = {}
    for name in archive.namelist():
        keyName = _zip_entry_key(name, fileNameRegExp)
        if keyName is None:
            if allEntries:
                raise Exception('ZIP entry not valid: %s' % name)
        else:
            contents[keyName] = archive.read(name)
    return contents


def decode_utf8(raw):
    """
    Returns a Unicode string on success (stripping a UTF-8 BOM if present), or None on failure.
    """
    try:
        # Round-trip through decode('replace') so undecodable bytes are
        # substituted rather than failing outright.
        raw = codecs.decode(raw, 'utf-8', 'replace')
        raw = raw.encode('utf8')
        if raw.startswith(codecs.BOM_UTF8):
            # BUGFIX: the replacement must be bytes (b''); the original str ''
            # raised TypeError under Python 3, rejecting every BOM-prefixed file.
            raw = raw.replace(codecs.BOM_UTF8, b'', 1)
        return raw.decode('utf-8')
    except Exception:
        return None


def validate_lines_in_file(fileName, file_contents, CRLF=True, LTRB=True, withTranscription=False, withConfidence=False,
                           imWidth=0, imHeight=0):
    """
    Validates all lines of the file, calling the line validation function for each one.
    Raises on the first invalid line.
    """
    utf8File = decode_utf8(file_contents)
    if utf8File is None:
        raise Exception("The file %s is not UTF-8" % fileName)

    for line in utf8File.split("\r\n" if CRLF else "\n"):
        line = line.replace("\r", "").replace("\n", "")
        if line != "":
            try:
                validate_tl_line(line, LTRB, withTranscription, withConfidence, imWidth, imHeight)
            except Exception as e:
                # BUGFIX: keep the message as str; the original encoded it to
                # bytes, which renders as b'...' under Python 3.
                raise Exception(
                    "Line in sample not valid. Sample: %s Line: %s Error: %s" % (fileName, line, str(e)))


def validate_tl_line(line, LTRB=True, withTranscription=True, withConfidence=True, imWidth=0, imHeight=0):
    """
    Validate the format of the line. If the line is not valid an exception will be raised.
    If imWidth and imHeight are specified, all points must be inside the image bounds.
    Possible values are:
    LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription]
    LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription]
    """
    get_tl_line_values(line, LTRB, withTranscription, withConfidence, imWidth, imHeight)


# Regex fragments shared by every line format.
_NUMBER = r'(-?[0-9]+\.?[0-9]*)'
_SEPARATOR = r'\s*,\s*'
# NOTE: the original confidence pattern was '[0-1].?[0-9]*' with an unescaped
# dot (matching any character); the escaped form is what was clearly intended.
_CONFIDENCE = r'([0-1]\.?[0-9]*)'


def _line_pattern(numPoints, withConfidence, withTranscription):
    """Build the full-line regex for numPoints coordinates plus optional fields."""
    pattern = r'^\s*' + _SEPARATOR.join([_NUMBER] * numPoints)
    if withConfidence:
        pattern += _SEPARATOR + _CONFIDENCE
    if withTranscription:
        pattern += r'\s*,(.*)$'
    elif withConfidence or numPoints == 8:
        pattern += r'\s*$'
    else:
        # Bare LTRB lines may carry a trailing comma (as in the original regex).
        pattern += r'\s*,?\s*$'
    return pattern


def _expected_format(numPoints, withConfidence, withTranscription):
    """Human-readable field list used in format-error messages."""
    base = "xmin,ymin,xmax,ymax" if numPoints == 4 else "x1,y1,x2,y2,x3,y3,x4,y4"
    if withConfidence:
        base += ",confidence"
    if withTranscription:
        base += ",transcription"
    return base


def get_tl_line_values(line, LTRB=True, withTranscription=False, withConfidence=False, imWidth=0, imHeight=0):
    """
    Validate the format of the line. If the line is not valid an exception will be raised.
    If imWidth and imHeight are specified, all points must be inside the image bounds.
    Possible values are:
    LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription]
    LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription]
    Returns values from a text line: points, confidence (0.0 when absent),
    transcription ("" when absent).
    """
    confidence = 0.0
    transcription = ""
    numPoints = 4 if LTRB else 8

    m = re.match(_line_pattern(numPoints, withConfidence, withTranscription), line)
    if m is None:
        raise Exception("Format incorrect. Should be: %s"
                        % _expected_format(numPoints, withConfidence, withTranscription))

    points = [float(m.group(i)) for i in range(1, numPoints + 1)]

    if LTRB:
        xmin, ymin, xmax, ymax = points
        if xmax < xmin:
            raise Exception("Xmax value (%s) not valid (Xmax < Xmin)." % (xmax))
        if ymax < ymin:
            raise Exception("Ymax value (%s) not valid (Ymax < Ymin)." % (ymax))
        if imWidth > 0 and imHeight > 0:
            validate_point_inside_bounds(xmin, ymin, imWidth, imHeight)
            validate_point_inside_bounds(xmax, ymax, imWidth, imHeight)
    else:
        if not validate_clockwise_points(points):
            # Convert an anticlockwise quadrilateral to clockwise order.
            points = [points[0], points[1], points[6], points[7],
                      points[4], points[5], points[2], points[3]]
        if imWidth > 0 and imHeight > 0:
            for i in range(0, 8, 2):
                validate_point_inside_bounds(points[i], points[i + 1], imWidth, imHeight)

    if withConfidence:
        try:
            confidence = float(m.group(numPoints + 1))
        except ValueError:
            raise Exception("Confidence value must be a float")

    if withTranscription:
        posTranscription = numPoints + (2 if withConfidence else 1)
        transcription = m.group(posTranscription)
        m2 = re.match(r'^\s*\"(.*)\"\s*$', transcription)
        if m2 is not None:
            # Transcription with double quotes: extract the value and
            # un-escape \\ and \" sequences.
            transcription = m2.group(1).replace("\\\\", "\\").replace("\\\"", "\"")

    return points, confidence, transcription


def validate_point_inside_bounds(x, y, imWidth, imHeight):
    """Raise if (x, y) falls outside the [0, imWidth] x [0, imHeight] image box."""
    if x < 0 or x > imWidth:
        raise Exception("X value (%s) not valid. Image dimensions: (%s,%s)" % (x, imWidth, imHeight))
    if y < 0 or y > imHeight:
        # BUGFIX: the original message had 5 placeholders ("Sample: %s Line:%s")
        # for only 3 arguments, raising TypeError instead of the intended error.
        raise Exception("Y value (%s) not valid. Image dimensions: (%s,%s)" % (y, imWidth, imHeight))


def validate_clockwise_points(points):
    """
    Returns True when the 4 points delimiting a quadrilateral are in clockwise
    order (image coordinates, Y axis pointing downwards); raises when the list
    does not hold exactly 8 values. Uses the signed-area (shoelace) sum.
    """
    if len(points) != 8:
        raise Exception("Points list not valid." + str(len(points)))

    corners = [(int(points[i]), int(points[i + 1])) for i in range(0, 8, 2)]
    summatory = sum(
        (corners[(i + 1) % 4][0] - corners[i][0]) * (corners[(i + 1) % 4][1] + corners[i][1])
        for i in range(4)
    )
    if summatory > 0:
        logger.debug(
            "Points are not clockwise. The coordinates of bounding quadrilaterals have to be given in clockwise order. Regarding the correct interpretation of 'clockwise' remember that the image coordinate system used is the standard one, with the image origin at the upper left, the X axis extending to the right and Y axis extending downwards.")
        return False
    return True


def get_tl_line_values_from_file_contents(content, CRLF=True, LTRB=True, withTranscription=False, withConfidence=False,
                                          imWidth=0, imHeight=0, sort_by_confidences=True):
    """
    Returns all points, confidences and transcriptions of a file in three lists.
    Valid line formats:
    xmin,ymin,xmax,ymax,[confidence],[transcription]
    x1,y1,x2,y2,x3,y3,x4,y4,[confidence],[transcription]
    When confidences are present and sort_by_confidences is set, the three
    lists are re-ordered by descending confidence (needed for AP computation).
    """
    pointsList = []
    transcriptionsList = []
    confidencesList = []

    for line in content.split("\r\n" if CRLF else "\n"):
        line = line.replace("\r", "").replace("\n", "")
        if line != "":
            points, confidence, transcription = get_tl_line_values(line, LTRB, withTranscription, withConfidence,
                                                                   imWidth, imHeight)
            pointsList.append(points)
            transcriptionsList.append(transcription)
            confidencesList.append(confidence)

    if withConfidence and confidencesList and sort_by_confidences:
        import numpy as np
        sorted_ind = np.argsort(-np.array(confidencesList))
        confidencesList = [confidencesList[i] for i in sorted_ind]
        pointsList = [pointsList[i] for i in sorted_ind]
        transcriptionsList = [transcriptionsList[i] for i in sorted_ind]

    return pointsList, confidencesList, transcriptionsList


def main_evaluation(args, default_evaluation_params_fn, validate_data_fn, evaluate_method_fn, show_result=True,
                    per_sample=True):
    """
    Validates a method, evaluates it and, on success, generates a ZIP file with
    a JSON entry for each sample.
    Params:
        args:
            -g ground truth path,
            -s detection results path,
            -p JSON string overriding the defaults from default_evaluation_params_fn,
            -o directory where to write 'results.zip' with per-sample results,
        evaluate_method_fn: evaluates the submission and returns a result dict.
    Returns the result dict; resDict['calculated'] is False on failure.
    """
    evalParams = default_evaluation_params_fn()
    if args.p:
        evalParams.update(json.loads(args.p))

    resDict = {'calculated': True, 'Message': '', 'method': '{}', 'per_sample': '{}'}
    try:
        validate_data_fn(args.g, args.s, evalParams)
        evalData = evaluate_method_fn(args.g, args.s, evalParams)
        resDict.update(evalData)
    except Exception as e:
        import traceback
        traceback.print_exc()
        resDict['Message'] = str(e)
        resDict['calculated'] = False

    outZip = None
    if args.o:
        if not os.path.exists(args.o):
            os.makedirs(args.o)
        resultsOutputname = os.path.join(args.o, 'results.zip')
        outZip = zipfile.ZipFile(resultsOutputname, mode='w', allowZip64=True)

        # method.json holds only the global results, never the bulky
        # per-sample data, which is written as individual entries below.
        del resDict['per_sample']
        if 'output_items' in resDict:
            del resDict['output_items']
        outZip.writestr('method.json', json.dumps(resDict))

    if not resDict['calculated']:
        if show_result:
            sys.stderr.write('Error!\n' + resDict['Message'] + '\n\n')
        if outZip is not None:
            outZip.close()
        return resDict

    if outZip is not None:
        if per_sample == True:
            # BUGFIX: dict.iteritems() is Python 2 only and crashed here under
            # Python 3; items() works on both.
            for k, v in evalData['per_sample'].items():
                outZip.writestr(k + '.json', json.dumps(v))
        if 'output_items' in evalData:
            for k, v in evalData['output_items'].items():
                outZip.writestr(k, v)
        outZip.close()

    if show_result:
        sys.stdout.write("Calculated!")
        sys.stdout.write(json.dumps(resDict['method']))

    return resDict


def main_validation(args, default_evaluation_params_fn, validate_data_fn):
    """
    Validates a method's submission format only; exits 0 on success, 101 on failure.
    Params:
        default_evaluation_params_fn: returns the default evaluation parameters.
        validate_data_fn: validates the correct format of the submission.
    """
    try:
        evalParams = default_evaluation_params_fn()
        if args.p:
            # BUGFIX: the original parsed json.loads(args.p[1:-1]), stripping the
            # first and last characters of a well-formed '{...}' string;
            # main_evaluation parses args.p verbatim, so do the same here.
            evalParams.update(json.loads(args.p))

        validate_data_fn(args.g, args.s, evalParams)
        print('SUCCESS')
        sys.exit(0)
    except Exception as e:
        print(e)
        sys.exit(101)

# --------------------------------------------------------------------------------
# /script.py:
# --------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
'''
default parameters, you can modify them by
-p '{\"GT_SAMPLE_NAME_2_ID\":\"([0-9]+).txt\",\"DET_SAMPLE_NAME_2_ID\":\"([0-9]+).txt\",\"CONFIDENCES\":true}'

'IOU_CONSTRAINT': 0.5,
'AREA_PRECISION_CONSTRAINT': 0.5,
'GT_SAMPLE_NAME_2_ID': 'gt_img_([0-9]+).txt',
'DET_SAMPLE_NAME_2_ID': 'res_img_([0-9]+).txt',
'LTRB': False, # LTRB:2points(left,top,right,bottom) or 4 points(x1,y1,x2,y2,x3,y3,x4,y4)
'CRLF': False, # Lines are delimited by Windows CRLF format
'CONFIDENCES': False, # Detections must include confidence value. AP will be calculated
'PER_SAMPLE_RESULTS': True # Generate per sample results and produce data for visualization

'''
import argparse


def argparser():
    """Build the command-line interface shared by both evaluation algorithms."""
    parse = argparse.ArgumentParser()
    parse.add_argument('-g', dest='g', default='./gt.zip', help="Path of the Ground Truth file. In most cases, the Ground Truth will be included in the same Zip file named 'gt.zip', gt.txt' or 'gt.json'. If not, you will be able to get it on the Downloads page of the Task.")
    parse.add_argument('-s', dest='s', default='./submit.zip', help="Path of your method's results file.")
    parse.add_argument('-o', dest='o', help="Path to a directory where to copy the file 'results.zip' that containts per-sample results.")
    parse.add_argument('-p', dest='p', help="JSON string parameters to override the script default parameters. The parameters that can be overrided are inside the function 'default_evaluation_params' located at the begining of the evaluation Script. use: -p '{\"CRLF\":true}'")
    parse.add_argument('-c', dest='choice', default='IoU', help="choose algorithm for differet tasks.(Challenges 1、2 use 'DetEva' Challenges 4 use 'IoU', default 'IoU')")
    return parse.parse_args()


if __name__ == '__main__':
    args = argparser()
    # Project imports are kept inside the entry point so merely importing this
    # module has no side effects.
    import rrc_evaluation_funcs
    if args.choice == 'DetEva':
        from Algorithm_DetEva import default_evaluation_params, validate_data, evaluate_method
    elif args.choice == 'IoU':
        from Algorithm_IoU import default_evaluation_params, validate_data, evaluate_method
    else:
        # BUGFIX: the original fell through to a NameError on an unknown -c
        # value; fail with an explicit message instead.
        raise ValueError("Unknown algorithm choice %r; expected 'DetEva' or 'IoU'" % args.choice)

    rrc_evaluation_funcs.main_evaluation(args, default_evaluation_params, validate_data, evaluate_method)
# --------------------------------------------------------------------------------
# /submit.zip: https://raw.githubusercontent.com/MichaelHL-ai/OCR_EVALUATION/5dfb9f4e507a44994bec0c67e54d81970683db0b/submit.zip
# --------------------------------------------------------------------------------