├── .gitignore ├── Algorithm_DetEva.py ├── Algorithm_IoU.py ├── README.md ├── gt.zip ├── requirements.txt ├── rrc_evaluation_funcs.py ├── script.py └── submit.zip /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea 3 | *.pyc 4 | 5 | -------------------------------------------------------------------------------- /Algorithm_DetEva.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | Algorithm named DetEval 5 | It is slightly different from original algorithm(see https://perso.liris.cnrs.fr/christian.wolf/software/deteval/index.html) 6 | Please read《 Object Count / Area Graphs for the Evaluation of Object Detection and Segmentation Algorithms 》for details 7 | ''' 8 | from collections import namedtuple 9 | import rrc_evaluation_funcs 10 | import importlib 11 | 12 | def evaluation_imports(): 13 | """ 14 | evaluation_imports: Dictionary ( key = module name , value = alias ) with python modules used in the evaluation. 15 | """ 16 | return { 17 | 'math': 'math', 18 | 'numpy': 'np' 19 | } 20 | 21 | 22 | def default_evaluation_params(): 23 | """ 24 | default_evaluation_params: Default parameters to use for the validation and evaluation. 25 | """ 26 | return { 27 | 'AREA_RECALL_CONSTRAINT': 0.8, 28 | 'AREA_PRECISION_CONSTRAINT': 0.4, 29 | 'EV_PARAM_IND_CENTER_DIFF_THR': 1, 30 | 'MTYPE_OO_O': 1., 31 | 'MTYPE_OM_O': 0.8, 32 | 'MTYPE_OM_M': 1., 33 | 'GT_SAMPLE_NAME_2_ID': 'gt_img_([0-9]+).txt', 34 | 'DET_SAMPLE_NAME_2_ID': 'res_img_([0-9]+).txt', 35 | 'CRLF': False # Lines are delimited by Windows CRLF format 36 | } 37 | 38 | 39 | def validate_data(gtFilePath, submFilePath, evaluationParams): 40 | """ 41 | Method validate_data: validates that all files in the results folder are correct (have the correct name contents). 42 | Validates also that there are no missing files in the folder. 
43 | If some error detected, the method raises the error 44 | """ 45 | gt = rrc_evaluation_funcs.load_zip_file(gtFilePath, evaluationParams['GT_SAMPLE_NAME_2_ID']) 46 | 47 | subm = rrc_evaluation_funcs.load_zip_file(submFilePath, evaluationParams['DET_SAMPLE_NAME_2_ID'], True) 48 | 49 | # Validate format of GroundTruth 50 | for k in gt: 51 | rrc_evaluation_funcs.validate_lines_in_file(k, gt[k], evaluationParams['CRLF'], True, True) 52 | 53 | # Validate format of results 54 | for k in subm: 55 | if (k in gt) == False: 56 | raise Exception("The sample %s not present in GT" % k) 57 | 58 | rrc_evaluation_funcs.validate_lines_in_file(k, subm[k], evaluationParams['CRLF'], True, True) 59 | 60 | 61 | def evaluate_method(gtFilePath, submFilePath, evaluationParams): 62 | """ 63 | Method evaluate_method: evaluate method and returns the results 64 | Results. Dictionary with the following values: 65 | - method (required) Global method metrics. Ex: { 'Precision':0.8,'Recall':0.9 } 66 | - samples (optional) Per sample metrics. 
Ex: {'sample1' : { 'Precision':0.8,'Recall':0.9 } , 'sample2' : { 'Precision':0.8,'Recall':0.9 } 67 | """ 68 | 69 | for module, alias in evaluation_imports().items(): 70 | globals()[alias] = importlib.import_module(module) 71 | 72 | def one_to_one_match(row, col): 73 | cont = 0 74 | for j in range(len(recallMat[0])): 75 | if recallMat[row, j] >= evaluationParams['AREA_RECALL_CONSTRAINT'] and precisionMat[row, j] >= \ 76 | evaluationParams['AREA_PRECISION_CONSTRAINT']: 77 | cont = cont + 1 78 | if (cont != 1): 79 | return False 80 | cont = 0 81 | for i in range(len(recallMat)): 82 | if recallMat[i, col] >= evaluationParams['AREA_RECALL_CONSTRAINT'] and precisionMat[i, col] >= \ 83 | evaluationParams['AREA_PRECISION_CONSTRAINT']: 84 | cont = cont + 1 85 | if (cont != 1): 86 | return False 87 | 88 | if recallMat[row, col] >= evaluationParams['AREA_RECALL_CONSTRAINT'] and precisionMat[row, col] >= \ 89 | evaluationParams['AREA_PRECISION_CONSTRAINT']: 90 | return True 91 | return False 92 | 93 | def one_to_many_match(gtNum): 94 | many_sum = 0 95 | detRects = [] 96 | for detNum in range(len(recallMat[0])): 97 | if gtRectMat[gtNum] == 0 and detRectMat[detNum] == 0 and detNum not in detDontCareRectsNum: 98 | if precisionMat[gtNum, detNum] >= evaluationParams['AREA_PRECISION_CONSTRAINT']: 99 | many_sum += recallMat[gtNum, detNum] 100 | detRects.append(detNum) 101 | if many_sum >= evaluationParams['AREA_RECALL_CONSTRAINT']: 102 | return True, detRects 103 | else: 104 | return False, [] 105 | 106 | def many_to_one_match(detNum): 107 | many_sum = 0 108 | gtRects = [] 109 | for gtNum in range(len(recallMat)): 110 | if gtRectMat[gtNum] == 0 and detRectMat[detNum] == 0 and gtNum not in gtDontCareRectsNum: 111 | if recallMat[gtNum, detNum] >= evaluationParams['AREA_RECALL_CONSTRAINT']: 112 | many_sum += precisionMat[gtNum, detNum] 113 | gtRects.append(gtNum) 114 | if many_sum >= evaluationParams['AREA_PRECISION_CONSTRAINT']: 115 | return True, gtRects 116 | else: 117 | return 
False, [] 118 | 119 | def area(a, b): 120 | dx = min(a.xmax, b.xmax) - max(a.xmin, b.xmin) + 1 121 | dy = min(a.ymax, b.ymax) - max(a.ymin, b.ymin) + 1 122 | if (dx >= 0) and (dy >= 0): 123 | return dx * dy 124 | else: 125 | return 0. 126 | 127 | def center(r): 128 | x = float(r.xmin) + float(r.xmax - r.xmin + 1) / 2. 129 | y = float(r.ymin) + float(r.ymax - r.ymin + 1) / 2. 130 | return Point(x, y) 131 | 132 | def point_distance(r1, r2): 133 | distx = math.fabs(r1.x - r2.x) 134 | disty = math.fabs(r1.y - r2.y) 135 | return math.sqrt(distx * distx + disty * disty) 136 | 137 | def center_distance(r1, r2): 138 | return point_distance(center(r1), center(r2)) 139 | 140 | def diag(r): 141 | w = (r.xmax - r.xmin + 1) 142 | h = (r.ymax - r.ymin + 1) 143 | return math.sqrt(h * h + w * w) 144 | 145 | perSampleMetrics = {} 146 | 147 | methodRecallSum = 0 148 | methodPrecisionSum = 0 149 | 150 | Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax') 151 | Point = namedtuple('Point', 'x y') 152 | 153 | gt = rrc_evaluation_funcs.load_zip_file(gtFilePath, evaluationParams['GT_SAMPLE_NAME_2_ID']) 154 | subm = rrc_evaluation_funcs.load_zip_file(submFilePath, evaluationParams['DET_SAMPLE_NAME_2_ID'], True) 155 | 156 | numGt = 0 157 | numDet = 0 158 | 159 | for resFile in gt: 160 | 161 | gtFile = rrc_evaluation_funcs.decode_utf8(gt[resFile]) 162 | recall = 0 163 | precision = 0 164 | hmean = 0 165 | recallAccum = 0. 166 | precisionAccum = 0. 
167 | gtRects = [] 168 | detRects = [] 169 | gtPolPoints = [] 170 | detPolPoints = [] 171 | gtDontCareRectsNum = [] # Array of Ground Truth Rectangles' keys marked as don't Care 172 | detDontCareRectsNum = [] # Array of Detected Rectangles' matched with a don't Care GT 173 | pairs = [] 174 | evaluationLog = "" 175 | 176 | recallMat = np.empty([1, 1]) 177 | precisionMat = np.empty([1, 1]) 178 | 179 | pointsList, _, transcriptionsList = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(gtFile, 180 | evaluationParams[ 181 | 'CRLF'], 182 | True, True, 183 | False) 184 | for n in range(len(pointsList)): 185 | points = pointsList[n] 186 | transcription = transcriptionsList[n] 187 | dontCare = transcription == "###" 188 | 189 | # convert x1,y1,x2,y2,x3,y3,x4,y4 to xmin,ymin,xmax,ymax 190 | if len(points) == 8: 191 | points_tmp = np.array(points).reshape(4, 2) 192 | points_x = points_tmp[:, 0] 193 | points_y = points_tmp[:, 1] 194 | xmin = points_x[np.argmin(points_x)] 195 | xmax = points_x[np.argmax(points_x)] 196 | ymin = points_y[np.argmin(points_y)] 197 | ymax = points_y[np.argmax(points_y)] 198 | points = [xmin, ymin, xmax, ymax] 199 | gtRect = Rectangle(*points) 200 | gtRects.append(gtRect) 201 | gtPolPoints.append(points) 202 | if dontCare: 203 | gtDontCareRectsNum.append(len(gtRects) - 1) 204 | 205 | evaluationLog += "GT rectangles: " + str(len(gtRects)) + ( 206 | " (" + str(len(gtDontCareRectsNum)) + " don't care)\n" if len(gtDontCareRectsNum) > 0 else "\n") 207 | 208 | if resFile in subm: 209 | detFile = rrc_evaluation_funcs.decode_utf8(subm[resFile]) 210 | pointsList, _, _ = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(detFile, 211 | evaluationParams['CRLF'], 212 | True, True, False) 213 | for n in range(len(pointsList)): 214 | points = pointsList[n] 215 | # print points 216 | detRect = Rectangle(*points) 217 | detRects.append(detRect) 218 | detPolPoints.append(points) 219 | if len(gtDontCareRectsNum) > 0: 220 | for dontCareRectNum in 
gtDontCareRectsNum: 221 | dontCareRect = gtRects[dontCareRectNum] 222 | intersected_area = area(dontCareRect, detRect) 223 | rdDimensions = ((detRect.xmax - detRect.xmin + 1) * (detRect.ymax - detRect.ymin + 1)) 224 | if (rdDimensions == 0): 225 | precision = 0 226 | else: 227 | precision = intersected_area / rdDimensions 228 | if (precision > evaluationParams['AREA_PRECISION_CONSTRAINT']): 229 | detDontCareRectsNum.append(len(detRects) - 1) 230 | break 231 | 232 | evaluationLog += "DET rectangles: " + str(len(detRects)) + ( 233 | " (" + str(len(detDontCareRectsNum)) + " don't care)\n" if len(detDontCareRectsNum) > 0 else "\n") 234 | 235 | if len(gtRects) == 0: 236 | recall = 1 237 | precision = 0 if len(detRects) > 0 else 1 238 | 239 | if len(detRects) > 0: 240 | # Calculate recall and precision matrixs 241 | outputShape = [len(gtRects), len(detRects)] 242 | recallMat = np.empty(outputShape) 243 | precisionMat = np.empty(outputShape) 244 | gtRectMat = np.zeros(len(gtRects), np.int8) 245 | detRectMat = np.zeros(len(detRects), np.int8) 246 | for gtNum in range(len(gtRects)): 247 | for detNum in range(len(detRects)): 248 | rG = gtRects[gtNum] 249 | rD = detRects[detNum] 250 | intersected_area = area(rG, rD) 251 | rgDimensions = ((rG.xmax - rG.xmin + 1) * (rG.ymax - rG.ymin + 1)) 252 | rdDimensions = ((rD.xmax - rD.xmin + 1) * (rD.ymax - rD.ymin + 1)) 253 | recallMat[gtNum, detNum] = 0 if rgDimensions == 0 else intersected_area / rgDimensions 254 | precisionMat[gtNum, detNum] = 0 if rdDimensions == 0 else intersected_area / rdDimensions 255 | 256 | # Find one-to-one matches 257 | evaluationLog += "Find one-to-one matches\n" 258 | for gtNum in range(len(gtRects)): 259 | for detNum in range(len(detRects)): 260 | if gtRectMat[gtNum] == 0 and detRectMat[ 261 | detNum] == 0 and gtNum not in gtDontCareRectsNum and detNum not in detDontCareRectsNum: 262 | match = one_to_one_match(gtNum, detNum) 263 | if match is True: 264 | rG = gtRects[gtNum] 265 | rD = detRects[detNum] 266 
| normDist = center_distance(rG, rD) 267 | normDist /= diag(rG) + diag(rD) 268 | normDist *= 2.0 269 | if normDist < evaluationParams['EV_PARAM_IND_CENTER_DIFF_THR']: 270 | gtRectMat[gtNum] = 1 271 | detRectMat[detNum] = 1 272 | recallAccum += evaluationParams['MTYPE_OO_O'] 273 | precisionAccum += evaluationParams['MTYPE_OO_O'] 274 | pairs.append({'gt': gtNum, 'det': detNum, 'type': 'OO'}) 275 | evaluationLog += "Match GT #" + str(gtNum) + " with Det #" + str(detNum) + "\n" 276 | else: 277 | evaluationLog += "Match Discarded GT #" + str(gtNum) + " with Det #" + str( 278 | detNum) + " normDist: " + str(normDist) + " \n" 279 | # Find one-to-many matches 280 | evaluationLog += "Find one-to-many matches\n" 281 | for gtNum in range(len(gtRects)): 282 | if gtNum not in gtDontCareRectsNum: 283 | match, matchesDet = one_to_many_match(gtNum) 284 | if match is True: 285 | gtRectMat[gtNum] = 1 286 | recallAccum += evaluationParams['MTYPE_OM_O'] 287 | precisionAccum += evaluationParams['MTYPE_OM_O'] * len(matchesDet) 288 | pairs.append({'gt': gtNum, 'det': matchesDet, 'type': 'OM'}) 289 | for detNum in matchesDet: 290 | detRectMat[detNum] = 1 291 | evaluationLog += "Match GT #" + str(gtNum) + " with Det #" + str(matchesDet) + "\n" 292 | 293 | # Find many-to-one matches 294 | evaluationLog += "Find many-to-one matches\n" 295 | for detNum in range(len(detRects)): 296 | if detNum not in detDontCareRectsNum: 297 | match, matchesGt = many_to_one_match(detNum) 298 | if match is True: 299 | detRectMat[detNum] = 1 300 | recallAccum += evaluationParams['MTYPE_OM_M'] * len(matchesGt) 301 | precisionAccum += evaluationParams['MTYPE_OM_M'] 302 | pairs.append({'gt': matchesGt, 'det': detNum, 'type': 'MO'}) 303 | for gtNum in matchesGt: 304 | gtRectMat[gtNum] = 1 305 | evaluationLog += "Match GT #" + str(matchesGt) + " with Det #" + str(detNum) + "\n" 306 | 307 | numGtCare = (len(gtRects) - len(gtDontCareRectsNum)) 308 | if numGtCare == 0: 309 | recall = float(1) 310 | precision = float(0) 
if len(detRects) > 0 else float(1) 311 | else: 312 | recall = float(recallAccum) / numGtCare 313 | precision = float(0) if (len(detRects) - len(detDontCareRectsNum)) == 0 else float( 314 | precisionAccum) / (len(detRects) - len(detDontCareRectsNum)) 315 | hmean = 0 if (precision + recall) == 0 else 2.0 * precision * recall / (precision + recall) 316 | 317 | evaluationLog += "Recall = " + str(recall) + "\n" 318 | evaluationLog += "Precision = " + str(precision) + "\n" 319 | 320 | methodRecallSum += recallAccum 321 | methodPrecisionSum += precisionAccum 322 | numGt += len(gtRects) - len(gtDontCareRectsNum) 323 | numDet += len(detRects) - len(detDontCareRectsNum) 324 | 325 | perSampleMetrics[resFile] = { 326 | 'precision': precision, 327 | 'recall': recall, 328 | 'hmean': hmean, 329 | 'pairs': pairs, 330 | 'recallMat': [] if len(detRects) > 100 else recallMat.tolist(), 331 | 'precisionMat': [] if len(detRects) > 100 else precisionMat.tolist(), 332 | 'gtPolPoints': gtPolPoints, 333 | 'detPolPoints': detPolPoints, 334 | 'gtDontCare': gtDontCareRectsNum, 335 | 'detDontCare': detDontCareRectsNum, 336 | 'evaluationParams': evaluationParams, 337 | 'evaluationLog': evaluationLog 338 | } 339 | 340 | methodRecall = 0 if numGt == 0 else methodRecallSum / numGt 341 | methodPrecision = 0 if numDet == 0 else methodPrecisionSum / numDet 342 | methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * methodRecall * methodPrecision / ( 343 | methodRecall + methodPrecision) 344 | 345 | methodMetrics = {'precision': methodPrecision, 'recall': methodRecall, 'hmean': methodHmean} 346 | 347 | resDict = {'calculated': True, 'Message': '', 'method': methodMetrics, 'per_sample': perSampleMetrics} 348 | 349 | return resDict 350 | -------------------------------------------------------------------------------- /Algorithm_IoU.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from collections 
import namedtuple 4 | import rrc_evaluation_funcs 5 | from rrc_evaluation_funcs import logger 6 | import importlib 7 | import re 8 | from shapely.geometry import Polygon 9 | import numpy as np 10 | 11 | 12 | def default_evaluation_params(): 13 | """ 14 | default_evaluation_params: Default parameters to use for the validation and evaluation. 15 | """ 16 | return { 17 | 'IOU_CONSTRAINT': 0.5, 18 | 'AREA_PRECISION_CONSTRAINT': 0.5, 19 | 'GT_SAMPLE_NAME_2_ID': 'gt_img_([0-9]+).txt', 20 | 'DET_SAMPLE_NAME_2_ID': 'res_img_([0-9]+).txt', 21 | 'LTRB': False, # LTRB:2points(left,top,right,bottom) or 4 points(x1,y1,x2,y2,x3,y3,x4,y4) 22 | 'CRLF': False, # Lines are delimited by Windows CRLF format 23 | 'CONFIDENCES': False, # Detections must include confidence value. AP will be calculated 24 | 'PER_SAMPLE_RESULTS': True, # Generate per sample results and produce data for visualization 25 | 'E2E': False #compute average edit distance for end to end evaluation 26 | } 27 | 28 | 29 | def validate_data(gtFilePath, submFilePath, evaluationParams): 30 | """ 31 | Method validate_data: validates that all files in the results folder are correct (have the correct name contents). 32 | Validates also that there are no missing files in the folder. 
33 | If some error detected, the method raises the error 34 | """ 35 | gt = rrc_evaluation_funcs.load_zip_file(gtFilePath, evaluationParams['GT_SAMPLE_NAME_2_ID']) 36 | 37 | subm = rrc_evaluation_funcs.load_zip_file(submFilePath, evaluationParams['DET_SAMPLE_NAME_2_ID'], True) 38 | 39 | # Validate format of GroundTruth 40 | for k in gt: 41 | rrc_evaluation_funcs.validate_lines_in_file(k, gt[k], evaluationParams['CRLF'], evaluationParams['LTRB'], True) 42 | 43 | # Validate format of results 44 | for k in subm: 45 | if (k in gt) == False: 46 | raise Exception("The sample %s not present in GT" % k) 47 | 48 | rrc_evaluation_funcs.validate_lines_in_file(k, subm[k], evaluationParams['CRLF'], evaluationParams['LTRB'], 49 | evaluationParams['E2E'], evaluationParams['CONFIDENCES']) 50 | 51 | 52 | def evaluate_method(gtFilePath, submFilePath, evaluationParams): 53 | """ 54 | Method evaluate_method: evaluate method and returns the results 55 | Results. Dictionary with the following values: 56 | - method (required) Global method metrics. Ex: { 'Precision':0.8,'Recall':0.9 } 57 | - samples (optional) Per sample metrics. 
Ex: {'sample1' : { 'Precision':0.8,'Recall':0.9 } , 'sample2' : { 'Precision':0.8,'Recall':0.9 } 58 | """ 59 | if evaluationParams['E2E']: 60 | from hanziconv import HanziConv 61 | import editdistance 62 | 63 | def polygon_from_points(points): 64 | """ 65 | Returns a Polygon object to use with the Polygon2 class from a list of 8 points: x1,y1,x2,y2,x3,y3,x4,y4 66 | """ 67 | resBoxes = np.empty([1, 8], dtype='int32') 68 | resBoxes[0, 0] = int(points[0]) 69 | resBoxes[0, 4] = int(points[1]) 70 | resBoxes[0, 1] = int(points[2]) 71 | resBoxes[0, 5] = int(points[3]) 72 | resBoxes[0, 2] = int(points[4]) 73 | resBoxes[0, 6] = int(points[5]) 74 | resBoxes[0, 3] = int(points[6]) 75 | resBoxes[0, 7] = int(points[7]) 76 | pointMat = resBoxes[0].reshape([2, 4]).T 77 | return Polygon(pointMat) 78 | 79 | def rectangle_to_polygon(rect): 80 | resBoxes = np.empty([1, 8], dtype='int32') 81 | resBoxes[0, 0] = int(rect.xmin) 82 | resBoxes[0, 4] = int(rect.ymax) 83 | resBoxes[0, 1] = int(rect.xmin) 84 | resBoxes[0, 5] = int(rect.ymin) 85 | resBoxes[0, 2] = int(rect.xmax) 86 | resBoxes[0, 6] = int(rect.ymin) 87 | resBoxes[0, 3] = int(rect.xmax) 88 | resBoxes[0, 7] = int(rect.ymax) 89 | 90 | pointMat = resBoxes[0].reshape([2, 4]).T 91 | 92 | return Polygon(pointMat) 93 | 94 | def rectangle_to_points(rect): 95 | points = [int(rect.xmin), int(rect.ymax), int(rect.xmax), int(rect.ymax), int(rect.xmax), int(rect.ymin), 96 | int(rect.xmin), int(rect.ymin)] 97 | return points 98 | 99 | def get_union(pD, pG): 100 | areaA = pD.area 101 | areaB = pG.area 102 | return areaA + areaB - get_intersection(pD, pG) 103 | 104 | def get_intersection_over_union(pD, pG): 105 | try: 106 | return get_intersection(pD, pG) / get_union(pD, pG) 107 | except: 108 | return 0 109 | 110 | def get_intersection(pD, pG): 111 | pInt = pD & pG 112 | if pInt.is_empty: 113 | return 0 114 | return pInt.area 115 | 116 | def compute_ap(confList, matchList, numGtCare): 117 | correct = 0 118 | AP = 0 119 | if len(confList) > 0: 
120 | confList = np.array(confList) 121 | matchList = np.array(matchList) 122 | sorted_ind = np.argsort(-confList) 123 | confList = confList[sorted_ind] 124 | matchList = matchList[sorted_ind] 125 | for n in range(len(confList)): 126 | match = matchList[n] 127 | if match: 128 | correct += 1 129 | AP += float(correct) / (n + 1) 130 | 131 | if numGtCare > 0: 132 | AP /= numGtCare 133 | 134 | return AP 135 | 136 | #from RTWC17 137 | def normalize_txt(st): 138 | """ 139 | Normalize Chinese text strings by: 140 | - remove puncutations and other symbols 141 | - convert traditional Chinese to simplified 142 | - convert English characters to lower cases 143 | """ 144 | st = ''.join(st.split(' ')) 145 | st = re.sub("\"", "", st) 146 | # remove any this not one of Chinese character, ascii 0-9, and ascii a-z and A-Z 147 | new_st = re.sub(r'[^\u4e00-\u9fa5\u0041-\u005a\u0061-\u007a0-9]+', '', st) 148 | # convert Traditional Chinese to Simplified Chinese 149 | new_st = HanziConv.toSimplified(new_st) 150 | # convert uppercase English letters to lowercase 151 | new_st = new_st.lower() 152 | return new_st 153 | 154 | def text_distance(str1, str2): 155 | str1 = normalize_txt(str1) 156 | str2 = normalize_txt(str2) 157 | return editdistance.eval(str1, str2) 158 | 159 | perSampleMetrics = {} 160 | 161 | matchedSum = 0 162 | 163 | Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax') 164 | 165 | gt = rrc_evaluation_funcs.load_zip_file(gtFilePath, evaluationParams['GT_SAMPLE_NAME_2_ID']) 166 | subm = rrc_evaluation_funcs.load_zip_file(submFilePath, evaluationParams['DET_SAMPLE_NAME_2_ID'], True) 167 | 168 | numGlobalCareGt = 0 169 | numGlobalCareDet = 0 170 | 171 | arrGlobalConfidences = [] 172 | arrGlobalMatches = [] 173 | 174 | #total edit distance 175 | total_dist = 0 176 | 177 | for resFile in gt: 178 | gtFile = rrc_evaluation_funcs.decode_utf8(gt[resFile]) 179 | 180 | detMatched = 0 181 | 182 | iouMat = np.empty([1, 1]) 183 | 184 | gtPols = [] 185 | detPols = [] 186 | 187 | 
gtTrans = [] 188 | detTrans = [] 189 | 190 | gtPolPoints = [] 191 | detPolPoints = [] 192 | 193 | # Array of Ground Truth Polygons' keys marked as don't Care 194 | gtDontCarePolsNum = [] 195 | # Array of Detected Polygons' matched with a don't Care GT 196 | detDontCarePolsNum = [] 197 | 198 | pairs = [] 199 | detMatchedNums = [] 200 | 201 | arrSampleConfidences = [] 202 | arrSampleMatch = [] 203 | sampleAP = 0 204 | 205 | example_dist = 0 206 | match_tuples = [] 207 | 208 | evaluationLog = "" 209 | 210 | pointsList, _, transcriptionsList = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(gtFile,evaluationParams['CRLF'],evaluationParams['LTRB'],True, False) 211 | for n in range(len(pointsList)): 212 | points = pointsList[n] 213 | transcription = transcriptionsList[n] 214 | dontCare = (transcription == "###") or (transcription=="?") 215 | if evaluationParams['LTRB']: 216 | gtRect = Rectangle(*points) 217 | gtPol = rectangle_to_polygon(gtRect) 218 | else: 219 | gtPol = polygon_from_points(points) 220 | gtPols.append(gtPol) 221 | gtPolPoints.append(points) 222 | gtTrans.append(transcription) 223 | if dontCare: 224 | gtDontCarePolsNum.append(len(gtPols) - 1) 225 | 226 | evaluationLog += "GT polygons: " + str(len(gtPols)) + ( 227 | " (" + str(len(gtDontCarePolsNum)) + " don't care)\n" if len(gtDontCarePolsNum) > 0 else "\n") 228 | 229 | if resFile in subm: 230 | 231 | detFile = rrc_evaluation_funcs.decode_utf8(subm[resFile]) 232 | 233 | pointsList, confidencesList, transcriptionsList = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(detFile,evaluationParams['CRLF'],evaluationParams['LTRB'],evaluationParams['E2E'],evaluationParams['CONFIDENCES']) 234 | for n in range(len(pointsList)): 235 | points = pointsList[n] 236 | 237 | if evaluationParams['LTRB']: 238 | detRect = Rectangle(*points) 239 | detPol = rectangle_to_polygon(detRect) 240 | else: 241 | detPol = polygon_from_points(points) 242 | 243 | detPols.append(detPol) 244 | 
detPolPoints.append(points) 245 | if evaluationParams['E2E']: 246 | transcription = transcriptionsList[n] 247 | detTrans.append(transcription) 248 | if len(gtDontCarePolsNum) > 0: 249 | for dontCarePol in gtDontCarePolsNum: 250 | dontCarePol = gtPols[dontCarePol] 251 | intersected_area = get_intersection(dontCarePol, detPol) 252 | pdDimensions = detPol.area 253 | precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions 254 | if (precision > evaluationParams['AREA_PRECISION_CONSTRAINT']): 255 | detDontCarePolsNum.append(len(detPols) - 1) 256 | break 257 | 258 | evaluationLog += "DET polygons: " + str(len(detPols)) + ( 259 | " (" + str(len(detDontCarePolsNum)) + " don't care)\n" if len(detDontCarePolsNum) > 0 else "\n") 260 | 261 | if len(gtPols) > 0 and len(detPols) > 0: 262 | # Calculate IoU and precision matrixs 263 | outputShape = [len(gtPols), len(detPols)] 264 | iouMat = np.empty(outputShape) 265 | gtRectMat = np.zeros(len(gtPols), np.int8) 266 | detRectMat = np.zeros(len(detPols), np.int8) 267 | for gtNum in range(len(gtPols)): 268 | for detNum in range(len(detPols)): 269 | pG = gtPols[gtNum] 270 | pD = detPols[detNum] 271 | iouMat[gtNum, detNum] = get_intersection_over_union(pD, pG) 272 | 273 | # match dt index of every gt 274 | gtMatch = np.empty(len(gtPols), np.int8) 275 | gtMatch.fill(-1) 276 | # match gt index of every dt 277 | dtMatch = np.empty(len(detPols), dtype=np.int8) 278 | dtMatch.fill(-1) 279 | 280 | for gtNum in range(len(gtPols)): 281 | max_iou = 0 282 | match_dt_idx = -1 283 | 284 | for detNum in range(len(detPols)): 285 | if gtRectMat[gtNum] == 0 and detRectMat[detNum] == 0\ 286 | and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum: 287 | if iouMat[gtNum, detNum] > evaluationParams['IOU_CONSTRAINT']: 288 | gtRectMat[gtNum] = 1 289 | detRectMat[detNum] = 1 290 | detMatched += 1 291 | pairs.append({'gt': gtNum, 'det': detNum}) 292 | detMatchedNums.append(detNum) 293 | evaluationLog += "Match GT #" + str(gtNum) 
+ " with Det #" + str(detNum) + "\n" 294 | 295 | if evaluationParams['E2E'] and gtMatch[gtNum] == -1 and dtMatch[detNum] == -1\ 296 | and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum: 297 | if iouMat[gtNum, detNum] > evaluationParams['IOU_CONSTRAINT'] and iouMat[gtNum, detNum] > max_iou: 298 | max_iou = iouMat[gtNum, detNum] 299 | match_dt_idx = detNum 300 | 301 | if evaluationParams['E2E'] and match_dt_idx >= 0: 302 | gtMatch[gtNum] = match_dt_idx 303 | dtMatch[match_dt_idx] = gtNum 304 | 305 | if evaluationParams['E2E']: 306 | for gtNum in range(len(gtPols)): 307 | if gtNum in gtDontCarePolsNum: 308 | continue 309 | gt_text = gtTrans[gtNum] 310 | if gtMatch[gtNum] >= 0: 311 | dt_text = detTrans[gtMatch[gtNum]] 312 | else: 313 | dt_text = u'' 314 | dist = text_distance(gt_text, dt_text) 315 | example_dist += dist 316 | match_tuples.append((gt_text, dt_text, dist)) 317 | match_tuples.append(("===============","==============", -1)) 318 | for detNum in range(len(detPols)): 319 | if detNum in detDontCarePolsNum: 320 | continue 321 | if dtMatch[detNum] == -1: 322 | gt_text = u'' 323 | dt_text = detTrans[detNum] 324 | dist = text_distance(gt_text, dt_text) 325 | example_dist += dist 326 | match_tuples.append((gt_text, dt_text, dist)) 327 | 328 | if evaluationParams['CONFIDENCES']: 329 | for detNum in range(len(detPols)): 330 | if detNum not in detDontCarePolsNum: 331 | # we exclude the don't care detections 332 | match = detNum in detMatchedNums 333 | 334 | arrSampleConfidences.append(confidencesList[detNum]) 335 | arrSampleMatch.append(match) 336 | 337 | arrGlobalConfidences.append(confidencesList[detNum]) 338 | arrGlobalMatches.append(match) 339 | #avoid when det file don't exist, example_dist=0 340 | elif evaluationParams['E2E']: 341 | match_tuples.append(("===============", "==============", -1)) 342 | dt_text = u'' 343 | for gtNum in range(len(gtPols)): 344 | if gtNum in gtDontCarePolsNum: 345 | continue 346 | gt_text = gtTrans[gtNum] 347 | 
dist = text_distance(gt_text, dt_text) 348 | example_dist += dist 349 | match_tuples.append((gt_text, dt_text, dist)) 350 | total_dist += example_dist 351 | 352 | if evaluationParams['E2E']: 353 | logger.debug('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>') 354 | logger.debug("file:{}".format(resFile)) 355 | for tp in match_tuples: 356 | gt_text, dt_text, dist = tp 357 | logger.debug(u'GT: "{}" matched to DT: "{}", distance = {}'.format(gt_text, dt_text, dist)) 358 | logger.debug('Distance = {:f}'.format(example_dist)) 359 | logger.debug('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<') 360 | 361 | numGtCare = (len(gtPols) - len(gtDontCarePolsNum)) 362 | numDetCare = (len(detPols) - len(detDontCarePolsNum)) 363 | if numGtCare == 0: 364 | recall = float(1) 365 | precision = float(0) if numDetCare > 0 else float(1) 366 | sampleAP = precision 367 | else: 368 | recall = float(detMatched) / numGtCare 369 | precision = 0 if numDetCare == 0 else float(detMatched) / numDetCare 370 | if evaluationParams['CONFIDENCES'] and evaluationParams['PER_SAMPLE_RESULTS']: 371 | sampleAP = compute_ap(arrSampleConfidences, arrSampleMatch, numGtCare) 372 | 373 | hmean = 0 if (precision + recall) == 0 else 2.0 * precision * recall / (precision + recall) 374 | matchedSum += detMatched 375 | numGlobalCareGt += numGtCare 376 | numGlobalCareDet += numDetCare 377 | if evaluationParams['PER_SAMPLE_RESULTS']: 378 | perSampleMetrics[resFile] = { 379 | 'precision': precision, 380 | 'recall': recall, 381 | 'hmean': hmean, 382 | 'pairs': pairs, 383 | 'AP': sampleAP, 384 | 'iouMat': [] if len(detPols) > 100 else iouMat.tolist(), 385 | 'gtPolPoints': gtPolPoints, 386 | 'detPolPoints': detPolPoints, 387 | 'gtDontCare': gtDontCarePolsNum, 388 | 'detDontCare': detDontCarePolsNum, 389 | 'evaluationParams': evaluationParams, 390 | 'evaluationLog': evaluationLog 391 | } 392 | if evaluationParams['E2E']: 393 | perSampleMetrics[resFile]['exampleDistance'] = example_dist 394 | # print("file:{} 
exampleDistance:{}".format(resFile,example_dist)) 395 | 396 | # Compute MAP and MAR 397 | AP = 0 398 | if evaluationParams['CONFIDENCES']: 399 | AP = compute_ap(arrGlobalConfidences, arrGlobalMatches, numGlobalCareGt) 400 | 401 | methodRecall = 0 if numGlobalCareGt == 0 else float(matchedSum) / numGlobalCareGt 402 | methodPrecision = 0 if numGlobalCareDet == 0 else float(matchedSum) / numGlobalCareDet 403 | methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * methodRecall * methodPrecision / ( 404 | methodRecall + methodPrecision) 405 | methodDistance = 0 if len(gt) == 0 else float(total_dist)/len(gt) 406 | 407 | methodMetrics = {'precision': methodPrecision, 'recall': methodRecall, 'hmean': methodHmean, 'AP': AP, 'distance': methodDistance} 408 | 409 | resDict = {'calculated': True, 'Message': '', 'method': methodMetrics, 'per_sample': perSampleMetrics} 410 | 411 | return resDict 412 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | MODIFIED BY [ICDAR CODES](http://rrc.cvc.uab.es/?com=introduction) 2 | INSTRUCTIONS FOR THE STANDALONE SCRIPTS 3 | 4 | ### HOW TO USE 5 | Requirements: 6 | - Python version 3.6+ 7 | - pip install -r requirements.txt 8 | - Each Task requires different Python modules. When running the script, if some module is not installed you will see a notification and installation instructions. 9 | 10 | Procedure: 11 | Download the ZIP file for the requested script and unzip it to a directory. 12 | 13 | Open a terminal in the directory and run the command: 14 | 15 | **python script.py –g gt.zip –s submit.zip -p '{\"LTRB\":true,\"E2E\":true}'** 16 | 17 | Pycharm IDE use parameters 18 | 19 | **-g gt.zip -s submit.zip -p {\\"LTRB\\":true,\\"E2E\\":true}** 20 | 21 | If you have already installed all the required modules, then you will see the method’s results or an error message if the submitted file is not correct. 
22 | 23 | parameters: 24 | 25 | -g: Path of the Ground Truth file. In most cases, the Ground Truth will be included in the same Zip file named 'gt.zip', 'gt.txt' or 'gt.json'. If not, you will be able to get it on the Downloads page of the Task. 26 | 27 | -s: Path of your method's results file. 28 | 29 | Optional parameters: 30 | 31 | -o: Path to a directory where to copy the file ‘results.zip’ that contains per-sample results. 32 | 33 | -p: JSON string parameters to override the script default parameters. The parameters that can be overridden are inside the function 'default_evaluation_params' located at the beginning of the evaluation script. 34 | 35 | 36 | ``` 37 | when using Algorithm_IoU -p 38 | 'IOU_CONSTRAINT': 0.5, 39 | 'AREA_PRECISION_CONSTRAINT': 0.5, 40 | 'GT_SAMPLE_NAME_2_ID': 'gt_img_([0-9]+).txt', 41 | 'DET_SAMPLE_NAME_2_ID': 'res_img_([0-9]+).txt', 42 | 'LTRB': False, # LTRB:2points(left,top,right,bottom) or 4 points(x1,y1,x2,y2,x3,y3,x4,y4) 43 | 'CRLF': False, # Lines are delimited by Windows CRLF format 44 | 'CONFIDENCES': False, # Detections must include confidence value. 
AP will be calculated 45 | 'PER_SAMPLE_RESULTS': True, # Generate per sample results and produce data for visualization 46 | 'E2E': False #compute average edit distance for end to end evaluation 47 | ``` 48 | ``` 49 | when using Algorithm_DetEva -p 50 | 'AREA_RECALL_CONSTRAINT': 0.8, 51 | 'AREA_PRECISION_CONSTRAINT': 0.4, 52 | 'EV_PARAM_IND_CENTER_DIFF_THR': 1, 53 | 'MTYPE_OO_O': 1., 54 | 'MTYPE_OM_O': 0.8, 55 | 'MTYPE_OM_M': 1., 56 | 'GT_SAMPLE_NAME_2_ID': 'gt_img_([0-9]+).txt', 57 | 'DET_SAMPLE_NAME_2_ID': 'res_img_([0-9]+).txt', 58 | 'CRLF': False # Lines are delimited by Windows CRLF format 59 | ``` 60 | 61 | -c: choose algorithm for different tasks.(Challenges 1、2 use 'DetEva' Challenges 4 use 'IoU', default 'IoU') 62 | 63 | **Example: python script.py –g gt.zip –s submit.zip –o ./ -p '{\"CRLF\":true}' -c DetEva** 64 | 65 | 66 | ### THEORY 67 | see [my blog](https://blog.csdn.net/liuxiaoheng1992/article/details/82632594) 68 | 69 | ***If this repository helps you, please star it. Thanks.*** 70 | -------------------------------------------------------------------------------- /gt.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MichaelHL-ai/OCR_EVALUATION/5dfb9f4e507a44994bec0c67e54d81970683db0b/gt.zip -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | hanziconv 2 | editdistance 3 | numpy 4 | shapely -------------------------------------------------------------------------------- /rrc_evaluation_funcs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # encoding: UTF-8 3 | import json 4 | import sys 5 | 6 | sys.path.append('./') 7 | import zipfile 8 | import re 9 | import sys 10 | import os 11 | import codecs 12 | import logging 13 | 14 | logging.basicConfig() 15 | logger = logging.getLogger() 16 | 
# getLogger() here is a harmless re-fetch of the root logger configured above.
logger = logging.getLogger()
logger.setLevel(logging.INFO)


def _zip_entry_key(name, fileNameRegExp):
    """Return the lookup key for a ZIP entry, or None when it does not match.

    With an empty regex every entry matches and the full name is the key;
    otherwise the first capture group (when present) becomes the key.
    """
    if fileNameRegExp == "":
        return name
    m = re.match(fileNameRegExp, name)
    if m is None:
        return None
    return m.group(1) if m.groups() else name


def load_zip_file_keys(file, fileNameRegExp=''):
    """
    Returns an array with the entries of the ZIP file that match the regular expression.
    The keys are the file names or the capturing group defined in fileNameRegExp.
    """
    try:
        archive = zipfile.ZipFile(file, mode='r', allowZip64=True)
    except Exception:
        raise Exception('Error loading the ZIP archive.')

    pairs = []
    for name in archive.namelist():
        keyName = _zip_entry_key(name, fileNameRegExp)
        if keyName is not None:
            pairs.append(keyName)
    return pairs


def load_zip_file(file, fileNameRegExp='', allEntries=False):
    """
    Returns a dict {key: raw bytes} with the contents (filtered by fileNameRegExp) of a ZIP file.
    The keys are the file names or the capturing group defined in fileNameRegExp.
    allEntries=True makes a non-matching entry an error instead of silently skipping it.
    """
    try:
        archive = zipfile.ZipFile(file, mode='r', allowZip64=True)
    except Exception:
        raise Exception('Error loading the ZIP archive')

    contents = {}
    for name in archive.namelist():
        keyName = _zip_entry_key(name, fileNameRegExp)
        if keyName is None:
            if allEntries:
                raise Exception('ZIP entry not valid: %s' % name)
        else:
            contents[keyName] = archive.read(name)
    return contents


def decode_utf8(raw):
    """
    Returns a Unicode string on success (stripping a UTF-8 BOM if present), or None on failure.
    """
    try:
        # Round-trip through decode('replace') so undecodable bytes are
        # substituted rather than failing outright.
        raw = codecs.decode(raw, 'utf-8', 'replace')
        raw = raw.encode('utf8')
        if raw.startswith(codecs.BOM_UTF8):
            # BUGFIX: the replacement must be bytes (b''); the original str ''
            # raised TypeError under Python 3, rejecting every BOM-prefixed file.
            raw = raw.replace(codecs.BOM_UTF8, b'', 1)
        return raw.decode('utf-8')
    except Exception:
        return None


def validate_lines_in_file(fileName, file_contents, CRLF=True, LTRB=True, withTranscription=False, withConfidence=False,
                           imWidth=0, imHeight=0):
    """
    Validates all lines of the file, calling the line validation function for each one.
    Raises on the first invalid line.
    """
    utf8File = decode_utf8(file_contents)
    if utf8File is None:
        raise Exception("The file %s is not UTF-8" % fileName)

    for line in utf8File.split("\r\n" if CRLF else "\n"):
        line = line.replace("\r", "").replace("\n", "")
        if line != "":
            try:
                validate_tl_line(line, LTRB, withTranscription, withConfidence, imWidth, imHeight)
            except Exception as e:
                # BUGFIX: keep the message as str; the original encoded it to
                # bytes, which renders as b'...' under Python 3.
                raise Exception(
                    "Line in sample not valid. Sample: %s Line: %s Error: %s" % (fileName, line, str(e)))


def validate_tl_line(line, LTRB=True, withTranscription=True, withConfidence=True, imWidth=0, imHeight=0):
    """
    Validate the format of the line. If the line is not valid an exception will be raised.
    If imWidth and imHeight are specified, all points must be inside the image bounds.
    Possible values are:
    LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription]
    LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription]
    """
    get_tl_line_values(line, LTRB, withTranscription, withConfidence, imWidth, imHeight)


# Regex fragments shared by every line format.
_NUMBER = r'(-?[0-9]+\.?[0-9]*)'
_SEPARATOR = r'\s*,\s*'
# NOTE: the original confidence pattern was '[0-1].?[0-9]*' with an unescaped
# dot (matching any character); the escaped form is what was clearly intended.
_CONFIDENCE = r'([0-1]\.?[0-9]*)'


def _line_pattern(numPoints, withConfidence, withTranscription):
    """Build the full-line regex for numPoints coordinates plus optional fields."""
    pattern = r'^\s*' + _SEPARATOR.join([_NUMBER] * numPoints)
    if withConfidence:
        pattern += _SEPARATOR + _CONFIDENCE
    if withTranscription:
        pattern += r'\s*,(.*)$'
    elif withConfidence or numPoints == 8:
        pattern += r'\s*$'
    else:
        # Bare LTRB lines may carry a trailing comma (as in the original regex).
        pattern += r'\s*,?\s*$'
    return pattern


def _expected_format(numPoints, withConfidence, withTranscription):
    """Human-readable field list used in format-error messages."""
    base = "xmin,ymin,xmax,ymax" if numPoints == 4 else "x1,y1,x2,y2,x3,y3,x4,y4"
    if withConfidence:
        base += ",confidence"
    if withTranscription:
        base += ",transcription"
    return base


def get_tl_line_values(line, LTRB=True, withTranscription=False, withConfidence=False, imWidth=0, imHeight=0):
    """
    Validate the format of the line. If the line is not valid an exception will be raised.
    If imWidth and imHeight are specified, all points must be inside the image bounds.
    Possible values are:
    LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription]
    LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription]
    Returns values from a text line: points, confidence (0.0 when absent),
    transcription ("" when absent).
    """
    confidence = 0.0
    transcription = ""
    numPoints = 4 if LTRB else 8

    m = re.match(_line_pattern(numPoints, withConfidence, withTranscription), line)
    if m is None:
        raise Exception("Format incorrect. Should be: %s"
                        % _expected_format(numPoints, withConfidence, withTranscription))

    points = [float(m.group(i)) for i in range(1, numPoints + 1)]

    if LTRB:
        xmin, ymin, xmax, ymax = points
        if xmax < xmin:
            raise Exception("Xmax value (%s) not valid (Xmax < Xmin)." % (xmax))
        if ymax < ymin:
            raise Exception("Ymax value (%s) not valid (Ymax < Ymin)." % (ymax))
        if imWidth > 0 and imHeight > 0:
            validate_point_inside_bounds(xmin, ymin, imWidth, imHeight)
            validate_point_inside_bounds(xmax, ymax, imWidth, imHeight)
    else:
        if not validate_clockwise_points(points):
            # Convert an anticlockwise quadrilateral to clockwise order.
            points = [points[0], points[1], points[6], points[7],
                      points[4], points[5], points[2], points[3]]
        if imWidth > 0 and imHeight > 0:
            for i in range(0, 8, 2):
                validate_point_inside_bounds(points[i], points[i + 1], imWidth, imHeight)

    if withConfidence:
        try:
            confidence = float(m.group(numPoints + 1))
        except ValueError:
            raise Exception("Confidence value must be a float")

    if withTranscription:
        posTranscription = numPoints + (2 if withConfidence else 1)
        transcription = m.group(posTranscription)
        m2 = re.match(r'^\s*\"(.*)\"\s*$', transcription)
        if m2 is not None:
            # Transcription with double quotes: extract the value and
            # un-escape \\ and \" sequences.
            transcription = m2.group(1).replace("\\\\", "\\").replace("\\\"", "\"")

    return points, confidence, transcription


def validate_point_inside_bounds(x, y, imWidth, imHeight):
    """Raise if (x, y) falls outside the [0, imWidth] x [0, imHeight] image box."""
    if x < 0 or x > imWidth:
        raise Exception("X value (%s) not valid. Image dimensions: (%s,%s)" % (x, imWidth, imHeight))
    if y < 0 or y > imHeight:
        # BUGFIX: the original message had 5 placeholders ("Sample: %s Line:%s")
        # for only 3 arguments, raising TypeError instead of the intended error.
        raise Exception("Y value (%s) not valid. Image dimensions: (%s,%s)" % (y, imWidth, imHeight))


def validate_clockwise_points(points):
    """
    Returns True when the 4 points delimiting a quadrilateral are in clockwise
    order (image coordinates, Y axis pointing downwards); raises when the list
    does not hold exactly 8 values. Uses the signed-area (shoelace) sum.
    """
    if len(points) != 8:
        raise Exception("Points list not valid." + str(len(points)))

    corners = [(int(points[i]), int(points[i + 1])) for i in range(0, 8, 2)]
    summatory = sum(
        (corners[(i + 1) % 4][0] - corners[i][0]) * (corners[(i + 1) % 4][1] + corners[i][1])
        for i in range(4)
    )
    if summatory > 0:
        logger.debug(
            "Points are not clockwise. The coordinates of bounding quadrilaterals have to be given in clockwise order. Regarding the correct interpretation of 'clockwise' remember that the image coordinate system used is the standard one, with the image origin at the upper left, the X axis extending to the right and Y axis extending downwards.")
        return False
    return True


def get_tl_line_values_from_file_contents(content, CRLF=True, LTRB=True, withTranscription=False, withConfidence=False,
                                          imWidth=0, imHeight=0, sort_by_confidences=True):
    """
    Returns all points, confidences and transcriptions of a file in three lists.
    Valid line formats:
    xmin,ymin,xmax,ymax,[confidence],[transcription]
    x1,y1,x2,y2,x3,y3,x4,y4,[confidence],[transcription]
    When confidences are present and sort_by_confidences is set, the three
    lists are re-ordered by descending confidence (needed for AP computation).
    """
    pointsList = []
    transcriptionsList = []
    confidencesList = []

    for line in content.split("\r\n" if CRLF else "\n"):
        line = line.replace("\r", "").replace("\n", "")
        if line != "":
            points, confidence, transcription = get_tl_line_values(line, LTRB, withTranscription, withConfidence,
                                                                   imWidth, imHeight)
            pointsList.append(points)
            transcriptionsList.append(transcription)
            confidencesList.append(confidence)

    if withConfidence and confidencesList and sort_by_confidences:
        import numpy as np
        sorted_ind = np.argsort(-np.array(confidencesList))
        confidencesList = [confidencesList[i] for i in sorted_ind]
        pointsList = [pointsList[i] for i in sorted_ind]
        transcriptionsList = [transcriptionsList[i] for i in sorted_ind]

    return pointsList, confidencesList, transcriptionsList


def main_evaluation(args, default_evaluation_params_fn, validate_data_fn, evaluate_method_fn, show_result=True,
                    per_sample=True):
    """
    Validates a method, evaluates it and, on success, generates a ZIP file with
    a JSON entry for each sample.
    Params:
        args:
            -g ground truth path,
            -s detection results path,
            -p JSON string overriding the defaults from default_evaluation_params_fn,
            -o directory where to write 'results.zip' with per-sample results,
        evaluate_method_fn: evaluates the submission and returns a result dict.
    Returns the result dict; resDict['calculated'] is False on failure.
    """
    evalParams = default_evaluation_params_fn()
    if args.p:
        evalParams.update(json.loads(args.p))

    resDict = {'calculated': True, 'Message': '', 'method': '{}', 'per_sample': '{}'}
    try:
        validate_data_fn(args.g, args.s, evalParams)
        evalData = evaluate_method_fn(args.g, args.s, evalParams)
        resDict.update(evalData)
    except Exception as e:
        import traceback
        traceback.print_exc()
        resDict['Message'] = str(e)
        resDict['calculated'] = False

    outZip = None
    if args.o:
        if not os.path.exists(args.o):
            os.makedirs(args.o)
        resultsOutputname = os.path.join(args.o, 'results.zip')
        outZip = zipfile.ZipFile(resultsOutputname, mode='w', allowZip64=True)

        # method.json holds only the global results, never the bulky
        # per-sample data, which is written as individual entries below.
        del resDict['per_sample']
        if 'output_items' in resDict:
            del resDict['output_items']
        outZip.writestr('method.json', json.dumps(resDict))

    if not resDict['calculated']:
        if show_result:
            sys.stderr.write('Error!\n' + resDict['Message'] + '\n\n')
        if outZip is not None:
            outZip.close()
        return resDict

    if outZip is not None:
        if per_sample == True:
            # BUGFIX: dict.iteritems() is Python 2 only and crashed here under
            # Python 3; items() works on both.
            for k, v in evalData['per_sample'].items():
                outZip.writestr(k + '.json', json.dumps(v))
        if 'output_items' in evalData:
            for k, v in evalData['output_items'].items():
                outZip.writestr(k, v)
        outZip.close()

    if show_result:
        sys.stdout.write("Calculated!")
        sys.stdout.write(json.dumps(resDict['method']))

    return resDict


def main_validation(args, default_evaluation_params_fn, validate_data_fn):
    """
    Validates a method's submission format only; exits 0 on success, 101 on failure.
    Params:
        default_evaluation_params_fn: returns the default evaluation parameters.
        validate_data_fn: validates the correct format of the submission.
    """
    try:
        evalParams = default_evaluation_params_fn()
        if args.p:
            # BUGFIX: the original parsed json.loads(args.p[1:-1]), stripping the
            # first and last characters of a well-formed '{...}' string;
            # main_evaluation parses args.p verbatim, so do the same here.
            evalParams.update(json.loads(args.p))

        validate_data_fn(args.g, args.s, evalParams)
        print('SUCCESS')
        sys.exit(0)
    except Exception as e:
        print(e)
        sys.exit(101)

# --------------------------------------------------------------------------------
# /script.py:
# --------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
'''
default parameters, you can modify them by
-p '{\"GT_SAMPLE_NAME_2_ID\":\"([0-9]+).txt\",\"DET_SAMPLE_NAME_2_ID\":\"([0-9]+).txt\",\"CONFIDENCES\":true}'

'IOU_CONSTRAINT': 0.5,
'AREA_PRECISION_CONSTRAINT': 0.5,
'GT_SAMPLE_NAME_2_ID': 'gt_img_([0-9]+).txt',
'DET_SAMPLE_NAME_2_ID': 'res_img_([0-9]+).txt',
'LTRB': False, # LTRB:2points(left,top,right,bottom) or 4 points(x1,y1,x2,y2,x3,y3,x4,y4)
'CRLF': False, # Lines are delimited by Windows CRLF format
'CONFIDENCES': False, # Detections must include confidence value. AP will be calculated
'PER_SAMPLE_RESULTS': True # Generate per sample results and produce data for visualization

'''
import argparse


def argparser():
    """Build the command-line interface shared by both evaluation algorithms."""
    parse = argparse.ArgumentParser()
    parse.add_argument('-g', dest='g', default='./gt.zip', help="Path of the Ground Truth file. In most cases, the Ground Truth will be included in the same Zip file named 'gt.zip', gt.txt' or 'gt.json'. If not, you will be able to get it on the Downloads page of the Task.")
    parse.add_argument('-s', dest='s', default='./submit.zip', help="Path of your method's results file.")
    parse.add_argument('-o', dest='o', help="Path to a directory where to copy the file 'results.zip' that containts per-sample results.")
    parse.add_argument('-p', dest='p', help="JSON string parameters to override the script default parameters. The parameters that can be overrided are inside the function 'default_evaluation_params' located at the begining of the evaluation Script. use: -p '{\"CRLF\":true}'")
    parse.add_argument('-c', dest='choice', default='IoU', help="choose algorithm for differet tasks.(Challenges 1、2 use 'DetEva' Challenges 4 use 'IoU', default 'IoU')")
    return parse.parse_args()


if __name__ == '__main__':
    args = argparser()
    # Project imports are kept inside the entry point so merely importing this
    # module has no side effects.
    import rrc_evaluation_funcs
    if args.choice == 'DetEva':
        from Algorithm_DetEva import default_evaluation_params, validate_data, evaluate_method
    elif args.choice == 'IoU':
        from Algorithm_IoU import default_evaluation_params, validate_data, evaluate_method
    else:
        # BUGFIX: the original fell through to a NameError on an unknown -c
        # value; fail with an explicit message instead.
        raise ValueError("Unknown algorithm choice %r; expected 'DetEva' or 'IoU'" % args.choice)

    rrc_evaluation_funcs.main_evaluation(args, default_evaluation_params, validate_data, evaluate_method)
# --------------------------------------------------------------------------------
# /submit.zip: https://raw.githubusercontent.com/MichaelHL-ai/OCR_EVALUATION/5dfb9f4e507a44994bec0c67e54d81970683db0b/submit.zip
# --------------------------------------------------------------------------------