├── .gitignore ├── .ipynb_checkpoints └── Docstrum-checkpoint.ipynb ├── Docstrum.ipynb ├── README.md ├── assets ├── Docstrum_Visualized_Steps.gif └── Text-line_Grouping_Process.gif ├── box.py ├── box.pyc ├── colors.py ├── colors.pyc ├── content.py ├── dimension.py ├── dimension.pyc ├── geometry.py ├── geometry.pyc ├── images_bank ├── AC_2c_title_clean.jpg ├── AC_dense.jpg └── AC_dense_clean.jpg ├── main.py ├── margin.py ├── output └── AC_2c_title_clean.jpg ├── page.py ├── page.pyc ├── stopwatch.py ├── stopwatch.pyc ├── text.py └── text.pyc /.gitignore: -------------------------------------------------------------------------------- 1 | # Datasets 2 | docstrums/ 3 | images/ 4 | output/ 5 | images_bank 6 | 7 | # For Mac OS 8 | .DS_Store 9 | -------------------------------------------------------------------------------- /Docstrum.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#!/usr/bin/python\n", 12 | "# find . -name '.DS_Store' -type f -delete\n", 13 | "# Chulwoo Pack\n", 14 | "\n", 15 | "import sys\n", 16 | "import os\n", 17 | "from page import Page\n", 18 | "\n", 19 | "SHOW_STEPS = True # change this to false if you just want to see the final output for each page.\n", 20 | "SAVE_OUTPUT = True\n", 21 | "SAVE_DOCSTRUM = True\n", 22 | "\n", 23 | "inputFolder = os.path.join('images')\n", 24 | "outputFolder = os.path.join('output')\n", 25 | "\n", 26 | " \n", 27 | "inputPath = os.path.join(inputFolder, os.listdir(inputFolder)[0])\n", 28 | "outputPath = os.path.join(outputFolder, os.listdir(inputFolder)[0])\n", 29 | "\n", 30 | "page = Page(inputPath, SHOW_STEPS, SAVE_DOCSTRUM)\n", 31 | "#page = Page(inputPath, SHOW_STEPS)\n", 32 | " \n", 33 | "if SAVE_OUTPUT:\n", 34 | " page.save(outputPath) # save a copy of what is displayed. 
Used for getting images for the paper.\n", 35 | " \n", 36 | "page.show((800, 800))" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "collapsed": false 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "# LINE EXTRACTION TESTING\n", 48 | "\n", 49 | "import sys\n", 50 | "import os\n", 51 | "from page import Page\n", 52 | "\n", 53 | "import cv2\n", 54 | "import math\n", 55 | "import numpy\n", 56 | "import subprocess\n", 57 | "import os\n", 58 | "\n", 59 | "import colors\n", 60 | "import geometry as g\n", 61 | "from box import Box\n", 62 | "import text\n", 63 | "from dimension import Dimension\n", 64 | "from stopwatch import Stopwatch\n", 65 | "import numpy\n", 66 | "import matplotlib.pyplot as plt\n", 67 | "import ntpath\n", 68 | "\n", 69 | "\n", 70 | "SHOW_STEPS = True # change this to false if you just want to see the final output for each page.\n", 71 | "SAVE_OUTPUT = True\n", 72 | "SAVE_DOCSTRUM = False\n", 73 | "\n", 74 | "inputFolder = os.path.join('images')\n", 75 | "outputFolder = os.path.join('output')\n", 76 | "\n", 77 | " \n", 78 | "inputPath = os.path.join(inputFolder, os.listdir(inputFolder)[0])\n", 79 | "outputPath = os.path.join(outputFolder, os.listdir(inputFolder)[0])\n", 80 | "\n", 81 | "page = Page(inputPath, SHOW_STEPS, SAVE_DOCSTRUM)\n", 82 | "\n", 83 | "\n", 84 | "if True:\n", 85 | " page.save(outputPath) # save a copy of what is displayed. 
Used for getting images for the paper.\n", 86 | " \n", 87 | "page.show((800, 800))" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "print(\"%.2f\" %1.237)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "collapsed": false 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "for line in page.lines:\n", 110 | " line.group = None\n", 111 | "\n", 112 | "for line in page.lines:\n", 113 | " print(line.group)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "collapsed": false 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "#NER VERSION: ADJUSTIVE SEARCHING ORDER\n", 125 | "import cv2\n", 126 | "\n", 127 | "from shapely.geometry import Point # For checking overlap\n", 128 | "from shapely.geometry.polygon import Polygon # For checking overlap\n", 129 | "from shapely.geometry import MultiPoint # For checking overlap\n", 130 | "\n", 131 | "import progressbar # For displaying progressbar\n", 132 | "from time import sleep # For displaying progressbar\n", 133 | "\n", 134 | "#from stopwatch import Stopwatch # For checking run-time\n", 135 | "\n", 136 | "#stopwatch = Stopwatch()\n", 137 | "\n", 138 | "image = page.image.copy()\n", 139 | "\n", 140 | "EPS = 1e-10\n", 141 | "group_idx = 0\n", 142 | "threshold_angle = 1.0\n", 143 | "threshold_paralldist = 1.7 * 13.0\n", 144 | "threshold_perpendist = 1.7 * 17.0 #[1.5~1.7]\n", 145 | "threshold_overlap = 1.0\n", 146 | "threshold_early_skip = 100\n", 147 | "threshold_visualize_line_width = 5\n", 148 | "\n", 149 | "SHOW_DETAIL = False\n", 150 | "SHOW_VISUAL_STEP = True\n", 151 | "EARLY_SKIP = False\n", 152 | "\n", 153 | "########\n", 154 | "# INIT #\n", 155 | "########\n", 156 | "# Get lines\n", 157 | "_my_lines = page.lines\n", 158 | "# Remove dots\n", 159 | "my_lines = []\n", 160 | "for 
_my_line in _my_lines:\n", 161 | " if(_my_line.start.x-_my_line.end.x==0 and _my_line.start.y-_my_line.end.y==0):\n", 162 | " continue\n", 163 | " else:\n", 164 | " my_lines.append(_my_line)\n", 165 | "# Sorting lines\n", 166 | "my_lines.sort(key=lambda line:((line.start.y+line.end.y)/2,(line.start.x+line.end.x)/2))\n", 167 | "# Lines assigned a group\n", 168 | "my_lines_in_group = []\n", 169 | "# Lines not assigned any group yet\n", 170 | "my_lines_no_group = []\n", 171 | "for i in range(0,len(my_lines)-1):\n", 172 | " my_lines_no_group.append(i)\n", 173 | "if SHOW_DETAIL: print(\"no_group:\",my_lines_in_group)\n", 174 | "if SHOW_DETAIL: print(\"in_group:\",my_lines_no_group)\n", 175 | "# First line, not dot (its index, i)\n", 176 | "\n", 177 | "bar = progressbar.ProgressBar(maxval=len(my_lines_no_group), \\\n", 178 | " widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])\n", 179 | "progress_idx = 0\n", 180 | "bar.start()\n", 181 | "\n", 182 | "max_loop = len(my_lines)\n", 183 | "#act_loop = 0\n", 184 | "\n", 185 | "for act_loop in xrange(max_loop): # Make sure every line is looked up \n", 186 | " #flag_found_none = True\n", 187 | " if ((len(my_lines_in_group) == 0) and (len(my_lines_no_group) == 0)):\n", 188 | " break\n", 189 | " #test_act_loop = test_act_loop+1\n", 190 | " \n", 191 | " progress_idx = progress_idx+2 # Update progressbar\n", 192 | " bar.update(act_loop) # Update progressbar\n", 193 | " sleep(0.1) \n", 194 | " #######################\n", 195 | " # Set the ith element #\n", 196 | " #######################\n", 197 | " i = -1\n", 198 | " if EARLY_SKIP:\n", 199 | " early_skip = threshold_early_skip\n", 200 | " ## TODO: Since the my_lines_in_group queue is currently empty while lines remain, pick another non-dot line from the my_lines_no_group queue as the ith element. 
\n", 201 | " if(len(my_lines_in_group) == 0):\n", 202 | " delta_x_i = 0\n", 203 | " delta_y_i = 0\n", 204 | " for candidate_line_idx in my_lines_no_group[:]:\n", 205 | " x_O_i = my_lines[candidate_line_idx].start.x\n", 206 | " y_O_i = page.image.shape[0] - my_lines[candidate_line_idx].start.y\n", 207 | " x_F_i = my_lines[candidate_line_idx].end.x\n", 208 | " y_F_i = page.image.shape[0] - my_lines[candidate_line_idx].end.y \n", 209 | " #delta_x_i = abs(x_F_i - x_O_i)\n", 210 | " delta_x_i = float(x_F_i - x_O_i)\n", 211 | " #delta_y_i = abs(y_F_i - y_O_i)\n", 212 | " delta_y_i = float(y_F_i - y_O_i)\n", 213 | " if (delta_x_i != 0 and delta_y_i != 0): # Found!\n", 214 | " i = candidate_line_idx\n", 215 | " my_lines_no_group.remove(candidate_line_idx)\n", 216 | " break\n", 217 | " else:\n", 218 | " i = my_lines_in_group.pop(0)\n", 219 | " \n", 220 | " \n", 221 | " # TODO: more sophisticated way to break?\n", 222 | " if (i == -1):\n", 223 | " break\n", 224 | " \n", 225 | " # Visualize ith element\n", 226 | " if SHOW_VISUAL_STEP:\n", 227 | " image = page.image.copy()\n", 228 | " cv2.line(image, ((my_lines[i].start.x,my_lines[i].start.y)),((my_lines[i].end.x,my_lines[i].end.y)), (0,0,255),threshold_visualize_line_width)\n", 229 | " page.display(image, title='Visualization of text-line groupping step')\n", 230 | " \n", 231 | " # No more lines to search\n", 232 | " if (len(my_lines_no_group) == 0):\n", 233 | " break\n", 234 | " else:\n", 235 | " my_lines[i].noise = False # This assure that dot-wise noise to be excluded from grouping process\n", 236 | " #######################\n", 237 | " # Set the jth element #\n", 238 | " #######################\n", 239 | " for j in my_lines_no_group[:]:\n", 240 | " if EARLY_SKIP:\n", 241 | " if early_skip < 0:\n", 242 | " break\n", 243 | " if SHOW_VISUAL_STEP:\n", 244 | " cv2.line(image, ((my_lines[j].start.x,my_lines[j].start.y)),((my_lines[j].end.x,my_lines[j].end.y)), (255,0,0),threshold_visualize_line_width)\n", 245 | " 
page.display(image, title='Visualization of text-line groupping step')\n", 246 | "\n", 247 | " sameGroup = False\n", 248 | " ################################\n", 249 | " # CALCULATE GEOMETRIC FEATURES #\n", 250 | " ################################\n", 251 | " # Point setting\n", 252 | " x_O_i = my_lines[i].start.x\n", 253 | " y_O_i = page.image.shape[0] - my_lines[i].start.y\n", 254 | " x_F_i = my_lines[i].end.x\n", 255 | " y_F_i = page.image.shape[0] - my_lines[i].end.y \n", 256 | "\n", 257 | " x_O_j = my_lines[j].start.x\n", 258 | " y_O_j = page.image.shape[0] - my_lines[j].start.y\n", 259 | " x_F_j = my_lines[j].end.x\n", 260 | " y_F_j = page.image.shape[0] - my_lines[j].end.y\n", 261 | "\n", 262 | " #delta_x_i = abs(x_F_i - x_O_i)\n", 263 | " #delta_y_i = abs(y_F_i - y_O_i)\n", 264 | " #delta_x_j = abs(x_F_j - x_O_j)\n", 265 | " #delta_y_j = abs(y_F_j - y_O_j)\n", 266 | " delta_x_i = float(x_F_i - x_O_i)\n", 267 | " delta_y_i = float(y_F_i - y_O_i)\n", 268 | " delta_x_j = float(x_F_j - x_O_j)\n", 269 | " delta_y_j = float(y_F_j - y_O_j)\n", 270 | " \n", 271 | " \n", 272 | " # ith or jth line is dot, so skip it\n", 273 | " if (delta_x_j == 0 and delta_y_j == 0):\n", 274 | " my_lines_no_group.remove(j)\n", 275 | " continue\n", 276 | "\n", 277 | " if SHOW_DETAIL:\n", 278 | " print(\"\\n****************************************************************\")\n", 279 | " print(\"# of in_group:\",len(my_lines_in_group),my_lines_in_group)\n", 280 | " print(\"# of no_group:\",len(my_lines_no_group),my_lines_no_group)\n", 281 | " print(i, my_lines[i].points)\n", 282 | " print(j, my_lines[j].points)\n", 283 | " print(\"i:\",x_O_i,y_O_i,\"-\",x_F_i,y_F_i)\n", 284 | " print(\"j:\",x_O_j,y_O_j,\"-\",x_F_j,y_F_j)\n", 285 | "\n", 286 | " # Calculate angle\n", 287 | " theta_i_j = math.atan2(delta_y_j,delta_x_j-math.atan2(delta_y_i,delta_x_i))\n", 288 | " if SHOW_DETAIL:\n", 289 | " print(\"Angle:\",theta_i_j)\n", 290 | "\n", 291 | " # Calculate overlap\n", 292 | " #if delta_x_j == 
0:\n", 293 | " # delta_x_j = 0.1\n", 294 | " #if delta_y_i == 0:\n", 295 | " # delta_y_i = 0.1\n", 296 | " #if delta_y_j == 0:\n", 297 | " # delta_y_j = 0.1\n", 298 | " #if delta_x_i == 0:\n", 299 | " # delta_x_i = 0.1\n", 300 | "\n", 301 | "\n", 302 | " x_A_j = (x_O_i*delta_x_i*delta_x_j + x_O_j*delta_y_i*delta_y_j + delta_x_j*delta_y_i*(y_O_i-y_O_j))/(delta_y_i*delta_y_j + delta_x_i*delta_x_j + EPS)\n", 303 | " if (delta_x_j != 0):\n", 304 | " y_A_j = (delta_y_j/delta_x_j)*(x_A_j - x_O_j) + y_O_j\n", 305 | " else:\n", 306 | " x_A_j = y_O_j\n", 307 | "\n", 308 | " x_B_j = (x_F_i*delta_x_i*delta_x_j + x_F_j*delta_y_i*delta_y_j + delta_x_j*delta_y_i*(y_F_i-y_F_j))/(delta_y_i*delta_y_j + delta_x_i*delta_x_j + EPS)\n", 309 | " if (delta_x_j != 0):\n", 310 | " y_B_j = (delta_y_j/delta_x_j)*(x_B_j - x_F_j) + y_F_j\n", 311 | " else:\n", 312 | " x_B_j = y_F_j\n", 313 | "\n", 314 | " # Find C and D points\n", 315 | " #x_middle_candidates = [x_O_j, x_F_j, x_A_j, x_B_j]\n", 316 | " #x_middle_candidates.sort()\n", 317 | " #y_middle_candidates = [y_O_j, y_F_j, y_A_j, y_B_j]\n", 318 | " #y_middle_candidates.sort()\n", 319 | " C_D_candidates = [(x_O_j,y_O_j), (x_F_j,y_F_j), (x_A_j,y_A_j), (x_B_j,y_B_j)]\n", 320 | " if (delta_x_j != 0):\n", 321 | " C_D_candidates.sort(key=lambda x:x[0]) # sort by x\n", 322 | " elif (delta_y_j != 0):\n", 323 | " C_D_candidates.sort(key=lambda x:x[1]) # sort by y\n", 324 | " x_C_j,y_C_j = C_D_candidates[1]\n", 325 | " x_D_j,y_D_j = C_D_candidates[2]\n", 326 | "\n", 327 | " if SHOW_DETAIL:\n", 328 | " print(\"x_A_j,y_A_j\",x_A_j,y_A_j)\n", 329 | " print(\"x_B_j,y_B_j\",x_B_j,y_B_j)\n", 330 | " print(\"x_C_j,y_C_j\",x_C_j,y_C_j)\n", 331 | " print(\"x_D_j,y_D_j\",x_D_j,y_D_j)\n", 332 | "\n", 333 | " #x_i_j_components = [int(x_O_i), int(x_F_i), int(x_O_j), int(x_F_j)]\n", 334 | " #x_i_j_components.sort()\n", 335 | " #y_i_j_components = [int(y_O_i), int(y_F_i), int(y_O_j), int(y_F_j)]\n", 336 | " #y_i_j_components.sort()\n", 337 | " # convert to int in 
order to allow generous overlap\n", 338 | " #if ((int(x_O_j) <= int(x_C_j) <= int(x_F_j) and (int(y_O_j) <= int(y_C_j) <= int(y_F_j) or int(y_F_j) <= int(y_C_j) <= int(y_O_j))) or (int(x_O_i) <= int(x_C_j) <= int(x_F_i) and (int(y_O_i) <= int(x_C_j) <= int(y_F_i) or int(y_F_i) <= int(y_C_j) <= int(y_O_i)))) and ((int(x_O_j) <= int(x_D_j) <= int(x_F_j) and (int(y_O_j) <= int(y_D_j) <= int(y_F_j) or int(y_F_j) <= int(y_D_j) <= int(y_O_j))) or (int(x_O_i) <= int(x_D_j) <= int(x_F_i) and (int(y_O_i) <= int(y_D_j) <= int(y_F_i) or int(y_F_i) <= int(y_D_j) <= int(y_O_i)))):\n", 339 | " #if ((x_i_j_components[0] <= int(x_C_j[0]) <= x_i_j_components[-1]) and (y_i_j_components[0] <= int(y_C_j) <= y_i_j_components[-1]) and (x_i_j_components[0] <= int(x_D_j) <= x_i_j_components[-1]) and (y_i_j_components[0] <= int(y_D_j) <= y_i_j_components[-1])):\n", 340 | " #convex_hull = MultiPoint([(x_O_j, y_O_j), (x_O_j, y_F_j), (x_F_j, y_F_j), (x_F_j, y_O_j)])\n", 341 | " #polygon = Polygon([(x_O_i, y_O_i), (x_F_i, y_F_i),(x_F_j, y_F_j), (x_O_j, y_O_j)])\n", 342 | " #convex_hull = MultiPoint([(x_O_i, y_O_i), (x_F_i, y_F_i),(x_F_j, y_F_j), (x_O_j, y_O_j)]).convex_hull\n", 343 | " polygon = Polygon([(x_O_j, y_O_j), (x_O_j, y_F_j), (x_F_j, y_F_j), (x_F_j, y_O_j)])\n", 344 | " C_point = Point(x_C_j, y_C_j)\n", 345 | " D_point = Point(x_D_j, y_D_j)\n", 346 | " #if polygon.area != convex_hull.area:\n", 347 | " # overlap = False\n", 348 | " #elif (convex_hull.contains(C_point) and convex_hull.contains(D_point)):\n", 349 | " #if (convex_hull.contains(C_point) and convex_hull.contains(D_point)):\n", 350 | " if (polygon.contains(C_point) or polygon.touches(C_point)) and (polygon.contains(D_point) or polygon.touches(D_point)):\n", 351 | " overlap = True\n", 352 | " else:\n", 353 | " overlap = False\n", 354 | " \n", 355 | " #p_j = (math.sqrt(math.pow(y_D_j-y_C_j,2)+math.pow(x_D_j-x_C_j,2)))/2.0\n", 356 | " p_j = math.sqrt(math.pow(y_D_j-y_C_j,2)+math.pow(x_D_j-x_C_j,2))\n", 357 | " l_j = 
math.sqrt(math.pow(y_F_j-y_O_j,2)+math.pow(x_F_j-x_O_j,2))\n", 358 | " if (l_j == 0):\n", 359 | " l_j = 0.1\n", 360 | " if overlap:\n", 361 | " p_i_j = p_j/l_j\n", 362 | " else:\n", 363 | " p_i_j = -p_j/l_j\n", 364 | "\n", 365 | " if SHOW_DETAIL:\n", 366 | " print(\"Overlap?\",overlap)\n", 367 | " print(\"p_j:\",p_j)\n", 368 | " print(\"p_i_j:\",p_i_j)\n", 369 | "\n", 370 | " # Calculate parallel_dist\n", 371 | " if overlap:\n", 372 | " d_i_j_a = p_j\n", 373 | " else:\n", 374 | " d_i_j_a = -p_j\n", 375 | " if SHOW_DETAIL:\n", 376 | " print(\"parallel_dist: \",d_i_j_a)\n", 377 | "\n", 378 | " # Calculate perpend_dist\n", 379 | " x_M_j = (x_C_j + x_D_j)/2.0\n", 380 | " y_M_j = (y_C_j + y_D_j)/2.0\n", 381 | " if SHOW_DETAIL:\n", 382 | " print(\"x_M_j,y_M_j\",x_M_j,y_M_j)\n", 383 | " print(\"delta_x_i:\",delta_x_i)\n", 384 | " print(\"delta_y_i:\",delta_y_i)\n", 385 | " print(\"delta_x_j:\",delta_x_j)\n", 386 | " print(\"delta_y_j:\",delta_y_j)\n", 387 | "\n", 388 | " if delta_x_i != 0.0 and delta_y_i != 0.0:\n", 389 | " d_e_i_j = ((x_M_j - x_O_i) - (y_M_j - y_O_i)*delta_x_i/(delta_y_i + EPS))/((delta_x_i**2)/(delta_y_i**2 + EPS) + 1)**0.5 \n", 390 | " elif delta_y_i == 0.0:\n", 391 | " d_e_i_j = int(y_M_j) - int(y_O_i)\n", 392 | " elif delta_x_i == 0.0:\n", 393 | " d_e_i_j = int(x_M_j) - int(x_O_i)\n", 394 | " d_e_i_j = abs(d_e_i_j)\n", 395 | "\n", 396 | " if SHOW_DETAIL:\n", 397 | " print(\"perpend_dist: \",d_e_i_j)\n", 398 | "\n", 399 | " ######################\n", 400 | " # DECIDING GROUPNESS #\n", 401 | " #######################\n", 402 | " # 1. angle check\n", 403 | " if theta_i_j < threshold_angle:\n", 404 | " if SHOW_DETAIL: print(\"... Angle ok!\")\n", 405 | " # 2. perpend_dist check\n", 406 | " if 0 < d_e_i_j < threshold_perpendist:\n", 407 | " if SHOW_DETAIL: print(\"... Perpendicular ok!\")\n", 408 | " # 3.a. overlap check\n", 409 | " # 3.b. 
parallel_dist check\n", 410 | " if ((overlap and p_i_j <= threshold_overlap)):\n", 411 | " if SHOW_DETAIL: print(\"... Overlap & p_i_j ok!\")\n", 412 | " # Group!\n", 413 | " sameGroup = True\n", 414 | " elif (abs(d_i_j_a) < threshold_paralldist):\n", 415 | " if SHOW_DETAIL: print(\"... Parallel ok!\")\n", 416 | " # Group!\n", 417 | " sameGroup = True\n", 418 | "\n", 419 | " if SHOW_DETAIL:\n", 420 | " print(\"same group? \",sameGroup)\n", 421 | " if sameGroup:\n", 422 | "\n", 423 | " if EARLY_SKIP:\n", 424 | " early_skip = threshold_early_skip\n", 425 | " if SHOW_DETAIL:\n", 426 | " print(\"before group idx: \",group_idx)\n", 427 | " print(\"before i's group: \", my_lines[i].group)\n", 428 | " print(\"before j's group: \", my_lines[j].group)\n", 429 | " if (my_lines[i].group == None) and (my_lines[j].group == None):\n", 430 | " if SHOW_DETAIL:\n", 431 | " print(\"... case 1\")\n", 432 | " # Assign to a new block\n", 433 | " group_idx = group_idx + 1\n", 434 | " my_lines[i].group = group_idx\n", 435 | " my_lines[j].group = group_idx\n", 436 | " #my_lines_no_group.remove(i) # update queue\n", 437 | " my_lines_in_group.append(i) # update queue\n", 438 | " my_lines_no_group.remove(j) # update queue\n", 439 | " my_lines_in_group.append(j) # update queue\n", 440 | " elif (my_lines[i].group == None):\n", 441 | " if SHOW_DETAIL: print(\"... case 2\")\n", 442 | " # Unassigned text-line is assigned to the block of the other\n", 443 | " my_lines[i].group = my_lines[j].group\n", 444 | " #my_lines_no_group.remove(i) # update queue\n", 445 | " my_lines_in_group.append(i) # update queue\n", 446 | " elif (my_lines[j].group == None):\n", 447 | " if SHOW_DETAIL: print(\"... 
case 3\")\n", 448 | " # Unassigned text-line is assigned to the block of the other\n", 449 | " my_lines[j].group = my_lines[i].group\n", 450 | " my_lines_no_group.remove(j) # update queue\n", 451 | " my_lines_in_group.append(j) # update queue\n", 452 | " if SHOW_DETAIL: print(\"after group idx: \",group_idx)\n", 453 | " if SHOW_DETAIL: print(\"after i's group: \", my_lines[i].group)\n", 454 | " if SHOW_DETAIL: print(\"after j's group: \", my_lines[j].group)\n", 455 | " if SHOW_VISUAL_STEP:\n", 456 | " cv2.line(image, ((my_lines[j].start.x,my_lines[j].start.y)),((my_lines[j].end.x,my_lines[j].end.y)), (0,255,0),threshold_visualize_line_width)\n", 457 | " page.display(image, title='Visualization of text-line groupping step')\n", 458 | " else:\n", 459 | " if EARLY_SKIP:\n", 460 | " early_skip = early_skip - 1\n", 461 | " \n", 462 | " if (my_lines[i].noise == False and my_lines[i].group == None):\n", 463 | " group_idx = group_idx + 1\n", 464 | " my_lines[i].group = group_idx\n", 465 | "bar.finish()\n", 466 | "\n", 467 | "print(\"Total iter: [%d/%d]\" %(act_loop,max_loop))\n", 468 | "print(\"Done!\")" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": null, 474 | "metadata": { 475 | "collapsed": false 476 | }, 477 | "outputs": [], 478 | "source": [ 479 | "\n", 480 | "dist = [10,20,20,50,20,20,30,25,25,25,25,25,25]\n", 481 | "n, bins, patches = plt.hist(dist, numpy.max(dist)-numpy.min(dist)+1, facecolor='orange', alpha=0.5)\n", 482 | "n_copy = n.copy()\n", 483 | "print(n)\n", 484 | "print(n_copy)\n", 485 | "n_copy[::-1].sort()\n", 486 | "print(n)\n", 487 | "print(n_copy)\n", 488 | "print(bins)\n", 489 | "print(numpy.argmax(n)+numpy.min(dist))\n", 490 | "print(bins.max())\n", 491 | "\n", 492 | "#n_copy[::-1].sort()\n", 493 | "a = numpy.where(n == n_copy[2])\n", 494 | "print(\"Test\",a)\n", 495 | "\n", 496 | "if len(a[0])>1:\n", 497 | " _max = a[0][int(len(a[0])/2)]\n", 498 | "else:\n", 499 | " _max = a[0][0]\n", 500 | "\n", 501 | 
"_max+numpy.min(dist)\n", 502 | "\n", 503 | "for i in range(5):\n", 504 | " print(i)" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": null, 510 | "metadata": { 511 | "collapsed": true 512 | }, 513 | "outputs": [], 514 | "source": [ 515 | "peakind = signal.find_peaks_cwt([10,20,20,50,20,20,30], np.arange(1,10))" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": { 522 | "collapsed": false 523 | }, 524 | "outputs": [], 525 | "source": [ 526 | "peakind" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": null, 532 | "metadata": { 533 | "collapsed": false 534 | }, 535 | "outputs": [], 536 | "source": [ 537 | "x_O_i = 491\n", 538 | "y_O_i = 615\n", 539 | "x_F_i = 757\n", 540 | "y_F_i = 600\n", 541 | "x_O_j = 491\n", 542 | "y_O_j = 615\n", 543 | "x_F_j = 757\n", 544 | "y_F_j = 600\n", 545 | "x_C_j = 757\n", 546 | "y_C_j = int(637.6500942238861)\n", 547 | "\n", 548 | "delta_y_j = 7.0\n", 549 | "delta_x_j = 658.0\n", 550 | "x_A_j = 4144.4503916449075\n", 551 | "x_F_j = 3890.0\n", 552 | "y_F_j = 1856.0\n", 553 | "(delta_y_j/delta_x_j)*(x_A_j - x_F_j) + y_F_j\n" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": null, 559 | "metadata": { 560 | "collapsed": false 561 | }, 562 | "outputs": [], 563 | "source": [ 564 | "('i:', 2639, 4875, '-', 2676, 4872)\n", 565 | "('j:', 992, 4892, '-', 2222, 4879)\n", 566 | "('Angle:', -0.010568017106413556)\n", 567 | "('x_A_j,y_A_j', 2638.9670025686564, 4874.593031680169)\n", 568 | "('x_B_j,y_B_j', 2676.178357373372, 4874.19974093833)\n", 569 | "('x_C_j,y_C_j', 2222, 4879)\n", 570 | "('x_D_j,y_D_j', 2638.9670025686564, 4874.593031680169)\n", 571 | "\n", 572 | "x_O_i = 2639\n", 573 | "y_O_i = 4875\n", 574 | "x_F_i = 2676\n", 575 | "y_F_i = 4872\n", 576 | "\n", 577 | "x_O_j = 896\n", 578 | "y_O_j = 4802\n", 579 | "x_F_j = 1415\n", 580 | "y_F_j = 4805\n", 581 | "\n", 582 | "x_C_j = 2222\n", 583 | "y_C_j = 4879\n", 
584 | "x_D_j = 2638#.9670025686564\n", 585 | "y_D_j = 4874#.593031680169\n", 586 | "\n", 587 | "\n", 588 | "#polygon = Polygon([(x_O_i-1, y_O_i+1), (x_F_i+1, y_F_i+1),(x_F_j+1, y_F_j-1), (x_O_j-1, y_O_j-1)])\n", 589 | "polygon = Polygon([(x_O_i, y_O_i), (x_F_i, y_F_i),(x_F_j, y_F_j), (x_O_j, y_O_j)])\n", 590 | "#C_point = Point(x_C_j, y_C_j)\n", 591 | "#D_point = Point(x_D_j, y_D_j)\n", 592 | "polygon.area" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": null, 598 | "metadata": { 599 | "collapsed": false 600 | }, 601 | "outputs": [], 602 | "source": [ 603 | "from shapely.geometry import MultiPoint\n", 604 | "convex_hull = MultiPoint([(x_O_i, y_O_i), (x_F_i, y_F_i),(x_F_j, y_F_j), (x_O_j, y_O_j)]).convex_hull\n", 605 | "convex_hull.area" 606 | ] 607 | }, 608 | { 609 | "cell_type": "code", 610 | "execution_count": null, 611 | "metadata": { 612 | "collapsed": false 613 | }, 614 | "outputs": [], 615 | "source": [ 616 | "x_O_j = 2476\n", 617 | "y_O_j = 1938\n", 618 | "x_F_j = 2840\n", 619 | "y_F_j = 1932\n", 620 | "x_A_j = 2363\n", 621 | "y_A_j = 1939\n", 622 | "x_B_j = 2631\n", 623 | "y_B_j = 1935\n", 624 | "\n", 625 | "C_D_candidates = [(x_O_j,y_O_j), (x_F_j,y_F_j), (x_A_j,y_A_j), (x_B_j,y_B_j)]\n", 626 | "C_D_candidates.sort(key=lambda x:x[0])\n", 627 | "print(C_D_candidates[1])\n", 628 | "print(C_D_candidates[2])\n" 629 | ] 630 | }, 631 | { 632 | "cell_type": "code", 633 | "execution_count": null, 634 | "metadata": { 635 | "collapsed": false 636 | }, 637 | "outputs": [], 638 | "source": [ 639 | "C_D_candidates" 640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": null, 645 | "metadata": { 646 | "collapsed": false 647 | }, 648 | "outputs": [], 649 | "source": [ 650 | "C_D_candidates" 651 | ] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "execution_count": null, 656 | "metadata": { 657 | "collapsed": false 658 | }, 659 | "outputs": [], 660 | "source": [ 661 | "C_x,C_y = C_D_candidates[1]\n", 662 | 
"print(C_x)\n", 663 | "print(C_y)\n" 664 | ] 665 | }, 666 | { 667 | "cell_type": "code", 668 | "execution_count": null, 669 | "metadata": { 670 | "collapsed": false 671 | }, 672 | "outputs": [], 673 | "source": [ 674 | "Polygon([(0-1, y_O_i+1), (x_F_i+1, y_F_i+1),(x_F_j+1, y_F_j-1), (x_O_j-1, y_O_j-1)])" 675 | ] 676 | }, 677 | { 678 | "cell_type": "code", 679 | "execution_count": null, 680 | "metadata": { 681 | "collapsed": true 682 | }, 683 | "outputs": [], 684 | "source": [] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": null, 689 | "metadata": { 690 | "collapsed": false 691 | }, 692 | "outputs": [], 693 | "source": [ 694 | "######################\n", 695 | "# Draw Grouped Lines #\n", 696 | "######################\n", 697 | "import cv2\n", 698 | "image = page.image.copy()\n", 699 | "for my_line in my_lines:\n", 700 | " #print(my_line.start.x)\n", 701 | "# print my_line.group\n", 702 | " if my_line.group == None:\n", 703 | " continue\n", 704 | " blue = 0\n", 705 | " green = 0 \n", 706 | " red = 0\n", 707 | " else:\n", 708 | " blue = (my_line.group*100)%255\n", 709 | " green = (my_line.group*200)%255\n", 710 | " red = (my_line.group*300)%255\n", 711 | " \n", 712 | " #print(blue,green,red)\n", 713 | " cv2.line(image, (my_line.start.x,my_line.start.y), (my_line.end.x,my_line.end.y), (blue,green,red),10)\n", 714 | " \n", 715 | "#cv2.imwrite(outputPath, image) \n", 716 | "maxDimension = Dimension(800, 800)\n", 717 | "displayDimension = Dimension(image.shape[1], image.shape[0])\n", 718 | "displayDimension.fitInside(maxDimension)\n", 719 | "image = cv2.resize(image, tuple(displayDimension))\n", 720 | "cv2.namedWindow('title', cv2.CV_WINDOW_AUTOSIZE)\n", 721 | "cv2.imshow('grouped', image)\n", 722 | "cv2.waitKey()\n", 723 | "cv2.destroyAllWindows()" 724 | ] 725 | }, 726 | { 727 | "cell_type": "code", 728 | "execution_count": null, 729 | "metadata": { 730 | "collapsed": false 731 | }, 732 | "outputs": [], 733 | "source": [ 734 | 
"#######################\n", 735 | "# Draw Bounding Boxes #\n", 736 | "#######################\n", 737 | "import cv2\n", 738 | "import numpy\n", 739 | "THRESHOLD_POLY_EXAGGERATE = 10 # Unit: Pixel\n", 740 | "image = page.image.copy()\n", 741 | "tot_groups = group_idx+1\n", 742 | "group_table = []\n", 743 | "for group_idx in range(tot_groups):\n", 744 | " group_table.append([])\n", 745 | "\n", 746 | "for my_line in my_lines:\n", 747 | " for group_idx in range(1,tot_groups):\n", 748 | " if my_line.group == None:\n", 749 | " continue\n", 750 | " elif my_line.group == group_idx:\n", 751 | " exaggerated_left_start_x = my_line.start.x-THRESHOLD_POLY_EXAGGERATE\n", 752 | " exaggerated_up_start_y = my_line.start.y+THRESHOLD_POLY_EXAGGERATE\n", 753 | " exaggerated_down_start_y = my_line.start.y-THRESHOLD_POLY_EXAGGERATE\n", 754 | " \n", 755 | " exaggerated_right_end_x = my_line.end.x+THRESHOLD_POLY_EXAGGERATE\n", 756 | " exaggerated_up_end_y = my_line.end.y+THRESHOLD_POLY_EXAGGERATE\n", 757 | " exaggerated_down_end_y = my_line.end.y-THRESHOLD_POLY_EXAGGERATE\n", 758 | " \n", 759 | " group_table[group_idx-1].append([exaggerated_left_start_x,exaggerated_up_start_y])\n", 760 | " group_table[group_idx-1].append([exaggerated_left_start_x,exaggerated_down_start_y])\n", 761 | " \n", 762 | " group_table[group_idx-1].append([exaggerated_right_end_x,exaggerated_up_end_y])\n", 763 | " group_table[group_idx-1].append([exaggerated_right_end_x,exaggerated_down_end_y])\n", 764 | " \n", 765 | " \n", 766 | "\n", 767 | "for group_idx in range(1,tot_groups):\n", 768 | " points = numpy.array(group_table[group_idx-1], dtype='int')\n", 769 | " rect = cv2.minAreaRect(points)\n", 770 | " box = cv2.cv.BoxPoints(rect) # cv2.boxPoints(rect) for OpenCV 3.x\n", 771 | " box = numpy.int0(box)\n", 772 | " cv2.drawContours(image,numpy.int32([box]),0,(0,0,255),7)\n", 773 | " #convex_hull = cv2.convexHull(points)\n", 774 | " #cv2.polylines(image, numpy.int32([convex_hull]), True, (0, 0, 255), 
thickness=2)\n", 775 | "\n", 776 | "\n", 777 | "maxDimension = Dimension(800, 800)\n", 778 | "displayDimension = Dimension(image.shape[1], image.shape[0])\n", 779 | "displayDimension.fitInside(maxDimension)\n", 780 | "image = cv2.resize(image, tuple(displayDimension))\n", 781 | "cv2.namedWindow('Polylines', cv2.CV_WINDOW_AUTOSIZE)\n", 782 | "cv2.imshow('Polylines', image)\n", 783 | "cv2.waitKey()\n", 784 | "cv2.destroyAllWindows()\n" 785 | ] 786 | }, 787 | { 788 | "cell_type": "code", 789 | "execution_count": null, 790 | "metadata": { 791 | "collapsed": false 792 | }, 793 | "outputs": [], 794 | "source": [ 795 | "cv2.imwrite(outputPath, image)" 796 | ] 797 | }, 798 | { 799 | "cell_type": "code", 800 | "execution_count": null, 801 | "metadata": { 802 | "collapsed": false 803 | }, 804 | "outputs": [], 805 | "source": [ 806 | "group_table" 807 | ] 808 | }, 809 | { 810 | "cell_type": "code", 811 | "execution_count": null, 812 | "metadata": { 813 | "collapsed": false 814 | }, 815 | "outputs": [], 816 | "source": [ 817 | "cv2.imwrite(outputPath, image)" 818 | ] 819 | }, 820 | { 821 | "cell_type": "code", 822 | "execution_count": null, 823 | "metadata": { 824 | "collapsed": false 825 | }, 826 | "outputs": [], 827 | "source": [ 828 | "################################\n", 829 | "# Draw BoundingBox (Rectangle) #\n", 830 | "################################\n", 831 | "import cv2\n", 832 | "import numpy\n", 833 | "image = page.image.copy()\n", 834 | "boundingbox_table = numpy.zeros((group_idx+1,4)) # [min_x,max_x,min_y,max_y]\n", 835 | "boundingbox_table[:,0] = image.shape[1]\n", 836 | "boundingbox_table[:,1] = 0\n", 837 | "boundingbox_table[:,2] = image.shape[0]\n", 838 | "boundingbox_table[:,3] = 0\n", 839 | "\n", 840 | "# Find BoundingBoxes for Each Group\n", 841 | "for my_line in my_lines:\n", 842 | " for i in range(1,group_idx+1):\n", 843 | " if my_line.group == None:\n", 844 | " # Update if found new min or max\n", 845 | " if my_line.start.x < boundingbox_table[-1,0]:\n", 
846 | " boundingbox_table[-1,0] = my_line.start.x\n", 847 | " if my_line.end.x > boundingbox_table[-1,1]:\n", 848 | " boundingbox_table[-1,1] = my_line.end.x\n", 849 | " if my_line.start.y < boundingbox_table[-1,2]:\n", 850 | " boundingbox_table[-1,2] = my_line.start.y\n", 851 | " if my_line.end.y > boundingbox_table[-1,3]:\n", 852 | " boundingbox_table[-1,3] = my_line.end.y\n", 853 | " elif my_line.group == i:\n", 854 | " # Update if found new min or max\n", 855 | " if my_line.start.x < boundingbox_table[i-1,0]:\n", 856 | " boundingbox_table[i-1,0] = my_line.start.x\n", 857 | " if my_line.end.x > boundingbox_table[i-1,1]:\n", 858 | " boundingbox_table[i-1,1] = my_line.end.x\n", 859 | " if my_line.start.y < boundingbox_table[i-1,2]:\n", 860 | " boundingbox_table[i-1,2] = my_line.start.y\n", 861 | " if my_line.end.y > boundingbox_table[i-1,3]:\n", 862 | " boundingbox_table[i-1,3] = my_line.end.y\n", 863 | " \n", 864 | "# Draw BoundingBoxes \n", 865 | "for i in range(group_idx+1):\n", 866 | " x_min = int(boundingbox_table[i,0])\n", 867 | " x_max = int(boundingbox_table[i,1])\n", 868 | " y_min = int(boundingbox_table[i,2])\n", 869 | " y_max = int(boundingbox_table[i,3])\n", 870 | " cv2.rectangle(image,(x_min,y_max),(x_max,y_min),(0,0,255),5) # (image, Top-Left, Bottom-Right, BGR_Color, Width)\n", 871 | "\n", 872 | "\n", 873 | "maxDimension = Dimension(800, 800)\n", 874 | "displayDimension = Dimension(image.shape[1], image.shape[0])\n", 875 | "displayDimension.fitInside(maxDimension)\n", 876 | "image = cv2.resize(image, tuple(displayDimension))\n", 877 | "cv2.namedWindow('title', cv2.CV_WINDOW_AUTOSIZE)\n", 878 | "cv2.imshow('grouped', image)\n", 879 | "cv2.waitKey()\n", 880 | "cv2.destroyAllWindows()" 881 | ] 882 | }, 883 | { 884 | "cell_type": "code", 885 | "execution_count": null, 886 | "metadata": { 887 | "collapsed": false 888 | }, 889 | "outputs": [], 890 | "source": [ 891 | "cv2.imwrite(outputPath, image)" 892 | ] 893 | }, 894 | { 895 | "cell_type": "code", 896 
| "execution_count": null, 897 | "metadata": { 898 | "collapsed": false, 899 | "scrolled": true 900 | }, 901 | "outputs": [], 902 | "source": [ 903 | "############################\n", 904 | "############################\n", 905 | "############################\n", 906 | "############################\n", 907 | "#### LEGACY CODES BELOW ####\n", 908 | "############################\n", 909 | "############################\n", 910 | "############################\n", 911 | "############################" 912 | ] 913 | }, 914 | { 915 | "cell_type": "code", 916 | "execution_count": null, 917 | "metadata": { 918 | "collapsed": false 919 | }, 920 | "outputs": [], 921 | "source": [ 922 | "#WORKING OLD VERSION: BUT CLUSMY RESULT\n", 923 | "#from __future__ import division\n", 924 | "#i=0\n", 925 | "#j=5\n", 926 | "\n", 927 | "SHOW_DETAIL = True\n", 928 | "SHOW_DETAIL = True\n", 929 | "\n", 930 | "# Sorting lines\n", 931 | "my_lines = page.lines\n", 932 | "my_lines.sort(key=lambda line:((line.start.y+line.end.y)/2,(line.start.x+line.end.x)/2))\n", 933 | "#my_lines[0].start.x = 2\n", 934 | "#my_lines[0].start.y = 0\n", 935 | "#my_lines[0].end.x = 6\n", 936 | "#my_lines[0].end.y = 0\n", 937 | "\n", 938 | "#my_lines[5].start.x = 1\n", 939 | "#my_lines[5].start.y = 1\n", 940 | "#my_lines[5].end.x = 5\n", 941 | "#my_lines[5].end.y = 3\n", 942 | "\n", 943 | "#my_lines[0].group = None\n", 944 | "#my_lines[5].group = None\n", 945 | "\n", 946 | "EPS = 3#1e-3\n", 947 | "group_idx = 0\n", 948 | "threshold_angle = 1.0\n", 949 | "threshold_perpendist = 1.3 * 60.0\n", 950 | "threshold_overlap = 1.0\n", 951 | "threshold_paralldist = 1.5 * 40.0\n", 952 | "\n", 953 | "for idx_my_line, my_line in enumerate(my_lines):\n", 954 | " if(idx_my_line+1 == len(my_lines)-1):\n", 955 | " break\n", 956 | " i = idx_my_line\n", 957 | " for j in range(i+1,len(my_lines)-1):\n", 958 | " #for j in range(i+1,30):\n", 959 | " sameGroup = False\n", 960 | " ################################\n", 961 | " # CALCULATE 
GEOMETRIC FEATURES #\n", 962 | " ################################\n", 963 | " # Point setting\n", 964 | " x_O_i = my_lines[i].start.x\n", 965 | " #y_O_i = my_lines[i].start.y\n", 966 | " y_O_i = page.image.shape[0] - my_lines[i].start.y\n", 967 | " x_F_i = my_lines[i].end.x\n", 968 | " #y_F_i = my_lines[i].end.y\n", 969 | " y_F_i = page.image.shape[0] - my_lines[i].end.y \n", 970 | "\n", 971 | " x_O_j = my_lines[j].start.x\n", 972 | " #y_O_j = my_lines[j].start.y\n", 973 | " y_O_j = page.image.shape[0] - my_lines[j].start.y\n", 974 | " x_F_j = my_lines[j].end.x\n", 975 | " #y_F_j = my_lines[j].end.y\n", 976 | " y_F_j = page.image.shape[0] - my_lines[j].end.y\n", 977 | " \n", 978 | " delta_x_i = abs(x_F_i - x_O_i)\n", 979 | " delta_y_i = abs(y_F_i - y_O_i)\n", 980 | " delta_x_j = abs(x_F_j - x_O_j)\n", 981 | " delta_y_j = abs(y_F_j - y_O_j)\n", 982 | " \n", 983 | " # ith or jth line is dot, so skip it\n", 984 | " if ((delta_x_i == 0 and delta_y_i == 0) or (delta_x_j == 0 and delta_y_j == 0)):\n", 985 | " continue\n", 986 | " \n", 987 | " if SHOW_DETAIL:\n", 988 | " print(\"\\n****************************************************************\")\n", 989 | " print(i, my_lines[i].points)\n", 990 | " print(j, my_lines[j].points)\n", 991 | " print(\"i:\",x_O_i,y_O_i,\"-\",x_F_i,y_F_i)\n", 992 | " print(\"j:\",x_O_j,y_O_j,\"-\",x_F_j,y_F_j)\n", 993 | " \n", 994 | " # Calculate angle\n", 995 | " theta_i_j = math.atan2(delta_y_j,delta_x_j-math.atan2(delta_y_i,delta_x_i))\n", 996 | " if SHOW_DETAIL:\n", 997 | " print(\"Angle:\",theta_i_j)\n", 998 | "\n", 999 | " # Calculate overlap\n", 1000 | " #if delta_x_j == 0:\n", 1001 | " # delta_x_j = 0.1\n", 1002 | " #if delta_y_i == 0:\n", 1003 | " # delta_y_i = 0.1\n", 1004 | " #if delta_y_j == 0:\n", 1005 | " # delta_y_j = 0.1\n", 1006 | " #if delta_x_i == 0:\n", 1007 | " # delta_x_i = 0.1\n", 1008 | "\n", 1009 | "\n", 1010 | " x_A_j = (x_O_i*delta_x_i*delta_x_j + x_O_j*delta_y_i*delta_y_j + 
delta_x_j*delta_y_i*(y_O_i-y_O_j))/(delta_y_i*delta_y_j + delta_x_i*delta_x_j + EPS)\n", 1011 | " if (delta_x_j != 0):\n", 1012 | " y_A_j = (delta_y_j/delta_x_j)*(x_A_j - x_O_j) + y_O_j\n", 1013 | " else:\n", 1014 | " x_A_j = y_O_j\n", 1015 | "\n", 1016 | " x_B_j = (x_F_i*delta_x_i*delta_x_j + x_F_j*delta_y_i*delta_y_j + delta_x_j*delta_y_i*(y_F_i-y_F_j))/(delta_y_i*delta_y_j + delta_x_i*delta_x_j + EPS)\n", 1017 | " if (delta_x_j != 0):\n", 1018 | " y_B_j = (delta_y_j/delta_x_j)*(x_A_j - x_F_j) + y_F_j\n", 1019 | " else:\n", 1020 | " x_B_j = y_F_j\n", 1021 | "\n", 1022 | " x_middle_candidates = [x_O_j, x_F_j, x_A_j, x_B_j]\n", 1023 | " x_middle_candidates.sort()\n", 1024 | " y_middle_candidates = [y_O_j, y_F_j, y_A_j, y_B_j]\n", 1025 | " y_middle_candidates.sort()\n", 1026 | "\n", 1027 | " x_C_j = x_middle_candidates[-2]\n", 1028 | " y_C_j = y_middle_candidates[-2]\n", 1029 | "\n", 1030 | " x_D_j = x_middle_candidates[-3]\n", 1031 | " y_D_j = y_middle_candidates[-3]\n", 1032 | " if SHOW_DETAIL:\n", 1033 | " print(\"x_A_j,y_A_j\",x_A_j,y_A_j)\n", 1034 | " print(\"x_B_j,y_B_j\",x_B_j,y_B_j)\n", 1035 | " print(\"x_C_j,y_C_j\",x_C_j,y_C_j)\n", 1036 | " print(\"x_D_j,y_D_j\",x_D_j,y_D_j)\n", 1037 | "\n", 1038 | " if ((x_O_j <= x_C_j <= x_F_j and y_O_j <= y_C_j <= y_F_j) or (x_O_i <= x_C_j <= x_F_i and y_O_i <= x_C_j <= y_F_i)) and ((x_O_j <= x_D_j <= x_F_j and y_O_j <= y_D_j <= y_F_j) or (x_O_i <= x_D_j <= x_F_i and y_O_i <= y_D_j <= y_F_i)):\n", 1039 | " overlap = True\n", 1040 | " else:\n", 1041 | " overlap = False\n", 1042 | " # Force to be true; no overlap is required in the default mode\n", 1043 | " #overlap = True\n", 1044 | "\n", 1045 | " p_j = (math.sqrt(math.pow(y_D_j-y_C_j,2)+math.pow(x_D_j-x_C_j,2)))/2.0\n", 1046 | " l_j = math.sqrt(math.pow(y_F_j-y_O_j,2)+math.pow(x_F_j-x_O_j,2))\n", 1047 | " if (l_j == 0):\n", 1048 | " l_j = 0.1\n", 1049 | " if overlap:\n", 1050 | " p_i_j = p_j/l_j\n", 1051 | " else:\n", 1052 | " p_i_j = -p_j/l_j\n", 1053 | " \n", 1054 | " 
if SHOW_DETAIL:\n", 1055 | " print(\"Overlap?\",overlap)\n", 1056 | " print(\"p_j:\",p_j)\n", 1057 | " print(\"p_i_j:\",p_i_j)\n", 1058 | "\n", 1059 | " # Calculate parallel_dist\n", 1060 | " if overlap:\n", 1061 | " d_i_j_a = p_j\n", 1062 | " else:\n", 1063 | " d_i_j_a = -p_j\n", 1064 | " if SHOW_DETAIL:\n", 1065 | " print(\"parallel_dist: \",d_i_j_a)\n", 1066 | "\n", 1067 | " # Calculate perpend_dist\n", 1068 | " x_M_j = (x_C_j + x_D_j)/2.0\n", 1069 | " y_M_j = (y_C_j + y_D_j)/2.0\n", 1070 | " if SHOW_DETAIL:\n", 1071 | " print(\"x_M_j,y_M_j\",x_M_j,y_M_j)\n", 1072 | " print(\"delta_x_i:\",delta_x_i)\n", 1073 | " print(\"delta_y_i:\",delta_y_i)\n", 1074 | " print(\"delta_x_j:\",delta_x_j)\n", 1075 | " print(\"delta_y_j:\",delta_y_j)\n", 1076 | " \n", 1077 | " if delta_x_i != 0.0 and delta_x_i != 0.0:\n", 1078 | " d_e_i_j = ((x_M_j - x_O_i) - (y_M_j - y_O_i)*delta_x_i/(delta_y_i + EPS))/((delta_x_i**2)/(delta_y_i**2 + EPS) + 1)**0.5\n", 1079 | " #((x_M_j - x_O_i) - (y_M_j - y_O_i)*delta_x_i/(delta_y_i + EPS))/(math.pow(math.pow(delta_x_i,2)/math.pow(delta_y_i,2)+1,0.5) + EPS)\n", 1080 | " elif delta_y_i == 0.0:\n", 1081 | " d_e_i_j = y_M_j - y_O_i \n", 1082 | " elif delta_x_i == 0.0:\n", 1083 | " d_e_i_j = x_M_j - x_O_i\n", 1084 | " d_e_i_j = abs(d_e_i_j)\n", 1085 | " \n", 1086 | " if SHOW_DETAIL:\n", 1087 | " print(\"perpend_dist: \",d_e_i_j)\n", 1088 | "\n", 1089 | " ######################\n", 1090 | " # DECIDING GROUPNESS #\n", 1091 | " #######################\n", 1092 | " # 1. angle check\n", 1093 | " if theta_i_j < threshold_angle:\n", 1094 | " if SHOW_DETAIL: print(\"... Angle ok!\")\n", 1095 | " # 2. perpend_dist check\n", 1096 | " if 0 < d_e_i_j < threshold_perpendist:\n", 1097 | " if SHOW_DETAIL: print(\"... Perpendicular ok!\")\n", 1098 | " # 3.a. overlap check\n", 1099 | " # 3.b. parallel_dist check\n", 1100 | " if ((overlap and p_i_j < threshold_overlap)):\n", 1101 | " if SHOW_DETAIL: print(\"... 
Overlap & p_i_j ok!\")\n", 1102 | " # Group!\n", 1103 | " sameGroup = True\n", 1104 | " elif (abs(d_i_j_a) < threshold_paralldist):\n", 1105 | " if SHOW_DETAIL: print(\"... Parallel ok!\")\n", 1106 | " # Group!\n", 1107 | " sameGroup = True\n", 1108 | " \n", 1109 | " if SHOW_DETAIL:\n", 1110 | " print(\"same group? \",sameGroup)\n", 1111 | " if sameGroup:\n", 1112 | " if SHOW_DETAIL:\n", 1113 | " print(\"before group idx: \",group_idx)\n", 1114 | " print(\"before i's group: \", my_lines[i].group)\n", 1115 | " print(\"before j's group: \", my_lines[j].group)\n", 1116 | " if (my_lines[i].group == None) and (my_lines[j].group == None):\n", 1117 | " if SHOW_DETAIL:\n", 1118 | " print(\"... case 1\")\n", 1119 | " # Assign to a new block\n", 1120 | " group_idx = group_idx + 1\n", 1121 | " my_lines[i].group = group_idx\n", 1122 | " my_lines[j].group = group_idx\n", 1123 | " elif (my_lines[i].group == None):\n", 1124 | " if SHOW_DETAIL: print(\"... case 2\")\n", 1125 | " # Unassigned text-line is assigned to the block of the other\n", 1126 | " my_lines[i].group = my_lines[j].group\n", 1127 | " elif (my_lines[j].group == None):\n", 1128 | " if SHOW_DETAIL: print(\"... 
case 3\")\n", 1129 | " # Unassigned text-line is assigned to the block of the other\n", 1130 | " my_lines[j].group = my_lines[i].group\n", 1131 | " if SHOW_DETAIL: print(\"after group idx: \",group_idx)\n", 1132 | " if SHOW_DETAIL: print(\"after i's group: \", my_lines[i].group)\n", 1133 | " if SHOW_DETAIL: print(\"after j's group: \", my_lines[j].group)\n", 1134 | " #else:\n", 1135 | " # Block merge\n", 1136 | "\n", 1137 | "print(\"Done!\")" 1138 | ] 1139 | }, 1140 | { 1141 | "cell_type": "code", 1142 | "execution_count": null, 1143 | "metadata": { 1144 | "collapsed": true 1145 | }, 1146 | "outputs": [], 1147 | "source": [ 1148 | "##############################\n", 1149 | "# Draw BoundingBox (Polygon) #\n", 1150 | "##############################\n", 1151 | "import cv2\n", 1152 | "import numpy\n", 1153 | "image = page.image.copy()\n", 1154 | "# point_0 (x_0, y_0) x_0:* y_0:Max\n", 1155 | "# point_1 (x_1, y_1) x_1:Max y_1:*\n", 1156 | "# point_2 (x_2, y_2) x_2:* y_2:Min\n", 1157 | "# point_3 (x_3, y_3) x_3:Min y_3:*\n", 1158 | "boundingbox_table = numpy.zeros((group_idx+1,8)) # [x_0,y_0,x_1,y_1,x_2,y_2,x_3,y_3]\n", 1159 | "boundingbox_table[:,0] = 0 # x_0\n", 1160 | "boundingbox_table[:,1] = 0 # y_0\n", 1161 | "boundingbox_table[:,2] = 0 # x_1\n", 1162 | "boundingbox_table[:,3] = 0 # y_1\n", 1163 | "boundingbox_table[:,4] = 0 # x_2\n", 1164 | "boundingbox_table[:,5] = image.shape[0] # y_2\n", 1165 | "boundingbox_table[:,6] = image.shape[1] # x_3\n", 1166 | "boundingbox_table[:,7] = 0 # y_3\n", 1167 | "\n", 1168 | "# Find BoundingBoxes for Each Group\n", 1169 | "for my_line in my_lines:\n", 1170 | " for i in range(1,group_idx+1):\n", 1171 | " \n", 1172 | " \n", 1173 | " if my_line.group == i:\n", 1174 | " # Update if found new min or max\n", 1175 | " if my_line.start.y > boundingbox_table[i-1,1]:\n", 1176 | " boundingbox_table[i-1,0] = my_line.start.x\n", 1177 | " boundingbox_table[i-1,1] = my_line.start.y\n", 1178 | " if my_line.end.y > boundingbox_table[i-1,1]:\n", 
1179 | " boundingbox_table[i-1,0] = my_line.end.x\n", 1180 | " boundingbox_table[i-1,1] = my_line.end.y\n", 1181 | " \n", 1182 | " if my_line.start.x > boundingbox_table[i-1,2]:\n", 1183 | " boundingbox_table[i-1,2] = my_line.start.x\n", 1184 | " boundingbox_table[i-1,3] = my_line.start.y\n", 1185 | " if my_line.end.y > boundingbox_table[i-1,2]:\n", 1186 | " boundingbox_table[i-1,2] = my_line.end.x\n", 1187 | " boundingbox_table[i-1,3] = my_line.end.y\n", 1188 | " \n", 1189 | " if my_line.start.y < boundingbox_table[i-1,5]:\n", 1190 | " boundingbox_table[i-1,4] = my_line.start.x\n", 1191 | " boundingbox_table[i-1,5] = my_line.start.y\n", 1192 | " if my_line.end.y < boundingbox_table[i-1,5]:\n", 1193 | " boundingbox_table[i-1,4] = my_line.end.x\n", 1194 | " boundingbox_table[i-1,5] = my_line.end.y\n", 1195 | " \n", 1196 | " if my_line.start.x < boundingbox_table[i-1,6]:\n", 1197 | " boundingbox_table[i-1,6] = my_line.start.x\n", 1198 | " boundingbox_table[i-1,7] = my_line.start.y\n", 1199 | " if my_line.end.x < boundingbox_table[i-1,6]:\n", 1200 | " boundingbox_table[i-1,6] = my_line.end.x\n", 1201 | " boundingbox_table[i-1,7] = my_line.end.y\n", 1202 | "\n", 1203 | "# Draw BoundingBoxes \n", 1204 | "for i in range(group_idx+1):\n", 1205 | " x_0 = int(boundingbox_table[i,0])\n", 1206 | " y_0 = int(boundingbox_table[i,1])\n", 1207 | " x_1 = int(boundingbox_table[i,2])\n", 1208 | " y_1 = int(boundingbox_table[i,3])\n", 1209 | " x_2 = int(boundingbox_table[i,4])\n", 1210 | " y_2 = int(boundingbox_table[i,5])\n", 1211 | " x_3 = int(boundingbox_table[i,6])\n", 1212 | " y_3 = int(boundingbox_table[i,7])\n", 1213 | " \n", 1214 | " pts = numpy.array([[x_0,y_0],[x_1,y_1],[x_2,y_2],[x_3,y_3]], numpy.int32)\n", 1215 | " pts = pts.reshape((-1,1,2))\n", 1216 | " cv2.polylines(image,[pts],True,(0,0,255),5)\n", 1217 | "\n", 1218 | "maxDimension = Dimension(800, 800)\n", 1219 | "displayDimension = Dimension(image.shape[1], image.shape[0])\n", 1220 | 
"displayDimension.fitInside(maxDimension)\n", 1221 | "image = cv2.resize(image, tuple(displayDimension))\n", 1222 | "cv2.namedWindow('title', cv2.CV_WINDOW_AUTOSIZE)\n", 1223 | "cv2.imshow('grouped', image)\n", 1224 | "cv2.waitKey()\n", 1225 | "cv2.destroyAllWindows()" 1226 | ] 1227 | } 1228 | ], 1229 | "metadata": { 1230 | "kernelspec": { 1231 | "display_name": "Python 2", 1232 | "language": "python", 1233 | "name": "python2" 1234 | }, 1235 | "language_info": { 1236 | "codemirror_mode": { 1237 | "name": "ipython", 1238 | "version": 2 1239 | }, 1240 | "file_extension": ".py", 1241 | "mimetype": "text/x-python", 1242 | "name": "python", 1243 | "nbconvert_exporter": "python", 1244 | "pygments_lexer": "ipython2", 1245 | "version": "2.7.13" 1246 | } 1247 | }, 1248 | "nbformat": 4, 1249 | "nbformat_minor": 2 1250 | } 1251 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Docstrum Algorithm 3 | ## Getting Started 4 | This repo is for developing a Docstrum algorithm presented by O’Gorman (1993). 5 | 6 | ## Disclaimer 7 | This source code is built on top of the work by Chadoliver. Please find the original code from here (https://github.com/chadoliver/cosc428-structor). 8 | 9 | ## Objective 10 | This project aims at segmenting a document image into meaningful components. The domain of image is specified on historical machine-printed/hand-written document image. 
11 | 12 | ## Dependencies 13 | - python 2.7 14 | - Packages: 15 | - `numpy` 16 | - `cv2` 17 | 18 | ## Process 19 | 20 | 21 | - Pre-processing [Optional for vertical-line removal](https://docs.opencv.org/3.2.0/d1/dee/tutorial_moprh_lines_detection.html) 22 | - Blurring [Bilateral Filtering](https://en.wikipedia.org/wiki/Bilateral_filter) 23 | - [Otsu's thresholding](https://en.wikipedia.org/wiki/Otsu%27s_method) 24 | - Morphological [erosion & dilation](https://docs.opencv.org/3.0-beta/doc/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html) 25 | - Smoothing (Averaging) 26 | - Static thresholding 27 | - Nearest-Neighbor Clustering and Docstrum Plot 28 | - Spacing and Orientation Estimation 29 | - Determination of Text-lines 30 | - Structural Block Determination 31 | - Post-processing 32 | - TBD 33 | 34 | ## Evaluation 35 | - TBD 36 | 37 | ## Citing Docstrum 38 | O'Gorman, L., 1993. The document spectrum for page layout analysis. IEEE Transactions on Pattern Analysis and Machine Intelligence, 15(11), pp.1162-1173. [pdf](http://ieeexplore.ieee.org/abstract/document/244677/). 39 | 40 | @article{o1993document, 41 | title={The document spectrum for page layout analysis}, 42 | author={O'Gorman, Lawrence}, 43 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 44 | volume={15}, 45 | number={11}, 46 | pages={1162--1173}, 47 | year={1993}, 48 | publisher={IEEE} 49 | } 50 | 51 | ## Notes 52 | ### How to remove .DS_Store 53 | ``` 54 | find . 
def distance(start, end):
    """Return the straight-line (Euclidean) distance between two points.

    Both arguments are indexable as ``point[0]`` (x) and ``point[1]`` (y);
    the coordinates are converted to float before subtracting.
    """
    dy = float(end[1]) - float(start[1])
    dx = float(end[0]) - float(start[0])
    return math.sqrt(dy**2 + dx**2)


def midpoint(start, end):
    """Return the ``(x, y)`` tuple lying halfway between *start* and *end*."""
    # Sum each coordinate pair first, then convert and halve.
    return tuple(float(start[axis] + end[axis]) / 2 for axis in (0, 1))


def angle(start, end):
    """Return the direction from *start* to *end* in degrees.

    The value is ``atan2(rise, run)`` converted to degrees, so it is
    measured from the positive x axis.
    """
    dy = float(end[1]) - float(start[1])
    dx = float(end[0]) - float(start[0])
    return math.degrees(math.atan2(dy, dx))
self.center.right) 49 | self.height = distance(self.top.left, self.bottom.left) 50 | self.area = self.width * self.height 51 | self.angle = angle(self.top.left, self.top.right) 52 | 53 | self.words = [] # children words, with a center of mass inside the box. 54 | self.isLine = False # temporary flag, until I set up a proper Lines class. 55 | 56 | def rectToPoints(self, rect): 57 | 58 | points = cv2.cv.BoxPoints(rect) # Find four vertices of rectangle from above rect 59 | points = numpy.int0(numpy.around(points)) # Round the values and make them integers 60 | return points 61 | 62 | def setImportantPoints(self, points): 63 | # figures out which point is top-left, etc, and assigns each to one of: self.top.left, self.top.right, 64 | # self.bottom.left, and self.bottom.right 65 | 66 | points = sorted(points, key=lambda point: point[0]) # sort by x position. 67 | left = sorted(points[:2], key=lambda point: point[1]) # [top-left, bottom-left] 68 | right = sorted(points[2:], key=lambda point: point[1]) # [top-right, bottom-right] 69 | 70 | self.top = EmptyObject() 71 | self.top.left = left[0] 72 | self.top.right = right[0] 73 | 74 | self.bottom = EmptyObject() 75 | self.bottom.left = left[1] 76 | self.bottom.right = right[1] 77 | 78 | self.center = EmptyObject() 79 | self.center.left = midpoint(self.top.left, self.bottom.left) 80 | self.center.right = midpoint(self.top.right, self.bottom.right) 81 | self.center.center = midpoint(self.center.left, self.center.right) 82 | 83 | def isTouchingEdge(self, shape, closenessThreshold=200): 84 | 85 | isTouching = False 86 | shape = (shape[1], shape[0]) # switch width and height so that it matches the format of a Point() 87 | 88 | for point in self.points: 89 | if point[0] <= (0 + closenessThreshold): 90 | isTouching = True 91 | elif point[1] <= (0 + closenessThreshold): 92 | isTouching = True 93 | elif point[0] >= (shape[0] - closenessThreshold): 94 | isTouching = True 95 | elif point[1] >= (shape[1] - closenessThreshold): 96 | 
class _CycleIterator:
    """Iterator that walks round a color sequence, optionally stopping after *limit* items."""

    def __init__(self, colors, limit):
        self.colors = colors
        self.index = 0        # position in self.colors; wraps around
        self.limit = limit    # maximum number of items to yield, or None for no limit
        self.iterCounter = 0  # number of items yielded so far (never wraps)

    def __iter__(self):
        return self

    def next(self):
        # Stop once `limit` items have been produced. The original test was
        # inverted (`limit > iterCounter`), which stopped on the very first
        # call for any positive limit.
        if self.limit is not None and self.iterCounter >= self.limit:
            raise StopIteration
        color = self.colors[self.index]
        self.index = (self.index + 1) % len(self.colors)
        self.iterCounter += 1
        return color

    __next__ = next  # Python 3 name of the iterator protocol method


class cycle:
    """Hand out a fixed set of colors over and over again.

    ``cycle(a, b, c).next()`` returns ``a``, ``b``, ``c``, ``a``, ... on
    successive calls. Iterating over the object yields the same repeating
    sequence from the start, independently of the ``next()`` position.
    """

    def __init__(self, *colors):

        self.colors = colors
        self.index = 0  # this will continually loop through self.colors

    def next(self):
        """Return the current color and advance to the following one (wrapping)."""
        color = self.colors[self.index]
        self.index = (self.index + 1) % len(self.colors)
        return color

    __next__ = next  # lets the built-in next(obj) work on Python 3 too

    def __iter__(self, limit=None):
        """Return a fresh iterator over the colors, starting at the first one.

        With ``limit=None`` (what ``for``/``iter()`` pass) the iterator never
        ends; when called explicitly with an integer it stops after that many
        colors.
        """
        return _CycleIterator(self.colors, limit)
class Figure:
    """A picture on the page together with the caption lines that follow it.

    The object is created empty; ``image`` and ``caption`` are filled in by
    the code that builds it.
    """

    def __init__(self):

        self.contentType = "Figure"
        self.image = None   # the line/object painted as the picture itself
        self.caption = []   # caption lines, in the order they were added

    def paint(self, image, color=colors.CYAN):
        """Draw the figure and its caption onto *image* and return the result.

        Each element is painted with ``box=True`` in *color*. A figure whose
        ``image`` is still ``None`` (the constructor default) paints only its
        caption instead of raising ``AttributeError``.
        """
        if self.image is not None:
            image = self.image.paint(image, color, box=True)
        for line in self.caption:
            image = line.paint(image, color, box=True)
        return image
class ChapterStart:
    """The heading material at the start of a chapter.

    Built by consuming lines from *lines*, an object offering ``pull()``
    (remove and return the next line) and ``peekStart()`` (look at the next
    line without removing it). The first line is taken as the chapter
    number; the lines up to the first horizontal rule are the title and the
    lines up to the second horizontal rule are the quote. Both rule lines
    are consumed and thrown away.
    """

    def __init__(self, lines):

        self.contentType = "ChapterStart"
        self.chapterNum = lines.pull()
        self.titleLines = []
        self.quoteLines = []

        # Title first, then quote: each section runs until a horizontal rule.
        for section in (self.titleLines, self.quoteLines):
            while True:
                if lines.peekStart().isHorizontalRule:
                    lines.pull()  # discard the horizontal-rule line.
                    break
                section.append(lines.pull())

    def paint(self, image, color=colors.ORANGE):
        """Paint the chapter number, then title and quote lines, and return the image."""
        image = self.chapterNum.paint(image, color)
        for group in (self.titleLines, self.quoteLines):
            for line in group:
                image = line.paint(image, color, box=True)

        return image
self.SM_newFigure(newLine) 201 | 202 | elif newLine.isCentered: 203 | self.content.append(paragraph) 204 | self.SM_sectionTitle(newLine) 205 | 206 | elif line.isParagraphEnd or newLine.isParagraphStart: 207 | self.content.append(paragraph) 208 | self.SM_newParagraph(newLine) 209 | 210 | elif newLine.isParagraphEnd: 211 | self.SM_paragraphEnd(newLine, paragraph) 212 | 213 | else: 214 | self.SM_paragraphBody(newLine, paragraph) 215 | 216 | def SM_paragraphBody(self, line, paragraph): 217 | 218 | paragraph.append(line) 219 | 220 | try: 221 | newLine = self.lines.pull() 222 | except IndexError: 223 | self.content.append(paragraph) 224 | return 225 | 226 | if newLine.box.height > 300: 227 | self.content.append(paragraph) 228 | self.SM_newFigure(newLine) 229 | 230 | elif newLine.isCentered: 231 | self.content.append(paragraph) 232 | self.SM_sectionTitle(newLine) 233 | 234 | elif newLine.isParagraphStart: 235 | self.content.append(paragraph) 236 | self.SM_newParagraph(newLine) 237 | 238 | elif newLine.isParagraphEnd: 239 | self.SM_paragraphEnd(newLine, paragraph) 240 | 241 | else: 242 | self.SM_paragraphBody(newLine, paragraph) 243 | 244 | def SM_paragraphEnd(self, line, paragraph): 245 | 246 | paragraph.append(line) 247 | 248 | try: 249 | newLine = self.lines.pull() 250 | except IndexError: 251 | self.content.append(paragraph) 252 | return 253 | 254 | if newLine.box.height > 300: 255 | self.content.append(paragraph) 256 | self.SM_newFigure(newLine) 257 | 258 | elif newLine.isCentered: 259 | self.content.append(paragraph) 260 | self.SM_sectionTitle(newLine) 261 | 262 | else: 263 | self.content.append(paragraph) 264 | self.SM_newParagraph(newLine) 265 | 266 | def SM_sectionTitle(self, line): 267 | sectionTitle = SectionTitle(line) 268 | 269 | try: 270 | newLine = self.lines.pull() 271 | except IndexError: 272 | self.content.append(sectionTitle) 273 | return 274 | 275 | if newLine.box.height > 300: 276 | self.content.append(sectionTitle) 277 | self.SM_newFigure(newLine) 278 
class Dimension():
    """A non-negative width/height pair that can be scaled to fit a bounding size.

    Iterating over a Dimension yields ``x`` then ``y``, so ``tuple(dim)``
    gives an ``(x, y)`` pair.
    """

    def __init__(self, x, y):
        # Sizes are stored as magnitudes; negative inputs are made positive.
        self.x = abs(x)
        self.y = abs(y)

    def __str__(self):
        return '(x:%i, y:%i)' % (self.x, self.y)

    def scale(self, ratio):
        """Multiply both sides by *ratio* in place, truncating to integers."""
        self.x = int(self.x * ratio)
        self.y = int(self.y * ratio)

    def fitInside(self, boundingDimension):
        """Shrink in place, keeping proportions, until no side exceeds *boundingDimension*.

        The width is checked first and the height afterwards, so at most two
        scalings are applied. A Dimension that already fits is left unchanged.
        """
        if self.x > boundingDimension.x:
            xratio = float(boundingDimension.x) / float(self.x)
            self.scale(xratio)
        if self.y > boundingDimension.y:
            yratio = float(boundingDimension.y) / float(self.y)
            self.scale(yratio)

    def __iter__(self):
        """Return an iterator over ``(x, y)``.

        The built-in tuple iterator implements the iterator protocol on both
        Python 2 and Python 3; the previous hand-written iterator class only
        defined the Python 2 ``next`` method.
        """
        return iter((self.x, self.y))
class Angle:
    """An undirected angle, stored in radians and folded into [-pi/2, pi/2].

    Exactly one of the constructor arguments is used, checked in this order:
    `guess` (another Angle-like object, or a number of degrees), `radians`,
    `degrees`, `gradient` (rise / run).

    `radians()`, `degrees()` and `gradient()` are combined getters/setters:
    called without an argument they return the value, called with one they
    store it (after `sanitize`) and return None.
    """

    def __init__(self, guess=None, degrees=None, radians=None, gradient=None):

        self.canonical = None  # radians is the 'canonical' representation.

        if guess is not None:
            try:
                # Try treating it like an Angle object ...
                rads = guess.radians()
            except (AttributeError, TypeError):
                # ... otherwise treat it like a number in degrees. Only the
                # "not an Angle" failures are caught, so genuine errors are no
                # longer silently reinterpreted.
                self.degrees(guess)
            else:
                self.radians(rads)
        elif radians is not None:
            self.radians(radians)
        elif degrees is not None:
            self.degrees(degrees)
        elif gradient is not None:
            self.gradient(gradient)
        else:
            raise TypeError('Angle() takes at least one argument')

    def radians(self, newVal=None):
        if newVal is not None:
            self.canonical = Angle.sanitize(newVal)
        else:
            return self.canonical

    def degrees(self, newVal=None):
        if newVal is not None:
            self.canonical = Angle.sanitize(math.radians(newVal))
        else:
            return math.degrees(self.canonical)

    def gradient(self, newVal=None):
        # gradient = rise / run = tan(radians)
        if newVal is not None:
            self.canonical = Angle.sanitize(math.atan(newVal))
        else:
            return math.tan(self.canonical)

    def __add__(self, other):
        other = Angle(other)  # allows angle2 = angle1 + 45 (degrees)
        return Angle(radians=Angle.sanitize(self.radians() + other.radians()))

    def __sub__(self, other):
        other = Angle(other)
        return Angle(radians=Angle.sanitize(self.radians() - other.radians()))

    @staticmethod
    def sanitize(rads):
        """Fold any radian value into the range [-pi/2, pi/2]."""
        rads = float(rads)

        # Wrap into [-pi, pi), accounting for wrap-around.
        rads = ((rads + math.pi) % (2 * math.pi)) - math.pi

        # Our angles are symmetric: 3*pi/4 is equivalent to -pi/4.
        if rads > (math.pi / 2):
            rads = rads - math.pi
        elif rads < (-math.pi / 2):
            rads = rads + math.pi

        return rads

    @staticmethod
    def average(angles):
        """Return the arithmetic mean of `angles` as a new Angle.

        Important: this does not do well with angles close to +-90 degrees.
        Even if they are clustered around one value they are split into a
        positive and a negative set and average towards zero, because these
        are symmetric angles rather than bearings.

        Accepts any iterable (including a generator).
        Raises ValueError when `angles` is empty.
        """
        angles = list(angles)
        if not angles:
            raise ValueError('Angle.average() requires at least one angle')

        sumOfRads = sum((angle.radians() for angle in angles), 0.0)
        return Angle(radians=Angle.sanitize(sumOfRads / len(angles)))
class PointArray:
    """A list-like container whose constructor and `append` coerce items to Point."""

    def __init__(self, points=()):
        # Immutable default: the former `points=[]` was one list object shared
        # by every call. Each item is wrapped in Point, which also lets the
        # constructor accept a generator.
        self.points = [Point(point) for point in points]

    def __str__(self):
        # human-readable output
        return "[%s]" % (", ".join(str(point) for point in self.points))

    def __repr__(self):
        # machine-readable output
        return self.__str__()

    def append(self, point):
        self.points.append(Point(point))

    def numpyArray(self):
        """Return the integer-aligned points as an array of shape (n, 1, 2)."""
        return numpy.array([[list(point.align())] for point in self.points])

    def __getitem__(self, key):
        return self.points[key]

    def __setitem__(self, key, value):
        self.points[key] = value

    def __delitem__(self, key):
        # This used to be spelled `__delattr__`, which hijacked `del obj.attr`
        # (and failed there) while `del array[i]` stayed unsupported. Deletion
        # is delegated to the underlying list like the other item accessors.
        # NOTE(review): the old body stored None in the slot instead of
        # removing it; confirm that no caller relied on the length staying fixed.
        del self.points[key]

    def __reversed__(self):
        return reversed(self.points)

    def __len__(self):
        return len(self.points)

    def __iter__(self):
        return iter(self.points)

    def paint(self, image, color):
        """Draw every point onto `image` and return the image."""
        for point in self.points:
            image = point.paint(image, color)
        return image
class Point:
    """A 2-D point with `.x` / `.y` that can also be used like a 2-item list."""

    def __init__(self, foo=None, bar=None):
        try:
            # If foo is a sequence, use that and ignore bar.
            # Note that this also means that Point(Point(foo, bar)) is harmless.
            x, y = foo[0], foo[1]
        except (TypeError, IndexError, KeyError, AttributeError):
            # Otherwise treat foo and bar like two numbers. Only the
            # "foo is not indexable" failures are caught (AttributeError is
            # what Python 2 old-style instances raise for a missing
            # __getitem__), instead of every exception.
            x, y = foo, bar

        self.x = x
        self.y = y
        self.isPoint = True  # used to test instance type.

    def align(self):
        """Return a new Point whose coordinates are rounded to integers."""
        # around() + astype(int) replaces numpy.int0, which NumPy 2 removed.
        return Point(numpy.around([self.x, self.y]).astype(int))

    def cv2point(self):
        """Return the rounded coordinates as a tuple of plain Python ints."""
        aligned = self.align()
        return (int(aligned.x), int(aligned.y))

    def rotate(self, angle):
        """Return a new Point: this one rotated about the origin by -angle."""
        rads = -Angle(angle).radians()
        cosine = math.cos(rads)
        sine = math.sin(rads)

        rotatedPoint = Point()
        rotatedPoint.x = self.x * cosine - self.y * sine
        rotatedPoint.y = self.x * sine + self.y * cosine
        return rotatedPoint

    def __str__(self):
        # human-readable output
        return "(x:%s, y:%s)" % (self.x, self.y)

    def __repr__(self):
        # machine-readable output
        return self.__str__()

    def __getitem__(self, key):
        # this is a hack that allows the object to be treated like a list.
        return [self.x, self.y][key]

    def __setitem__(self, key, value):
        if key == 0:
            self.x = value
        elif key == 1:
            self.y = value
        else:
            raise KeyError('key must be 0 or 1')

    def __delitem__(self, key):
        # A coordinate cannot be removed, so `del point[i]` blanks it. This
        # used to be spelled `__delattr__`, which broke `del point.attr` and
        # left `del point[i]` unsupported.
        self.__setitem__(key, None)

    def __reversed__(self):
        return Point(self.y, self.x)

    def __len__(self):
        return 2

    def __iter__(self):
        # A generator simply ends; an explicit `raise StopIteration` here
        # becomes a RuntimeError on Python 3.7+ (PEP 479).
        yield self.x
        yield self.y

    def __add__(self, other):
        result = Point()
        result.x = self.x + other.x
        result.y = self.y + other.y
        return result

    def __sub__(self, other):
        result = Point()
        result.x = self.x - other.x
        result.y = self.y - other.y
        return result

    def paint(self, image, color, diameter=3):
        """Draw a circle outline of radius `diameter` at this point; return `image`."""
        # cv2.CV_AA is the OpenCV 2.x name of the anti-aliased line type, in
        # line with the other cv2.cv / CV_* constants this project uses.
        cv2.circle(image, self.cv2point(), diameter, color, 1, cv2.CV_AA)
        return image

    @staticmethod
    def distance(start, end):
        """Return the Euclidean distance between two points (or 2-sequences)."""
        delta = Point(end) - Point(start)
        return math.sqrt(delta.x ** 2 + delta.y ** 2)

    @staticmethod
    def midpoint(start, end):
        """Return the point halfway between `start` and `end`."""
        start = Point(start)
        end = Point(end)

        # Each coordinate averages the matching coordinates of both ends.
        # Previously both x and y were computed from start.x and end.y.
        midpoint = Point()
        midpoint.x = float(start.x + end.x) / 2
        midpoint.y = float(start.y + end.y) / 2
        return midpoint
class Line:
    """A straight line built from points and/or an angle, optionally clipped.

    How the line is derived (see `update`):
      * an `inputAngle` plus at least one point -> `lineFromPointAngle`
      * exactly two points                      -> `lineFromTwoPoints`
      * more than two points                    -> `leastSquaresLine`
      * otherwise `start`, `end` and `angle` stay None.
    """

    def __init__(self, points=(), inputAngle=None, frame=None):
        # `frame` is handed to cv2.clipLine as its rectangle argument.
        self.frame = frame

        self.start = None
        self.end = None
        self.angle = None
        self.group = None
        self.noise = True

        if inputAngle is not None:
            inputAngle = Angle(inputAngle)
        self.inputAngle = inputAngle

        # Immutable default above: the old `points=[]` was shared between calls.
        self.points = PointArray(points)
        self.update()

    def append(self, point):
        """Add a point and recompute the line."""
        self.points.append(point)
        self.update()

    def intersect(self, other):
        """Return the Point where this line crosses `other`.

        Raises Exception when either line has no start/end yet, and
        ZeroDivisionError when both lines have the same gradient.
        """
        if (self.start is None) or (self.end is None):
            raise Exception('The PixelLine is underspecified; it requires at least two points')
        if (other.start is None) or (other.end is None):
            raise Exception('The PixelLine is underspecified; it requires at least two points')

        otherX = float(other.start.x)
        otherY = float(other.start.y)
        otherM = float(other.angle.gradient())

        selfX = float(self.start.x)
        selfY = float(self.start.y)
        selfM = float(self.angle.gradient())

        # Solve selfY + selfM*(x - selfX) == otherY + otherM*(x - otherX) for x.
        point = Point()
        point.x = (otherY - selfY + selfM * selfX - otherM * otherX) / (selfM - otherM)
        point.y = selfY + selfM * (point.x - selfX)

        return point

    def update(self):
        """Recompute `start`, `end` and `angle` from the current points."""
        if (self.inputAngle is not None) and (len(self.points) >= 1):
            self.lineFromPointAngle()

        elif len(self.points) < 2:
            self.start = None
            self.end = None
            self.angle = None

        elif len(self.points) == 2:
            self.lineFromTwoPoints()

        else:
            self.leastSquaresLine()

        self.clipToFrame()

    def lineFromPointAngle(self):
        # We find the line based on the angle and the first point. Note that
        # in this case the line is effectively infinite.
        # NOTE(review): the endpoints are laid out along inputAngle + 90
        # degrees while self.angle records inputAngle itself; confirm which
        # convention the callers rely on before changing either.
        hypotenuse = 4000
        datum = self.points[0]
        angle = self.inputAngle + 90

        offset = Point()
        offset.x = int(hypotenuse * math.cos(angle.radians()))
        offset.y = int(hypotenuse * math.sin(angle.radians()))

        self.start = datum - offset
        self.end = datum + offset
        self.angle = self.inputAngle

    def lineFromTwoPoints(self):
        # This is the only case in which the line has a visible start and end point.
        self.start = self.points[0]
        self.end = self.points[1]
        self.angle = self.calculateAngle(self.start, self.end)

    def leastSquaresLine(self):
        # Fit a least-squares trend line and stretch it both ways from the
        # fitted anchor point.
        multiplier = 2000
        dx, dy, x0, y0 = cv2.fitLine(self.points.numpyArray(), cv2.cv.CV_DIST_L2, 0, 0.01, 0.01)

        self.start = Point(int(x0 - dx * multiplier), int(y0 - dy * multiplier))
        self.end = Point(int(x0 + dx * multiplier), int(y0 + dy * multiplier))
        self.angle = self.calculateAngle(self.start, self.end)

    def calculateAngle(self, start, end):
        """Return the Angle of the segment running from `start` to `end`."""
        # Uses the arguments; they used to be ignored in favour of
        # self.start / self.end, which gave the same result only because every
        # caller passes exactly those.
        rise = float(end.y) - float(start.y)
        run = float(end.x) - float(start.x)

        return Angle(radians=math.atan2(rise, run))

    def clipToFrame(self):
        """Clip `start`/`end` to `self.frame`; no-op without a frame or a line."""
        if self.frame is None:
            return
        if (self.start is None) or (self.end is None):
            # Fewer than two points: there is nothing to clip yet.
            return

        # cv2.clipLine returns (retval, pt1, pt2); unpacking it into two names
        # raised ValueError. Integer tuples are passed since that is what the
        # OpenCV binding expects for points.
        _inside, rawStart, rawEnd = cv2.clipLine(self.frame, self.start.cv2point(), self.end.cv2point())
        self.start = Point(rawStart)
        self.end = Point(rawEnd)

    def paint(self, image, color=colors.BLUE):
        """Draw the line onto `image` and return the image."""
        if (self.start is None) or (self.end is None):
            raise Exception('The Line is underspecified; it requires at least two points')

        cv2.line(image, self.start.cv2point(), self.end.cv2point(), color, 1, cv2.CV_AA)
        return image
#!/usr/bin/python
# Batch driver: analyse and display every image found in the input folder.

import sys
import os
from page import Page

SHOW_STEPS = True  # change this to false if you just want to see the final output for each page.
SAVE_OUTPUT = False

inputFolder = os.path.join('images')
outputFolder = os.path.join('output')

for filename in os.listdir(inputFolder):

    inputPath = os.path.join(inputFolder, filename)
    outputPath = os.path.join(outputFolder, filename)

    page = Page(inputPath, SHOW_STEPS)

    # Optionally keep a copy of what is displayed (used for the paper's figures).
    if SAVE_OUTPUT:
        page.save(outputPath)

    page.show((800, 800))
class NaiveMargin:
    """ This is a simple approximation of the margin, used to get rid of marginal noise."""

    def __init__(self, candidateLines):
        """Build a quadrilateral from the left/right ends of the full-width lines."""
        self.candidateLines = candidateLines

        # Only lines whose box width lies strictly between 1280 and 1330 count
        # as full-width lines.
        fullLines = []
        for candidate in self.candidateLines:
            if 1280 < candidate.box.width < 1330:
                fullLines.append(candidate)

        left = g.Line([full.box.center.left for full in fullLines])
        right = g.Line([full.box.center.right for full in fullLines])

        # Make sure that 'start' means the same end for both geometric lines.
        # This fixes a frustrating problem, where in some pages most lines
        # wouldn't be picked up.
        for edge in (left, right):
            if edge.start[1] < edge.end[1]:
                edge.start, edge.end = edge.end, edge.start

        corners = [left.start, left.end, right.end, right.start]
        self.points = g.PointArray(corners)

    def selectLines(self):
        """Return a LineCollection of the candidates whose centre is inside the margin."""
        kept = text.LineCollection()
        for candidate in self.candidateLines:
            centre = candidate.box.center.center
            if self.contains(centre):
                kept.append(candidate)

        return kept

    def contains(self, pointToTest):
        """Return True when the point-in-polygon test result is positive."""
        polygon = self.points.numpyArray()
        return cv2.pointPolygonTest(polygon, pointToTest, False) > 0
def fit(self, lines):
    """Fit the four margin lines around the border words of `lines`.

    Sets `angle`, `left`, `right`, `top`, `bottom`, `height` and `width`.
    """
    # Collate all the contours from all the 'border' words (those on the
    # first and last lines, and the first and last words from all other lines).
    borderPoints = g.PointArray()
    for word in self.selectBorderWords(lines):
        for wrappedPoint in word.contour:
            borderPoints.append(wrappedPoint[0])

    self.angle = lines.avgAngle

    def rotatedX(point):
        return point.rotate(self.angle).x

    def rotatedY(point):
        return point.rotate(self.angle).y

    # Interestingly, it's faster to sort the whole list, which is theoretical
    # O(n log n), than it is to do a min operation followed by a max
    # operation, both of which are O(n).
    byX = sorted(borderPoints, key=rotatedX)
    leftPoint, rightPoint = byX[0], byX[-1]

    byY = sorted(borderPoints, key=rotatedY)
    topPoint, bottomPoint = byY[0], byY[-1]

    self.left = g.Line([leftPoint], self.angle)
    self.right = g.Line([rightPoint], self.angle)
    self.top = g.Line([topPoint], self.angle + 90)
    self.bottom = g.Line([bottomPoint], self.angle + 90)

    # Extents are measured in the rotated coordinate system.
    self.height = abs(rotatedY(topPoint) - rotatedY(bottomPoint))
    self.width = abs(rotatedX(leftPoint) - rotatedX(rightPoint))
def selectBorderWords(self, lines):
    """Return the words that can touch the outline of a block of text lines.

    All words of the first and last line are returned; for every other line
    only its first and last word, which keeps the list small. Lines without
    any words are skipped, and the single word of a one-word line is
    returned once.
    """
    borderWords = []
    lastLine = len(lines) - 1

    for lineNum, line in enumerate(lines):
        words = line.words
        if not words:
            # Nothing to contribute; indexing words[0] here used to raise IndexError.
            continue

        if lineNum == 0 or lineNum == lastLine:
            # For the first and last lines, we want to take all the words on the line.
            borderWords.extend(words)
        elif len(words) == 1:
            # First and last word are the same object: add it only once.
            borderWords.append(words[0])
        else:
            # For all other lines, the first and last words are enough.
            borderWords.append(words[0])
            borderWords.append(words[-1])

    return borderWords
cv2.morphologyEx(greyscaleImage, cv2.MORPH_CLOSE, kernel) 44 | #greyscaleImage = cv2.morphologyEx(greyscaleImage, cv2.MORPH_CLOSE, kernel) 45 | 46 | #greyscaleImage = cv2.morphologyEx(greyscaleImage, cv2.MORPH_OPEN, kernel) 47 | #greyscaleImage = cv2.morphologyEx(greyscaleImage, cv2.MORPH_CLOSE, kernel) 48 | #self.display(greyscaleImage) 49 | # PREPROCESSING STOP 50 | 51 | colorImage = cv2.imread(path, cv2.CV_LOAD_IMAGE_COLOR) 52 | 53 | if showSteps: self.display(greyscaleImage, title="Original Image") 54 | 55 | ################################# 56 | # VERTICAL LINE REMOVAL - START # 57 | ################################# 58 | ''' 59 | #blurredImage = cv2.GaussianBlur(greyscaleImage,(5,5),0) 60 | #if showSteps: self.display(blurredImage, title="Gaussian-based Blurred Image") 61 | blurredImage = cv2.bilateralFilter(greyscaleImage,9,95,95) 62 | if showSteps: self.display(blurredImage, title="Bilateral-filter-based Blurred Image") 63 | _, binaryImage = cv2.threshold(blurredImage,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU) 64 | #binaryImage = cv2.adaptiveThreshold(blurredImage, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 115, 1) 65 | if showSteps: self.display(binaryImage, title="Otsu-based Binarized Image") 66 | binaryImage = cv2.bitwise_not(binaryImage) 67 | if showSteps: self.display(binaryImage, title="Inverted Image") 68 | 69 | # kernel_size = (3,3) 70 | verticalsize = binaryImage.shape[0] / 90; 71 | kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(1,verticalsize)) 72 | 73 | verticalMask = cv2.erode(binaryImage, kernel, (-1, -1)) 74 | if showSteps: self.display(verticalMask, title="MORP. Erosed Image") 75 | verticalMask = cv2.dilate(verticalMask, kernel, (-1, -1)) 76 | if showSteps: self.display(verticalMask, title="MORP. 
Dilated Image") 77 | verticalMask = cv2.blur(verticalMask, (9,9)) 78 | if showSteps: self.display(verticalMask, title="Smoothened Vertical-line Candidates") 79 | # Recursive 80 | verticalMask = cv2.dilate(verticalMask, kernel, (-1, -1)) 81 | if showSteps: self.display(verticalMask, title="MORP. Dilated Image_#2") 82 | verticalMask = cv2.blur(verticalMask, (9,9)) 83 | if showSteps: self.display(verticalMask, title="Smoothened Vertical-line Candidates_#2") 84 | verticalMask = cv2.dilate(verticalMask, kernel, (-1, -1)) 85 | if showSteps: self.display(verticalMask, title="MORP. Dilated Image_#3") 86 | verticalMask = cv2.blur(verticalMask, (9,9)) 87 | if showSteps: self.display(verticalMask, title="Smoothened Vertical-line Candidates_#3") 88 | verticalMask = cv2.dilate(verticalMask, kernel, (-1, -1)) 89 | if showSteps: self.display(verticalMask, title="MORP. Dilated Image_#4") 90 | verticalMask = cv2.blur(verticalMask, (9,9)) 91 | if showSteps: self.display(verticalMask, title="Smoothened Vertical-line Candidates_#4") 92 | verticalMask = cv2.dilate(verticalMask, kernel, (-1, -1)) 93 | if showSteps: self.display(verticalMask, title="MORP. 
Dilated Image_#5") 94 | verticalMask = cv2.blur(verticalMask, (9,9)) 95 | if showSteps: self.display(verticalMask, title="Smoothened Vertical-line Candidates_#5") 96 | #verticalMask = cv2.adaptiveThreshold(verticalMask,255, cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY,15,-2) 97 | #_, verticalMask = cv2.threshold(verticalMask,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU) 98 | _, verticalMask = cv2.threshold(verticalMask, 1, 255, cv2.THRESH_BINARY) 99 | if showSteps: self.display(verticalMask, title="Thresholded Vertical-line Candidates") 100 | 101 | verticalMask_mask = numpy.ones(binaryImage.shape[:2], dtype="uint8") * 255 102 | verticalMask_contours,verticalMask_hierarchy = cv2.findContours(verticalMask, 1, 2) 103 | for cnt in verticalMask_contours: 104 | x,y,w,h = cv2.boundingRect(cnt) 105 | if h>binaryImage.shape[0]/3: 106 | cv2.drawContours(verticalMask_mask, [cnt], -1, 0, -1) 107 | if showSteps: self.display(cv2.bitwise_not(verticalMask_mask), title="Final Vertical-lines") 108 | 109 | binaryImage = cv2.bitwise_and(binaryImage, verticalMask_mask) 110 | if showSteps: self.display(binaryImage, title="Fully Preprocessed Image") 111 | ''' 112 | ############################### 113 | # VERTICAL LINE REMOVAL - END # 114 | ############################### 115 | 116 | #_,binaryImage = cv2.threshold(greyscaleImage, cv2.THRESH_OTSU, colors.greyscale.WHITE, cv2.THRESH_BINARY) 117 | _, binaryImage = cv2.threshold(greyscaleImage,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU) 118 | binaryImage = cv2.bitwise_not(binaryImage) 119 | if showSteps: self.display(binaryImage, title="Otsu-based Binarized Image") 120 | 121 | self.characters = text.CharacterSet(binaryImage) 122 | #self.display(binaryImage) 123 | stopwatch.lap("got characters") 124 | # words = [word, word, ..., word] 125 | # words = [append, count, extend, index, insert, pop, remove, reverse, sort] 126 | # word = [angles, characters, distances, findTuples, paint, registerChildCharacter] 127 | # word = [char, char, ..., char] 128 | # 
char = [nearestNeighbors, parentWord, x, y] 129 | self.words = self.characters.getWords() 130 | stopwatch.lap("got words & tuples") 131 | 132 | print "Total ", len(self.words), " words are found." 133 | #for idx, word in enumerate(self.words): 134 | # print "[",idx,"] word:" 135 | # for idx_char, character in enumerate(word.characters): 136 | # print "**[", idx_char, "] char info.. ", "(",character.x,",",character.y,")" 137 | 138 | #print "Tuple 1: (",self.words[1].angles[1], ", ", self.words[1].distances[1], ")" 139 | 140 | 141 | self.buildDocstrum(path) 142 | stopwatch.lap("built Docstrum") 143 | #theta = self.words[1].angles 144 | #r = self.words[1].distances 145 | #ax = plt.subplot(111,polar=True) 146 | #ax.scatter(theta,r) 147 | #plt.show() 148 | 149 | textlineImage = self.find_textline(colorImage) 150 | self.display(textlineImage, title="Found textlines") 151 | 152 | self.image = colorImage 153 | 154 | stopwatch.lap("finished analysing page") 155 | stopwatch.endRun() 156 | 157 | #self.drawTextLine(self.words,colorImage) 158 | #self.paint(self.image) 159 | 160 | #self.display_textline(textlineImage) 161 | 162 | #self.display(self.paint_textline(self.image)) 163 | print "Done." 
@staticmethod
def most_common(L):
    """Return the most frequent item of `L`; ties go to the earliest item.

    Declared as a static method: the function takes no `self`, so calling it
    as `self.most_common(values)` previously failed with a TypeError.

    Items must be orderable, because they are sorted for grouping.
    Raises ValueError when `L` is empty.
    """
    # Sort (item, original index) pairs so that equal items become adjacent.
    indexed = sorted((item, index) for index, item in enumerate(L))
    groups = itertools.groupby(indexed, key=operator.itemgetter(0))

    def _quality(group):
        # Rank a group by its size first, then by how early the item first
        # appeared (negated so that an earlier position wins in max()).
        _item, members = group
        count = 0
        firstIndex = len(L)
        for _, where in members:
            count += 1
            firstIndex = min(firstIndex, where)
        return count, -firstIndex

    # pick the highest-count / earliest item
    return max(groups, key=_quality)[0]
def buildDocstrum(self, path):
    """Plot the Docstrum (polar scatter of neighbour angle vs. distance).

    Collects every angle and distance stored on `self.words`, keeps them in
    `self.orientations` (degrees) and `self.dists`, draws the polar scatter
    plot, optionally saves and shows it, then draws the angle histogram.
    """
    theta = []       # polar angles in radians, two per neighbour angle
    theta_hist = []  # one angle in degrees per neighbour angle
    dist_hist = []   # one distance per neighbour distance
    r = []           # radii, two per neighbour distance
    sz = 1           # marker size of the scatter plot

    for word in self.words:
        for angle in word.angles:
            # This used to read `1/2*numpy.pi-angle` and `3/2*numpy.pi-angle`.
            # Under Python 2 (which this project targets) `1/2` is 0 and `3/2`
            # is 1, so the values actually plotted were -angle and pi - angle.
            # They are now written out explicitly so the result no longer
            # depends on integer-division semantics.
            # NOTE(review): if a real pi/2 offset was intended, change it here
            # on purpose; today's output is unchanged by this edit.
            theta.append(-angle)               # -pi/2 < x < pi/2 (1 and 4 quadrant)
            theta.append(numpy.pi - angle)     # the same direction mirrored (2 and 3 quadrant)
            theta_hist.append(math.degrees(-angle))
        for distance in word.distances:
            # Two radii per distance, matching the two thetas per angle.
            r.append(distance)
            r.append(distance)
            dist_hist.append(distance)

    ax = plt.subplot(111, polar=True)
    self.orientations = theta_hist
    self.dists = dist_hist
    ax.scatter(theta, r, sz)

    if self.saveDocstrum:
        plt.savefig(os.path.join(os.path.abspath("./docstrums"), "ds_" + ntpath.basename(path)))
    if self.showSteps:
        plt.show()

    self.nnAngleHist(theta_hist, path)
    #self.nnDistHist(dist_hist,path)
def find_textline(self, image):
    """Fit a line through the character centroids of every word and draw it.

    Works on a copy of `image`. Each fitted segment runs from the leftmost to
    the rightmost centroid of the word, is appended to `self.lines` as a
    `g.Line` and drawn in yellow. Returns the annotated copy.
    """
    image = image.copy()

    for word in self.words:
        points = [[character.x, character.y] for character in word.characters]
        if len(points) < 2:
            # A direction cannot be fitted through fewer than two centroids.
            continue
        points.sort(key=lambda point: point[0])

        fitted = cv2.fitLine(numpy.array(points), cv2.cv.CV_DIST_L2, 0, 0.01, 0.01)
        dx, dy, x0, y0 = [float(value) for value in numpy.ravel(fitted)]

        if dx == 0:
            # Vertical fit: the slope dy/dx is undefined, so span the word
            # from its lowest to its highest y at the fitted x position.
            ys = [point[1] for point in points]
            start = (int(x0), int(min(ys)))
            end = (int(x0), int(max(ys)))
        else:
            # The points are sorted by x, so the ends of the list are the
            # horizontal extremes of the word.
            slope = dy / dx
            leftX = points[0][0]
            rightX = points[-1][0]
            start = (int(leftX), int(slope * (leftX - x0) + y0))
            end = (int(rightX), int(slope * (rightX - x0) + y0))

        self.lines.append(g.Line([start, end]))
        cv2.line(image, start, end, (0, 255, 255), 2)

    return image
def extractWords(self, sourceImage):
    """Unfinished stub: thresholds a copy of `sourceImage` and prepares buffers.

    Nothing is returned and nothing is written; the locals below are
    placeholders for an implementation that is not present in this file.
    """
    image = sourceImage.copy()
    # `threshold` lives in the `text` module; this file only does
    # `import text`, so the bare name `threshold` raised NameError.
    image = text.threshold(image)

    # Currently unused: intended temporary file locations.
    tempImageFile = os.path.join('src', 'tempImage.tiff')
    tempTextFile = os.path.join('src', 'tempText')

    # Currently unused: blank single-channel buffers the size of the image.
    mask = numpy.zeros(image.shape, numpy.uint8)
    singleWord = numpy.zeros(image.shape, numpy.uint8)
| self.pauseDuration = 0 14 | self.totalPauseDurationInRun = 0 15 | self.runTimes = [] 16 | 17 | if message is not None: 18 | self.lap(message) 19 | 20 | def __getTotalRunTime(self): 21 | currentTime = time.time() 22 | return currentTime - self.startTime - self.totalPauseDurationInRun 23 | 24 | def lap(self, message): 25 | 26 | currentTime = time.time() 27 | lapTime = currentTime - self.lastLapTime - self.pauseDuration 28 | 29 | print "%.2f\t%.2f\t%s" %(self.__getTotalRunTime(), lapTime, message) 30 | self.lastLapTime = currentTime 31 | self.pauseStartTime = None 32 | self.pauseDuration = 0 33 | 34 | def pause(self): 35 | 36 | self.pauseStartTime = time.time() 37 | 38 | def unpause(self): 39 | 40 | if self.pauseStartTime is not None: 41 | currentTime = time.time() 42 | self.pauseDuration += currentTime - self.pauseStartTime 43 | self.totalPauseDurationInRun += currentTime - self.pauseStartTime 44 | 45 | self.pauseStartTime = None 46 | 47 | def endRun(self): 48 | 49 | self.runTimes.append(self.__getTotalRunTime()) 50 | average = sum(self.runTimes) / (len(self.runTimes)) 51 | print "average time: %.2f" %average 52 | print 53 | 54 | self.pauseStartTime = None 55 | self.pauseDuration = 0 56 | self.totalPauseDurationInRun = 0 57 | 58 | def reset(self, message="reset"): 59 | 60 | if not self.initialised: 61 | self.initialised = True 62 | 63 | self.pauseStartTime = None 64 | self.pauseDuration = 0 65 | self.totalPauseDurationInRun = 0 66 | 67 | self.startTime = time.time() 68 | self.lastLapTime = time.time() 69 | self.lap(message) 70 | -------------------------------------------------------------------------------- /stopwatch.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chulwoopack/docstrum/6559d78f6ec0e14f3372e1e91629b81a96465e42/stopwatch.pyc -------------------------------------------------------------------------------- /text.py: 
class Character:
    """A character blob reduced to a single (x, y) point.

    The object behaves like the two-element sequence `[x, y]` (length,
    indexing, iteration and membership are forwarded to `coordinate`), so it
    can be handed to code that expects a plain coordinate pair.

    NOTE: `x` and `y` are copies taken at construction time.  Writing through
    the sequence interface changes `coordinate` only, not `x` / `y`.
    """

    def __init__(self, x, y):

        self.coordinate = [x, y]
        self.x = x
        self.y = y

        self.nearestNeighbours = []   # Character objects linked to this one
        self.parentWord = None        # the word this character belongs to, if any

    def assignParentWord(self, word):
        """Attach this character, and every character reachable from it, to `word`.

        This character is always (re)assigned.  From there the links in
        `nearestNeighbours` are followed, claiming each character that has no
        parent word yet; characters that already belong to a word are left
        alone and are not traversed further.

        An explicit stack replaces the original one-call-per-character
        recursion, which could exceed the interpreter's recursion limit on a
        long chain of linked characters.
        """
        self.parentWord = word
        word.registerChildCharacter(self)

        pending = [self]
        while pending:
            current = pending.pop()
            for neighbour in current.nearestNeighbours:
                if neighbour.parentWord is None:
                    neighbour.parentWord = word
                    word.registerChildCharacter(neighbour)
                    pending.append(neighbour)

    def toArray(self):
        """Return the `[x, y]` list itself (not a copy)."""
        return self.coordinate

    def __len__(self):
        return len(self.coordinate)

    def __getitem__(self, key):
        return self.coordinate.__getitem__(key)

    def __setitem__(self, key, value):
        self.coordinate.__setitem__(key, value)

    def __delitem__(self, key):
        self.coordinate.__delitem__(key)

    def __iter__(self):
        return self.coordinate.__iter__()

    def __contains__(self, item):
        return self.coordinate.__contains__(item)

    def paint(self, image, color=colors.YELLOW):
        """Paint a dot at this character's coordinate and return the image."""
        pointObj = g.Point(self.coordinate)
        image = pointObj.paint(image, color)
        return image
def getContours(self, sourceImage, threshold=-1):
    """Return the top-level contours (blobs) found in a binary image.

    sourceImage -- binary image; a copy is searched so the caller's array is
                   never handed to OpenCV directly.
    threshold   -- a contour is kept when the parent index in its hierarchy
                   entry is <= this value.  With the default of -1 only
                   contours without a parent (roots) are returned.

    Returns a list of contours.  The list is empty when OpenCV finds nothing.
    """
    image = sourceImage.copy()

    # cv2.findContours stores the (x, y) boundary points of each shape.
    # CHAIN_APPROX_SIMPLE drops redundant points to compress each contour.
    # Each hierarchy entry is [Next, Previous, First_Child, Parent].
    # REFERENCE : https://docs.opencv.org/3.1.0/d4/d73/tutorial_py_contours_begin.html
    #
    # OpenCV 3.x returns (image, contours, hierarchy) while other releases
    # return (contours, hierarchy); the last two items are the same in both.
    found = cv2.findContours(image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours, hierarchy = found[-2:]

    # hierarchy is None when no contour was found at all.
    if hierarchy is None:
        return []

    topLevelContours = []
    for contour, context in zip(contours, hierarchy[0]):
        # 1- and 2-point contours have a divide-by-zero error in calculating
        # the center of mass, so they are dropped here.
        if len(contour) <= 2:
            continue

        parent = context[3]
        if parent <= threshold:  # no parent, therefore a root
            topLevelContours.append(contour)

    return topLevelContours
", "(",character_nn.x,",",character_nn.y,")" 155 | 156 | within_line_nn_groups = [] 157 | within_line_nn_group = [] 158 | 159 | start_flag = True 160 | end_flag = False 161 | 162 | self.characters = sorted(self.characters, key=lambda char: (char.y, char.x)) 163 | # self.characters = sorted(self.characters, key=lambda char: char.x) 164 | # for each character 165 | for idx, character in enumerate(self.characters): 166 | if start_flag: 167 | within_line_nn_group.append(character) 168 | character.nearestNeighbours = sorted(character.nearestNeighbours, key=lambda nn_char: nn_char.x) 169 | if len(character.nearestNeighbours)>0: 170 | # start char of group 171 | if start_flag: 172 | within_line_nn_group.append(character.nearestNeighbours[0]) 173 | start_flag = False 174 | # end char of group 175 | elif ((idx+1)==len(self.characters) or len(character.nearestNeighbours)<2 or character.nearestNeighbours[1].x != self.characters[idx+1].x): 176 | end_flag = True 177 | within_line_nn_groups.append(within_line_nn_group) 178 | within_line_nn_group = [] 179 | start_flag = True 180 | # mid char of group 181 | else: 182 | within_line_nn_group.append(character.nearestNeighbours[1]) 183 | print "Found group: ", within_line_nn_groups 184 | ''' 185 | 186 | ''' most_common ''' 187 | ''' Find the most common element in a list ''' 188 | ''' Input: A list ''' 189 | ''' Output: The most common element ''' 190 | def most_common(self,L): 191 | # get an iterable of (item, iterable) pairs 192 | SL = sorted((x, i) for i, x in enumerate(L)) 193 | # print 'SL:', SL 194 | groups = itertools.groupby(SL, key=operator.itemgetter(0)) 195 | # auxiliary function to get "quality" for an item 196 | def _auxfun(g): 197 | item, iterable = g 198 | count = 0 199 | min_index = len(L) 200 | for _, where in iterable: 201 | count += 1 202 | min_index = min(min_index, where) 203 | # print 'item %r, count %r, minind %r' % (item, count, min_index) 204 | return count, -min_index 205 | # pick the highest-count/earliest 
def getWords(self):
    """Link each character to its close neighbours and group them into words.

    Steps:
      1. Ask the k-d tree for the (up to) 5 nearest points of every character.
         The first hit is skipped because it is the query point itself.
      2. Average all neighbour distances; three times that average is the
         largest distance at which two characters may be linked.
      3. In the active 'horizontal' mode, link a neighbour when the absolute
         `canonical` angle of the connecting line is at most 0.261799
         (15 degrees) and it is within reach.  Neighbours between 1.309 and
         1.8326 (75 to 105 degrees) are only recorded for the vertical
         statistics.
      4. Show a histogram and print statistics for all, horizontal and
         vertical neighbour distances.
      5. Sort the characters by x and seed a `Word` from every character that
         has no parent word yet.

    Returns the list of `Word` objects created in step 5.

    Side effects: appends to each character's `nearestNeighbours`, replaces
    `self.characters` with the x-sorted list, rebuilds `self.NNTree` to match
    it, opens matplotlib windows and prints to standard output.  Not
    idempotent: a second call appends the neighbour links again, and
    characters that already have a parent word seed no new words.
    """
    HORIZONTAL_MAX = 0.261799                    # 15 degrees, in radians
    VERTICAL_MIN, VERTICAL_MAX = 1.309, 1.8326   # 75 and 105 degrees, in radians
    REACH_FACTOR = 3                             # multiple of the average NN distance

    mode = 'horizontal'  # mode = ['default','horizontal','vertical']

    def showHistogram(values):
        """Plot `values` in bins roughly 10 units wide; return (counts, edges)."""
        spread = numpy.max(values) - numpy.min(values)
        # A spread below 9 used to give zero bins, which matplotlib rejects.
        num_bins = max(1, int((spread + 1) / 10))
        counts, edges, _ = plt.hist(values, num_bins, facecolor='orange', alpha=0.5)
        plt.show()
        return counts, edges

    def reportDirection(shortName, longName, values):
        """Histogram, peak list and average for one direction's distances."""
        if not values:
            print("Total 0 %s NNs" % shortName)
            return
        counts, edges = showHistogram(values)
        print("Total %d %s NNs" % (len(values), shortName))
        # Left bin edges, fullest bin first.  The stable sort keeps tied bins
        # in ascending order and lists each bin exactly once.
        order = numpy.argsort(-counts, kind='mergesort')
        dist_peaks = [int(edges[i]) for i in order]
        print("Distance peaks: %s" % dist_peaks)
        print("average NN %s distance: %.2f\n" % (longName, sum(values) / len(values)))

    words = []
    NNDistances = []
    NNHorizontalDistances = []
    NNVerticalDistances = []

    # Asking for more neighbours than there are points makes the tree pad the
    # answer with infinite distances and an out-of-range index.
    k = min(5, len(self.characters))

    # Query once per character and keep the answers for the linking pass.
    queries = []
    for character in self.characters:
        distances, indices = self.NNTree.query(character.toArray(), k=k)
        # query() returns scalars instead of arrays when k == 1.
        distances = numpy.atleast_1d(distances)
        indices = numpy.atleast_1d(indices)
        queries.append((distances, indices))
        NNDistances.extend(d for d in distances[1:] if numpy.isfinite(d))

    avgNNDistance = sum(NNDistances) / len(NNDistances) if NNDistances else 0.0
    maxDistance = avgNNDistance * REACH_FACTOR

    for character, (distances, indices) in zip(self.characters, queries):
        for distance, index in zip(distances[1:], indices[1:]):
            if not numpy.isfinite(distance):
                continue  # padding entry, there is no such neighbour
            neighbour = self.characters[index]
            withinReach = distance < maxDistance

            if mode == 'horizontal':
                line = g.Line([character.coordinate, neighbour.coordinate])
                angle = abs(line.calculateAngle(line.start, line.end).canonical)
                if angle <= HORIZONTAL_MAX and withinReach:
                    character.nearestNeighbours.append(neighbour)
                    NNHorizontalDistances.append(distance)
                # Only collected to report the typical vertical distance.
                if VERTICAL_MIN <= angle <= VERTICAL_MAX and withinReach:
                    NNVerticalDistances.append(distance)
            elif mode == 'vertical':
                line = g.Line([character.coordinate, neighbour.coordinate])
                angle = abs(line.calculateAngle(line.start, line.end).canonical)
                # As in the original, no distance limit is applied in this mode.
                if VERTICAL_MIN <= angle <= VERTICAL_MAX:
                    character.nearestNeighbours.append(neighbour)
                    NNVerticalDistances.append(distance)
            else:
                # Default: link in every direction within maxDistance.
                if withinReach:
                    character.nearestNeighbours.append(neighbour)

    if NNDistances:
        showHistogram(NNDistances)
    print("Total %d all NNs" % len(NNDistances))
    print("average NN distance: %.2f" % avgNNDistance)

    reportDirection("hor", "horizontal", NNHorizontalDistances)
    reportDirection("ver", "vertical", NNVerticalDistances)

    # Seed words starting from the leftmost characters.
    self.characters = sorted(self.characters, key=lambda character: character.x)
    if self.characters:
        # The tree returns positions in the list it was built from, so it has
        # to follow the new order or later lookups would hit wrong characters.
        self.NNTree = spatial.KDTree([char.toArray() for char in self.characters])

    for character in self.characters:
        if character.parentWord is None:
            # Word() claims this character and everything linked to it.
            word = Word([character])
            word.findTuples()
            words.append(word)

    return words
-------------------------------------------------------------------------------- /text.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chulwoopack/docstrum/6559d78f6ec0e14f3372e1e91629b81a96465e42/text.pyc --------------------------------------------------------------------------------