├── .gitignore
├── .gitkeep
├── .travis.yml
├── Dockerfile
├── LICENSE
├── README.md
├── ocrd-tool.json
├── qurator
    └── sbb_textline_detector
    │   ├── __init__.py
    │   ├── main.py
    │   ├── ocrd-tool.json
    │   └── ocrd_cli.py
├── requirements.txt
└── setup.py


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *.egg-info
3 | /build
4 | 


--------------------------------------------------------------------------------
/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qurator-spk/sbb_textline_detection/35e0ae0ef811ec160d07a38b0552f90260b3dec5/.gitkeep


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | # Travis CI configuration for sbb_textline_detector
 2 | 
 3 | dist: xenial  # required for Python >= 3.7
 4 | language: python
 5 | python:
 6 |   # sbb_textline_detector requires Python 3.6
 7 |   - "3.6"
 8 |   # broken on Python 3.7 (and never supposed to work)
 9 |   # tensorflow-gpu<2.0 is not available for Python 3.8
10 | 
11 | install:
12 |   - pip install -U pip
13 |   - pip install .
14 | 
15 | script:
16 |   - ocrd-sbb-textline-detector --help
17 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3
 2 | 
 3 | ADD requirements.txt /
 4 | RUN pip install --proxy=http-proxy.sbb.spk-berlin.de:3128 -r requirements.txt
 5 | 
 6 | COPY . /usr/src/sbb_textline_detector
 7 | RUN pip install /usr/src/sbb_textline_detector
 8 | 
 9 | ENTRYPOINT ["sbb_textline_detector"]
10 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ### This software is no longer actively maintained. It has been superseded by [eynollah](https://github.com/qurator-spk/eynollah), which offers additional functionality and overall improved performance and compatibility.
 2 | ---
 3 | 
 4 | [![Build Status](https://travis-ci.org/qurator-spk/sbb_textline_detection.svg?branch=master)](https://travis-ci.org/qurator-spk/sbb_textline_detection)
 5 | 
 6 | # Textline Detection
 7 | > Detect textlines in document images
 8 | 
 9 | ## Introduction
10 | This tool performs border, region and textline detection from document image data and returns the results as [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML).
11 | The goal of this project is to extract textlines of a document in order to feed them to an OCR model. This is achieved by four successive stages as follows:
12 | * [Border detection](https://github.com/qurator-spk/sbb_textline_detection#border-detection)
13 | * [Layout detection](https://github.com/qurator-spk/sbb_textline_detection#layout-detection)
14 | * [Textline detection](https://github.com/qurator-spk/sbb_textline_detection#textline-detection)
15 | * [Heuristic methods](https://github.com/qurator-spk/sbb_textline_detection#heuristic-methods)
16 | 
17 | The first three stages are based on [pixelwise segmentation](https://github.com/qurator-spk/sbb_pixelwise_segmentation).
18 | 
19 | ## Border detection
20 | For the purpose of text recognition (OCR) and in order to avoid noise being introduced from texts outside the printspace, one first needs to detect the border of the printed frame. This is done by a binary pixelwise-segmentation model trained on a dataset of 2,000 documents where about 1,200 of them come from the [dhSegment](https://github.com/dhlab-epfl/dhSegment/) project (you can download the dataset from [here](https://github.com/dhlab-epfl/dhSegment/releases/download/v0.2/pages.zip)) and the remainder having been annotated in SBB. For border detection, the model needs to be fed with the whole image at once rather than separated in patches.
21 | 
22 | ## Layout detection
23 | As a next step, text regions need to be identified by means of layout detection. Again a pixelwise segmentation model was trained on 131 labeled images from the SBB digital collections, including some data augmentation. Since the target of this tool are historical documents, we consider as main region types text regions, separators, images, tables and background - each with their own subclasses, e.g. in the case of text regions, subclasses like header/heading, drop capital, main body text etc. While it would be desirable to detect and classify each of these classes in a granular way, there are also limitations due to having a suitably large and balanced training set. Accordingly, the current version of this tool is focussed on the main region types background, text region, image and separator. 
24 | 
25 | ## Textline detection
26 | In a subsequent step, binary pixelwise segmentation is used again to classify pixels in a document that constitute textlines. For textline segmentation, a model was initially trained on documents with only one column/block of text and some augmentation with regards to scaling. By fine-tuning the parameters also for multi-column documents, additional training data was produced that resulted in a much more robust textline detection model.
27 | 
28 | ## Heuristic methods
29 | Some heuristic methods are also employed to further improve the model predictions: 
30 | * After border detection, the largest contour is determined by a bounding box and the image cropped to these coordinates. 
31 | * For text region detection, the image is scaled up to make it easier for the model to detect background space between text regions.
32 | * A minimum area is defined for text regions in relation to the overall image dimensions, so that very small regions that are actually noise can be filtered out. 
33 | * Deskewing is applied on the text region level (due to regions having different degrees of skew) in order to improve the textline segmentation result. 
34 | * After deskewing, a calculation of the pixel distribution on the X-axis allows the separation of textlines (foreground) and background pixels.
35 | * Finally, using the derived coordinates, bounding boxes are determined for each textline.
36 | 
37 | ## Installation
38 | `pip install .`
39 | 
40 | ### Models
41 | In order to run this tool you also need trained models. You can download our pretrained models from here:   
42 | https://qurator-data.de/sbb_textline_detector/
43 | 
44 | ## Usage
45 | 
46 | The basic command-line interface can be called like this:
47 | 
48 |     sbb_textline_detector -i <image file name> -o <directory to write output xml> -m <directory of models>
49 | 
50 | The tool does accept raw (RGB/grayscale) images as input, but results will be much improved when a properly binarized image is used instead. We also provide a [tool](https://github.com/qurator-spk/sbb_binarization) to perform this binarization step.
51 | 
52 | ### Usage with OCR-D
53 | 
54 | In addition, there is a CLI for [OCR-D](https://ocr-d.de/en/spec/cli):
55 | 
56 |     ocrd-sbb-textline-detector -I OCR-D-IMG -O OCR-D-SEG-LINE-SBB -P model /path/to/the/models/textline_detection
57 | 
58 | Segmentation works on raw (RGB/grayscale) images, but honours `AlternativeImage`s from earlier preprocessing steps, so it's OK to perform (say) deskewing first, followed by textline detection. Results from previous cropping or binarization steps are allowed and retained, but will be ignored. (So these are only useful if themselves needed for deskewing or dewarping prior to segmentation.) 
59 | 
60 | This processor will replace any previously existing `Border`, `ReadingOrder` and `TextRegion` instances (but keep other region types unchanged).
61 | 


--------------------------------------------------------------------------------
/ocrd-tool.json:
--------------------------------------------------------------------------------
1 | qurator/sbb_textline_detector/ocrd-tool.json


--------------------------------------------------------------------------------
/qurator/sbb_textline_detector/__init__.py:
--------------------------------------------------------------------------------
1 | from .main import *
2 | from .ocrd_cli import *
3 | 


--------------------------------------------------------------------------------
/qurator/sbb_textline_detector/main.py:
--------------------------------------------------------------------------------
   1 | #! /usr/bin/env python3
   2 | 
   3 | __version__ = '1.0'
   4 | 
   5 | import os
   6 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
   7 | import sys
   8 | import cv2
   9 | import numpy as np
  10 | import matplotlib.pyplot as plt
  11 | import seaborn as sns
  12 | from sys import getsizeof
  13 | import random
  14 | from tqdm import tqdm
  15 | from keras.models import model_from_json
  16 | from keras.models import load_model
  17 | import math
  18 | from shapely import geometry
  19 | from sklearn.cluster import KMeans
  20 | import gc
  21 | import tensorflow as tf
  22 | tf.get_logger().setLevel('ERROR')
  23 | from keras import backend as K
  24 | from scipy.signal import find_peaks
  25 | from scipy.ndimage import gaussian_filter1d
  26 | import xml.etree.ElementTree as ET
  27 | import warnings
  28 | import click
  29 | import time
  30 | from multiprocessing import Process, Queue, cpu_count
  31 | import datetime
  32 | 
  33 | 
  34 | warnings.filterwarnings('ignore')
  35 | 
  36 | ##with warnings.catch_warnings():
  37 |     ##warnings.simplefilter("ignore",category=RuntimeWarning)
  38 | 
  39 | __doc__ = \
  40 |     """
  41 |     tool to extract text lines from document images
  42 |     """
  43 | 
  44 | 
  45 | class textline_detector:
  46 |     def __init__(self, image_dir, dir_out, f_name, dir_models):
  47 |         self.image_dir = image_dir  # XXX This does not seem to be a directory as the name suggests, but a file
  48 |         self.dir_out = dir_out
  49 |         self.f_name = f_name
  50 |         if self.f_name is None:
  51 |             try:
  52 |                 self.f_name = image_dir.split('/')[len(image_dir.split('/')) - 1]
  53 |                 self.f_name = self.f_name.split('.')[0]
  54 |             except:
  55 |                 self.f_name = self.f_name.split('.')[0]
  56 |         self.dir_models = dir_models
  57 |         self.kernel = np.ones((5, 5), np.uint8)
  58 |         self.model_page_dir = dir_models + '/model_page_mixed_best.h5'
  59 |         self.model_region_dir = dir_models + '/model_strukturerkennung.h5'
  60 |         self.model_textline_dir = dir_models + '/model_textline_new.h5'
  61 | 
  62 |     def find_polygons_size_filter(self, contours, median_area, scaler_up=1.2, scaler_down=0.8):
  63 |         found_polygons_early = list()
  64 | 
  65 |         for c in contours:
  66 |             if len(c) < 3:  # A polygon cannot have less than 3 points
  67 |                 continue
  68 | 
  69 |             polygon = geometry.Polygon([point[0] for point in c])
  70 |             area = polygon.area
  71 |             # Check that polygon has area greater than minimal area
  72 |             if area >= median_area * scaler_down and area <= median_area * scaler_up:
  73 |                 found_polygons_early.append(
  74 |                     np.array([point for point in polygon.exterior.coords], dtype=np.uint))
  75 |         return found_polygons_early
  76 | 
  77 |     def filter_contours_area_of_image(self, image, contours, hierarchy, max_area, min_area):
  78 |         found_polygons_early = list()
  79 | 
  80 |         jv = 0
  81 |         for c in contours:
  82 |             if len(c) < 3:  # A polygon cannot have less than 3 points
  83 |                 continue
  84 | 
  85 |             polygon = geometry.Polygon([point[0] for point in c])
  86 |             area = polygon.area
  87 |             if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(
  88 |                     image.shape[:2]) and hierarchy[0][jv][3] == -1 :  # and hierarchy[0][jv][3]==-1 :
  89 |                 found_polygons_early.append(
  90 |                     np.array([ [point] for point in polygon.exterior.coords], dtype=np.uint))
  91 |             jv += 1
  92 |         return found_polygons_early
  93 | 
  94 |     def filter_contours_area_of_image_interiors(self, image, contours, hierarchy, max_area, min_area):
  95 |         found_polygons_early = list()
  96 | 
  97 |         jv = 0
  98 |         for c in contours:
  99 |             if len(c) < 3:  # A polygon cannot have less than 3 points
 100 |                 continue
 101 | 
 102 |             polygon = geometry.Polygon([point[0] for point in c])
 103 |             area = polygon.area
 104 |             if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and \
 105 |                     hierarchy[0][jv][3] != -1:
 106 |                 # print(c[0][0][1])
 107 |                 found_polygons_early.append(
 108 |                     np.array([point for point in polygon.exterior.coords], dtype=np.uint))
 109 |             jv += 1
 110 |         return found_polygons_early
 111 | 
 112 |     def resize_image(self, img_in, input_height, input_width):
 113 |         return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)
 114 | 
 115 |     def resize_ann(self, seg_in, input_height, input_width):
 116 |         return cv2.resize(seg_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)
 117 | 
 118 |     def get_one_hot(self, seg, input_height, input_width, n_classes):
 119 |         seg = seg[:, :, 0]
 120 |         seg_f = np.zeros((input_height, input_width, n_classes))
 121 |         for j in range(n_classes):
 122 |             seg_f[:, :, j] = (seg == j).astype(int)
 123 |         return seg_f
 124 | 
 125 | 
 126 |     def color_images(self, seg, n_classes):
 127 |         ann_u = range(n_classes)
 128 |         if len(np.shape(seg)) == 3:
 129 |             seg = seg[:, :, 0]
 130 | 
 131 |         seg_img = np.zeros((np.shape(seg)[0], np.shape(seg)[1], 3)).astype(np.uint8)
 132 |         colors = sns.color_palette("hls", n_classes)
 133 | 
 134 |         for c in ann_u:
 135 |             c = int(c)
 136 |             segl = (seg == c)
 137 |             seg_img[:, :, 0] = segl * c
 138 |             seg_img[:, :, 1] = segl * c
 139 |             seg_img[:, :, 2] = segl * c
 140 |         return seg_img
 141 | 
 142 |     def color_images_diva(self, seg, n_classes):
 143 |         ann_u = range(n_classes)
 144 |         if len(np.shape(seg)) == 3:
 145 |             seg = seg[:, :, 0]
 146 | 
 147 |         seg_img = np.zeros((np.shape(seg)[0], np.shape(seg)[1], 3)).astype(float)
 148 |         # colors=sns.color_palette("hls", n_classes)
 149 |         colors = [[1, 0, 0], [8, 0, 0], [2, 0, 0], [4, 0, 0]]
 150 | 
 151 |         for c in ann_u:
 152 |             c = int(c)
 153 |             segl = (seg == c)
 154 |             seg_img[:, :, 0][seg == c] = colors[c][0]  # segl*(colors[c][0])
 155 |             seg_img[:, :, 1][seg == c] = colors[c][1]  # seg_img[:,:,1]=segl*(colors[c][1])
 156 |             seg_img[:, :, 2][seg == c] = colors[c][2]  # seg_img[:,:,2]=segl*(colors[c][2])
 157 |         return seg_img
 158 | 
 159 |     def rotate_image(self, img_patch, slope):
 160 |         (h, w) = img_patch.shape[:2]
 161 |         center = (w // 2, h // 2)
 162 |         M = cv2.getRotationMatrix2D(center, slope, 1.0)
 163 |         return cv2.warpAffine(img_patch, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
 164 | 
 165 |     def cleaning_probs(self, probs: np.ndarray, sigma: float) -> np.ndarray:
 166 |         # Smooth
 167 |         if sigma > 0.:
 168 |             return cv2.GaussianBlur(probs, (int(3 * sigma) * 2 + 1, int(3 * sigma) * 2 + 1), sigma)
 169 |         elif sigma == 0.:
 170 |             return cv2.fastNlMeansDenoising((probs * 255).astype(np.uint8), h=20) / 255
 171 |         else:  # Negative sigma, do not do anything
 172 |             return probs
 173 | 
 174 |     def crop_image_inside_box(self, box, img_org_copy):
 175 |         image_box = img_org_copy[box[1]:box[1] + box[3], box[0]:box[0] + box[2]]
 176 |         return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]]
 177 | 
 178 |     def otsu_copy(self, img):
 179 |         img_r = np.zeros(img.shape)
 180 |         img1 = img[:, :, 0]
 181 |         img2 = img[:, :, 1]
 182 |         img3 = img[:, :, 2]
 183 |         # print(img.min())
 184 |         # print(img[:,:,0].min())
 185 |         # blur = cv2.GaussianBlur(img,(5,5))
 186 |         # ret3,th3 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
 187 |         retval1, threshold1 = cv2.threshold(img1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
 188 |         retval2, threshold2 = cv2.threshold(img2, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
 189 |         retval3, threshold3 = cv2.threshold(img3, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
 190 | 
 191 |         img_r[:, :, 0] = threshold1
 192 |         img_r[:, :, 1] = threshold1
 193 |         img_r[:, :, 2] = threshold1
 194 |         return img_r
 195 | 
 196 |     def get_image_and_scales(self):
 197 |         self.image = cv2.imread(self.image_dir)
 198 |         self.height_org = self.image.shape[0]
 199 |         self.width_org = self.image.shape[1]
 200 | 
 201 |         if self.image.shape[0] < 2500:
 202 |             self.img_hight_int = 2800
 203 |             self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
 204 | 
 205 |         else:
 206 |             self.img_hight_int = int(self.image.shape[0]*1.2)# 6500
 207 |             self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
 208 |             #self.img_hight_int = self.image.shape[0]
 209 |             #self.img_width_int = self.image.shape[1]
 210 | 
 211 |         self.scale_y = self.img_hight_int / float(self.image.shape[0])
 212 |         self.scale_x = self.img_width_int / float(self.image.shape[1])
 213 | 
 214 |         self.image = self.resize_image(self.image, self.img_hight_int, self.img_width_int)
 215 | 
 216 |     def start_new_session_and_model(self, model_dir):
 217 |         config = tf.ConfigProto()
 218 |         config.gpu_options.allow_growth = True
 219 | 
 220 |         session = tf.InteractiveSession()
 221 |         model = load_model(model_dir, compile=False)
 222 | 
 223 |         return model, session
 224 |     
 225 |     def do_prediction(self,patches,img,model):
 226 |         
 227 |         img_height_model = model.layers[len(model.layers) - 1].output_shape[1]
 228 |         img_width_model = model.layers[len(model.layers) - 1].output_shape[2]
 229 |         n_classes = model.layers[len(model.layers) - 1].output_shape[3]
 230 | 
 231 |         if patches:
 232 | 
 233 |             margin = int(0.1 * img_width_model)
 234 | 
 235 |             width_mid = img_width_model - 2 * margin
 236 |             height_mid = img_height_model - 2 * margin
 237 | 
 238 | 
 239 |             img = img / float(255.0)
 240 | 
 241 |             img_h = img.shape[0]
 242 |             img_w = img.shape[1]
 243 | 
 244 |             prediction_true = np.zeros((img_h, img_w, 3))
 245 |             mask_true = np.zeros((img_h, img_w))
 246 |             nxf = img_w / float(width_mid)
 247 |             nyf = img_h / float(height_mid)
 248 | 
 249 |             if nxf > int(nxf):
 250 |                 nxf = int(nxf) + 1
 251 |             else:
 252 |                 nxf = int(nxf)
 253 | 
 254 |             if nyf > int(nyf):
 255 |                 nyf = int(nyf) + 1
 256 |             else:
 257 |                 nyf = int(nyf)
 258 | 
 259 |             for i in range(nxf):
 260 |                 for j in range(nyf):
 261 | 
 262 |                     if i == 0:
 263 |                         index_x_d = i * width_mid
 264 |                         index_x_u = index_x_d + img_width_model
 265 |                     elif i > 0:
 266 |                         index_x_d = i * width_mid
 267 |                         index_x_u = index_x_d + img_width_model
 268 | 
 269 |                     if j == 0:
 270 |                         index_y_d = j * height_mid
 271 |                         index_y_u = index_y_d + img_height_model
 272 |                     elif j > 0:
 273 |                         index_y_d = j * height_mid
 274 |                         index_y_u = index_y_d + img_height_model
 275 | 
 276 |                     if index_x_u > img_w:
 277 |                         index_x_u = img_w
 278 |                         index_x_d = img_w - img_width_model
 279 |                     if index_y_u > img_h:
 280 |                         index_y_u = img_h
 281 |                         index_y_d = img_h - img_height_model
 282 |                         
 283 |                     
 284 | 
 285 |                     img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
 286 | 
 287 |                     label_p_pred = model.predict(
 288 |                         img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]))
 289 | 
 290 |                     seg = np.argmax(label_p_pred, axis=3)[0]
 291 | 
 292 |                     seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
 293 | 
 294 |                     if i==0 and j==0:
 295 |                         seg_color = seg_color[0:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :]
 296 |                         seg = seg[0:seg.shape[0] - margin, 0:seg.shape[1] - margin]
 297 | 
 298 |                         mask_true[index_y_d + 0:index_y_u - margin, index_x_d + 0:index_x_u - margin] = seg
 299 |                         prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + 0:index_x_u - margin,
 300 |                         :] = seg_color
 301 |                         
 302 |                     elif i==nxf-1 and j==nyf-1:
 303 |                         seg_color = seg_color[margin:seg_color.shape[0] - 0, margin:seg_color.shape[1] - 0, :]
 304 |                         seg = seg[margin:seg.shape[0] - 0, margin:seg.shape[1] - 0]
 305 | 
 306 |                         mask_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - 0] = seg
 307 |                         prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - 0,
 308 |                         :] = seg_color
 309 |                         
 310 |                     elif i==0 and j==nyf-1:
 311 |                         seg_color = seg_color[margin:seg_color.shape[0] - 0, 0:seg_color.shape[1] - margin, :]
 312 |                         seg = seg[margin:seg.shape[0] - 0, 0:seg.shape[1] - margin]
 313 | 
 314 |                         mask_true[index_y_d + margin:index_y_u - 0, index_x_d + 0:index_x_u - margin] = seg
 315 |                         prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + 0:index_x_u - margin,
 316 |                         :] = seg_color
 317 |                         
 318 |                     elif i==nxf-1 and j==0:
 319 |                         seg_color = seg_color[0:seg_color.shape[0] - margin, margin:seg_color.shape[1] - 0, :]
 320 |                         seg = seg[0:seg.shape[0] - margin, margin:seg.shape[1] - 0]
 321 | 
 322 |                         mask_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - 0] = seg
 323 |                         prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - 0,
 324 |                         :] = seg_color
 325 |                         
 326 |                     elif i==0 and j!=0 and j!=nyf-1:
 327 |                         seg_color = seg_color[margin:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :]
 328 |                         seg = seg[margin:seg.shape[0] - margin, 0:seg.shape[1] - margin]
 329 | 
 330 |                         mask_true[index_y_d + margin:index_y_u - margin, index_x_d + 0:index_x_u - margin] = seg
 331 |                         prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + 0:index_x_u - margin,
 332 |                         :] = seg_color
 333 |                         
 334 |                     elif i==nxf-1 and j!=0 and j!=nyf-1:
 335 |                         seg_color = seg_color[margin:seg_color.shape[0] - margin, margin:seg_color.shape[1] - 0, :]
 336 |                         seg = seg[margin:seg.shape[0] - margin, margin:seg.shape[1] - 0]
 337 | 
 338 |                         mask_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - 0] = seg
 339 |                         prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - 0,
 340 |                         :] = seg_color
 341 |                         
 342 |                     elif i!=0 and i!=nxf-1 and j==0:
 343 |                         seg_color = seg_color[0:seg_color.shape[0] - margin, margin:seg_color.shape[1] - margin, :]
 344 |                         seg = seg[0:seg.shape[0] - margin, margin:seg.shape[1] - margin]
 345 | 
 346 |                         mask_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - margin] = seg
 347 |                         prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - margin,
 348 |                         :] = seg_color
 349 |                         
 350 |                     elif i!=0 and i!=nxf-1 and j==nyf-1:
 351 |                         seg_color = seg_color[margin:seg_color.shape[0] - 0, margin:seg_color.shape[1] - margin, :]
 352 |                         seg = seg[margin:seg.shape[0] - 0, margin:seg.shape[1] - margin]
 353 | 
 354 |                         mask_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - margin] = seg
 355 |                         prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - margin,
 356 |                         :] = seg_color
 357 | 
 358 |                     else:
 359 |                         seg_color = seg_color[margin:seg_color.shape[0] - margin, margin:seg_color.shape[1] - margin, :]
 360 |                         seg = seg[margin:seg.shape[0] - margin, margin:seg.shape[1] - margin]
 361 | 
 362 |                         mask_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin] = seg
 363 |                         prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin,
 364 |                         :] = seg_color
 365 | 
 366 |             prediction_true = prediction_true.astype(np.uint8)
 367 |                 
 368 |         if not patches:
 369 | 
 370 |             img = img /float( 255.0)
 371 |             img = self.resize_image(img, img_height_model, img_width_model)
 372 | 
 373 |             label_p_pred = model.predict(
 374 |                 img.reshape(1, img.shape[0], img.shape[1], img.shape[2]))
 375 | 
 376 |             seg = np.argmax(label_p_pred, axis=3)[0]
 377 |             seg_color =np.repeat(seg[:, :, np.newaxis], 3, axis=2)
 378 |             prediction_true = self.resize_image(seg_color, self.image.shape[0], self.image.shape[1])
 379 |             prediction_true = prediction_true.astype(np.uint8)
 380 |         return prediction_true
 381 |             
 382 |         
 383 | 
 384 |     def extract_page(self):
 385 |         patches=False
 386 |         model_page, session_page = self.start_new_session_and_model(self.model_page_dir)
 387 |         img = self.image#self.otsu_copy(self.image)
 388 |         #for ii in range(1):
 389 |         #    img = cv2.GaussianBlur(img, (15, 15), 0)
 390 | 
 391 |         
 392 |         img_page_prediction=self.do_prediction(patches,img,model_page)
 393 |         
 394 |         imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
 395 |         _, thresh = cv2.threshold(imgray, 0, 255, 0)
 396 | 
 397 |         thresh = cv2.dilate(thresh, self.kernel, iterations=6)
 398 |         contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
 399 | 
 400 |         cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))])
 401 | 
 402 |         cnt = contours[np.argmax(cnt_size)]
 403 | 
 404 |         x, y, w, h = cv2.boundingRect(cnt)
 405 |         
 406 |         try:
 407 |             box = [x, y, w, h]
 408 |             
 409 |             croped_page, page_coord = self.crop_image_inside_box(box, self.image)
 410 |             
 411 | 
 412 |             self.cont_page=[]
 413 |             self.cont_page.append( np.array( [ [ page_coord[2] , page_coord[0] ] , 
 414 |                                                         [ page_coord[3] , page_coord[0] ] ,
 415 |                                                         [ page_coord[3] , page_coord[1] ] ,
 416 |                                                     [ page_coord[2] , page_coord[1] ]] ) )
 417 |         except:
 418 |             box = [0, 0, self.image.shape[1]-1, self.image.shape[0]-1]
 419 |             croped_page, page_coord = self.crop_image_inside_box(box, self.image)
 420 |             
 421 | 
 422 |             self.cont_page=[]
 423 |             self.cont_page.append( np.array( [ [ page_coord[2] , page_coord[0] ] , 
 424 |                                                         [ page_coord[3] , page_coord[0] ] ,
 425 |                                                         [ page_coord[3] , page_coord[1] ] ,
 426 |                                                     [ page_coord[2] , page_coord[1] ]] ) )
 427 | 
 428 |         session_page.close()
 429 |         del model_page
 430 |         del session_page
 431 |         del self.image
 432 |         del contours
 433 |         del thresh
 434 |         del img
 435 | 
 436 |         gc.collect()
 437 |         return croped_page, page_coord
 438 | 
 439 |     def extract_text_regions(self, img):
 440 |         
 441 |         patches=True
 442 |         model_region, session_region = self.start_new_session_and_model(self.model_region_dir)
 443 |         img = self.otsu_copy(img)
 444 |         img = img.astype(np.uint8)
 445 |         
 446 | 
 447 |         prediction_regions=self.do_prediction(patches,img,model_region)
 448 |         
 449 |         
 450 |         session_region.close()
 451 |         del model_region
 452 |         del session_region
 453 |         gc.collect()
 454 |         return prediction_regions
 455 | 
 456 |     def get_text_region_contours_and_boxes(self, image):
 457 |         rgb_class_of_texts = (1, 1, 1)
 458 |         mask_texts = np.all(image == rgb_class_of_texts, axis=-1)
 459 | 
 460 |         image = np.repeat(mask_texts[:, :, np.newaxis], 3, axis=2) * 255
 461 |         image = image.astype(np.uint8)
 462 | 
 463 |         image = cv2.morphologyEx(image, cv2.MORPH_OPEN, self.kernel)
 464 |         image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, self.kernel)
 465 | 
 466 | 
 467 |         imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 468 | 
 469 |         _, thresh = cv2.threshold(imgray, 0, 255, 0)
 470 | 
 471 |         contours, hierarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
 472 |         
 473 |         main_contours = self.filter_contours_area_of_image(thresh, contours, hierarchy, max_area=1, min_area=0.00001)
 474 |         self.boxes = []
 475 |         
 476 |         for jj in range(len(main_contours)):
 477 |             x, y, w, h = cv2.boundingRect(main_contours[jj])
 478 |             self.boxes.append([x, y, w, h])
 479 |             
 480 | 
 481 |         return main_contours
 482 | 
 483 |     def get_all_image_patches_coordination(self, image_page):
 484 |         self.all_box_coord=[]
 485 |         for jk in range(len(self.boxes)):
 486 |             _,crop_coor=self.crop_image_inside_box(self.boxes[jk],image_page)
 487 |             self.all_box_coord.append(crop_coor) 
 488 |         
 489 | 
 490 |     def textline_contours(self, img):
 491 |         patches=True
 492 |         model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir)
 493 |         #####img = self.otsu_copy(img)
 494 |         img = img.astype(np.uint8)
 495 |         
 496 |         prediction_textline=self.do_prediction(patches,img,model_textline)
 497 | 
 498 |         session_textline.close()
 499 | 
 500 |         del model_textline
 501 |         del session_textline
 502 |         gc.collect()
 503 |         return prediction_textline[:,:,0]
 504 | 
 505 |     def get_textlines_for_each_textregions(self, textline_mask_tot, boxes):
 506 |         ########textline_mask_tot = cv2.erode(textline_mask_tot, self.kernel, iterations=1) ####should be changed
 507 |         self.area_of_cropped = []
 508 |         self.all_text_region_raw = []
 509 |         for jk in range(len(boxes)):
 510 |             crop_img, crop_coor = self.crop_image_inside_box(boxes[jk],
 511 |                                                              np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2))
 512 |             crop_img=crop_img.astype(np.uint8)
 513 |             self.all_text_region_raw.append(crop_img[:, :, 0])
 514 |             self.area_of_cropped.append(crop_img.shape[0] * crop_img.shape[1])
 515 |             
 516 |     def seperate_lines(self, img_patch, contour_text_interest, thetha):
 517 |         (h, w) = img_patch.shape[:2]
 518 |         center = (w // 2, h // 2)
 519 |         M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
 520 |         x_d = M[0, 2]
 521 |         y_d = M[1, 2]
 522 | 
 523 |         thetha = thetha / 180. * np.pi
 524 |         rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]])
 525 |         contour_text_interest_copy = contour_text_interest.copy()
 526 | 
 527 |         x_cont = contour_text_interest[:, 0, 0]
 528 |         y_cont = contour_text_interest[:, 0, 1]
 529 |         x_cont = x_cont - np.min(x_cont)
 530 |         y_cont = y_cont - np.min(y_cont)
 531 | 
 532 |         x_min_cont = 0
 533 |         x_max_cont = img_patch.shape[1]
 534 |         y_min_cont = 0
 535 |         y_max_cont = img_patch.shape[0]
 536 | 
 537 |         xv = np.linspace(x_min_cont, x_max_cont, 1000)
 538 | 
 539 |         textline_patch_sum_along_width = img_patch.sum(axis=1)
 540 | 
 541 |         first_nonzero = 0  # (next((i for i, x in enumerate(mada_n) if x), None))
 542 | 
 543 |         y = textline_patch_sum_along_width[:]  # [first_nonzero:last_nonzero]
 544 |         y_padded = np.zeros(len(y) + 40)
 545 |         y_padded[20:len(y) + 20] = y
 546 |         x = np.array(range(len(y)))
 547 | 
 548 |         peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
 549 |         if 1>0:
 550 | 
 551 |             try:
 552 | 
 553 |                 y_padded_smoothed_e= gaussian_filter1d(y_padded, 2)
 554 |                 y_padded_up_to_down_e=-y_padded+np.max(y_padded)
 555 |                 y_padded_up_to_down_padded_e=np.zeros(len(y_padded_up_to_down_e)+40)
 556 |                 y_padded_up_to_down_padded_e[20:len(y_padded_up_to_down_e)+20]=y_padded_up_to_down_e
 557 |                 y_padded_up_to_down_padded_e= gaussian_filter1d(y_padded_up_to_down_padded_e, 2)
 558 |                 
 559 | 
 560 |                 peaks_e, _ = find_peaks(y_padded_smoothed_e, height=0)
 561 |                 peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
 562 |                 neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
 563 | 
 564 |                 arg_neg_must_be_deleted= np.array(range(len(peaks_neg_e)))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3  ] 
 565 |                 diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted)
 566 |                 
 567 | 
 568 |                 
 569 |                 arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
 570 |                 arg_diff_cluster=arg_diff[diff_arg_neg_must_be_deleted>1]
 571 |                 
 572 | 
 573 |                 peaks_new=peaks_e[:]
 574 |                 peaks_neg_new=peaks_neg_e[:]
 575 | 
 576 |                 clusters_to_be_deleted=[]
 577 |                 if len(arg_diff_cluster)>0:
 578 |                     
 579 |                     clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0]+1])
 580 |                     for i in range(len(arg_diff_cluster)-1):
 581 |                         clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1:arg_diff_cluster[i+1]+1])
 582 |                     clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster)-1]+1:])
 583 |                     
 584 | 
 585 |                 if len(clusters_to_be_deleted)>0:
 586 |                     peaks_new_extra=[]
 587 |                     for m in range(len(clusters_to_be_deleted)):
 588 |                         min_cluster=np.min(peaks_e[clusters_to_be_deleted[m]])
 589 |                         max_cluster=np.max(peaks_e[clusters_to_be_deleted[m]])
 590 |                         peaks_new_extra.append( int( (min_cluster+max_cluster)/2.0) )
 591 |                         for m1 in range(len(clusters_to_be_deleted[m])):
 592 |                             peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]-1]]
 593 |                             peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]]]
 594 |                             
 595 |                             peaks_neg_new=peaks_neg_new[peaks_neg_new!=peaks_neg_e[clusters_to_be_deleted[m][m1]]]
 596 |                     peaks_new_tot=[]
 597 |                     for i1 in peaks_new:
 598 |                         peaks_new_tot.append(i1)
 599 |                     for i1 in peaks_new_extra:
 600 |                         peaks_new_tot.append(i1)
 601 |                     peaks_new_tot=np.sort(peaks_new_tot)
 602 |                     
 603 |                     
 604 |                 else:
 605 |                     peaks_new_tot=peaks_e[:]
 606 | 
 607 | 
 608 |                 textline_con,hierachy=self.return_contours_of_image(img_patch)
 609 |                 textline_con_fil=self.filter_contours_area_of_image(img_patch,textline_con,hierachy,max_area=1,min_area=0.0008)
 610 |                 y_diff_mean=np.mean(np.diff(peaks_new_tot))#self.find_contours_mean_y_diff(textline_con_fil)
 611 | 
 612 |                 sigma_gaus=int(  y_diff_mean * (7./40.0) )
 613 |                 #print(sigma_gaus,'sigma_gaus')
 614 |             except:
 615 |                 sigma_gaus=12
 616 |             if sigma_gaus<3:
 617 |                 sigma_gaus=3
 618 |             #print(sigma_gaus,'sigma')
 619 |     
 620 |     
 621 |         y_padded_smoothed= gaussian_filter1d(y_padded, sigma_gaus)
 622 |         y_padded_up_to_down=-y_padded+np.max(y_padded)
 623 |         y_padded_up_to_down_padded=np.zeros(len(y_padded_up_to_down)+40)
 624 |         y_padded_up_to_down_padded[20:len(y_padded_up_to_down)+20]=y_padded_up_to_down
 625 |         y_padded_up_to_down_padded= gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus)
 626 |         
 627 | 
 628 |         peaks, _ = find_peaks(y_padded_smoothed, height=0)
 629 |         peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0)
 630 |         
 631 |         
 632 |         #plt.plot(y_padded_up_to_down_padded)
 633 |         #plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*')
 634 |         #plt.title('negs')
 635 |         #plt.show()
 636 |         
 637 | 
 638 |         
 639 |         #plt.plot(y_padded_smoothed)
 640 |         #plt.plot(peaks,y_padded_smoothed[peaks],'*')
 641 |         #plt.title('poss')
 642 |         #plt.show()
 643 | 
 644 |             
 645 | 
 646 |         neg_peaks_max=np.max(y_padded_smoothed[peaks])
 647 |         
 648 | 
 649 |         arg_neg_must_be_deleted= np.array(range(len(peaks_neg)))[y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42  ] 
 650 | 
 651 | 
 652 |         diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted)
 653 |         
 654 | 
 655 |         
 656 |         arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
 657 |         arg_diff_cluster=arg_diff[diff_arg_neg_must_be_deleted>1]
 658 |         
 659 | 
 660 |         peaks_new=peaks[:]
 661 |         peaks_neg_new=peaks_neg[:]
 662 |         clusters_to_be_deleted=[]
 663 |         
 664 | 
 665 |         if len(arg_diff_cluster)>=2 and len(arg_diff_cluster)>0:
 666 |         
 667 |             clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0]+1])
 668 |             for i in range(len(arg_diff_cluster)-1):
 669 |                 clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1:arg_diff_cluster[i+1]+1])
 670 |             clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster)-1]+1:])
 671 |         elif len(arg_neg_must_be_deleted)>=2 and len(arg_diff_cluster)==0:
 672 |             clusters_to_be_deleted.append(arg_neg_must_be_deleted[:])
 673 |             
 674 |     
 675 |     
 676 |         if  len(arg_neg_must_be_deleted)==1:
 677 |             clusters_to_be_deleted.append(arg_neg_must_be_deleted)
 678 |             
 679 | 
 680 |         if len(clusters_to_be_deleted)>0:
 681 |             peaks_new_extra=[]
 682 |             for m in range(len(clusters_to_be_deleted)):
 683 |                 min_cluster=np.min(peaks[clusters_to_be_deleted[m]])
 684 |                 max_cluster=np.max(peaks[clusters_to_be_deleted[m]])
 685 |                 peaks_new_extra.append( int( (min_cluster+max_cluster)/2.0) )
 686 |                 for m1 in range(len(clusters_to_be_deleted[m])):
 687 |                     peaks_new=peaks_new[peaks_new!=peaks[clusters_to_be_deleted[m][m1]-1]]
 688 |                     peaks_new=peaks_new[peaks_new!=peaks[clusters_to_be_deleted[m][m1]]]
 689 |                     
 690 |                     peaks_neg_new=peaks_neg_new[peaks_neg_new!=peaks_neg[clusters_to_be_deleted[m][m1]]]
 691 |             peaks_new_tot=[]
 692 |             for i1 in peaks_new:
 693 |                 peaks_new_tot.append(i1)
 694 |             for i1 in peaks_new_extra:
 695 |                 peaks_new_tot.append(i1)
 696 |             peaks_new_tot=np.sort(peaks_new_tot)
 697 |             
 698 |             #plt.plot(y_padded_up_to_down_padded)
 699 |             #plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*')
 700 |             #plt.show()
 701 |             
 702 |             #plt.plot(y_padded_up_to_down_padded)
 703 |             #plt.plot(peaks_neg_new,y_padded_up_to_down_padded[peaks_neg_new],'*')
 704 |             #plt.show()
 705 |             
 706 |             #plt.plot(y_padded_smoothed)
 707 |             #plt.plot(peaks,y_padded_smoothed[peaks],'*')
 708 |             #plt.show()
 709 |             
 710 |             #plt.plot(y_padded_smoothed)
 711 |             #plt.plot(peaks_new_tot,y_padded_smoothed[peaks_new_tot],'*')
 712 |             #plt.show()
 713 |             
 714 |             peaks=peaks_new_tot[:]
 715 |             peaks_neg=peaks_neg_new[:]
 716 |             
 717 |             
 718 |         else:
 719 |             peaks_new_tot=peaks[:]
 720 |             peaks=peaks_new_tot[:]
 721 |             peaks_neg=peaks_neg_new[:]
 722 |         
 723 |         mean_value_of_peaks=np.mean(y_padded_smoothed[peaks])
 724 |         std_value_of_peaks=np.std(y_padded_smoothed[peaks])
 725 |         peaks_values=y_padded_smoothed[peaks]
 726 |         
 727 | 
 728 |         peaks_neg = peaks_neg - 20 - 20
 729 |         peaks = peaks - 20
 730 | 
 731 |         for jj in range(len(peaks_neg)):
 732 |             if peaks_neg[jj] > len(x) - 1:
 733 |                 peaks_neg[jj] = len(x) - 1
 734 | 
 735 |         for jj in range(len(peaks)):
 736 |             if peaks[jj] > len(x) - 1:
 737 |                 peaks[jj] = len(x) - 1
 738 |                 
 739 |         
 740 | 
 741 |         textline_boxes = []
 742 |         textline_boxes_rot = []
 743 | 
 744 |         if len(peaks_neg) == len(peaks) + 1 and len(peaks) >= 3:
 745 |             #print('11')
 746 |             for jj in range(len(peaks)):
 747 |                 
 748 |                 if jj==(len(peaks)-1):
 749 |                     dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
 750 |                     dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
 751 |                     
 752 |                     if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
 753 |                         point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
 754 |                         point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
 755 |                     else:
 756 |                         point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
 757 |                         point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
 758 | 
 759 |                     point_down_narrow = peaks[jj] + first_nonzero + int(
 760 |                         1.4 * dis_to_next_down)  ###-int(dis_to_next_down*1./2)
 761 |                 else:
 762 |                     dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
 763 |                     dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
 764 |                     
 765 |                     if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
 766 |                         point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
 767 |                         point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
 768 |                     else:
 769 |                         point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
 770 |                         point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
 771 | 
 772 |                     point_down_narrow = peaks[jj] + first_nonzero + int(
 773 |                         1.1 * dis_to_next_down)  ###-int(dis_to_next_down*1./2)
 774 | 
 775 | 
 776 | 
 777 |                 if point_down_narrow >= img_patch.shape[0]:
 778 |                     point_down_narrow = img_patch.shape[0] - 2
 779 | 
 780 |                 distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True)
 781 |                              for mj in range(len(xv))]
 782 |                 distances = np.array(distances)
 783 | 
 784 |                 xvinside = xv[distances >= 0]
 785 | 
 786 |                 if len(xvinside) == 0:
 787 |                     x_min = x_min_cont
 788 |                     x_max = x_max_cont
 789 |                 else:
 790 |                     x_min = np.min(xvinside)  # max(x_min_interest,x_min_cont)
 791 |                     x_max = np.max(xvinside)  # min(x_max_interest,x_max_cont)
 792 | 
 793 |                 p1 = np.dot(rotation_matrix, [int(x_min), int(point_up)])
 794 |                 p2 = np.dot(rotation_matrix, [int(x_max), int(point_up)])
 795 |                 p3 = np.dot(rotation_matrix, [int(x_max), int(point_down)])
 796 |                 p4 = np.dot(rotation_matrix, [int(x_min), int(point_down)])
 797 | 
 798 |                 x_min_rot1, point_up_rot1 = p1[0] + x_d, p1[1] + y_d
 799 |                 x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d
 800 |                 x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d
 801 |                 x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d
 802 |                 
 803 |                 if x_min_rot1<0:
 804 |                     x_min_rot1=0
 805 |                 if x_min_rot4<0:
 806 |                     x_min_rot4=0
 807 |                 if point_up_rot1<0:
 808 |                     point_up_rot1=0
 809 |                 if point_up_rot2<0:
 810 |                     point_up_rot2=0
 811 | 
 812 |                 textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
 813 |                                                     [int(x_max_rot2), int(point_up_rot2)],
 814 |                                                     [int(x_max_rot3), int(point_down_rot3)],
 815 |                                                     [int(x_min_rot4), int(point_down_rot4)]]))
 816 | 
 817 |                 textline_boxes.append(np.array([[int(x_min), int(point_up)],
 818 |                                                 [int(x_max), int(point_up)],
 819 |                                                 [int(x_max), int(point_down)],
 820 |                                                 [int(x_min), int(point_down)]]))
 821 | 
 822 |         elif len(peaks) < 1:
 823 |             pass
 824 | 
 825 |         elif len(peaks) == 1:
 826 |             x_min = x_min_cont
 827 |             x_max = x_max_cont
 828 | 
 829 |             y_min = y_min_cont
 830 |             y_max = y_max_cont
 831 | 
 832 |             p1 = np.dot(rotation_matrix, [int(x_min), int(y_min)])
 833 |             p2 = np.dot(rotation_matrix, [int(x_max), int(y_min)])
 834 |             p3 = np.dot(rotation_matrix, [int(x_max), int(y_max)])
 835 |             p4 = np.dot(rotation_matrix, [int(x_min), int(y_max)])
 836 | 
 837 |             x_min_rot1, point_up_rot1 = p1[0] + x_d, p1[1] + y_d
 838 |             x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d
 839 |             x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d
 840 |             x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d
 841 |             
 842 |             
 843 |             if x_min_rot1<0:
 844 |                 x_min_rot1=0
 845 |             if x_min_rot4<0:
 846 |                 x_min_rot4=0
 847 |             if point_up_rot1<0:
 848 |                 point_up_rot1=0
 849 |             if point_up_rot2<0:
 850 |                 point_up_rot2=0
 851 | 
 852 |             textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
 853 |                                                 [int(x_max_rot2), int(point_up_rot2)],
 854 |                                                 [int(x_max_rot3), int(point_down_rot3)],
 855 |                                                 [int(x_min_rot4), int(point_down_rot4)]]))
 856 | 
 857 |             textline_boxes.append(np.array([[int(x_min), int(y_min)],
 858 |                                             [int(x_max), int(y_min)],
 859 |                                             [int(x_max), int(y_max)],
 860 |                                             [int(x_min), int(y_max)]]))
 861 | 
 862 | 
 863 | 
 864 |         elif len(peaks) == 2:
 865 |             dis_to_next = np.abs(peaks[1] - peaks[0])
 866 |             for jj in range(len(peaks)):
 867 |                 if jj == 0:
 868 |                     point_up = 0#peaks[jj] + first_nonzero - int(1. / 1.7 * dis_to_next)
 869 |                     if point_up < 0:
 870 |                         point_up = 1
 871 |                     point_down = peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next)
 872 |                 elif jj == 1:
 873 |                     point_down = peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next)
 874 |                     if point_down >= img_patch.shape[0]:
 875 |                         point_down = img_patch.shape[0] - 2
 876 |                     point_up = peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next)
 877 | 
 878 |                 distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True)
 879 |                              for mj in range(len(xv))]
 880 |                 distances = np.array(distances)
 881 | 
 882 |                 xvinside = xv[distances >= 0]
 883 | 
 884 |                 if len(xvinside) == 0:
 885 |                     x_min = x_min_cont
 886 |                     x_max = x_max_cont
 887 |                 else:
 888 |                     x_min = np.min(xvinside)
 889 |                     x_max = np.max(xvinside)
 890 | 
 891 |                 p1 = np.dot(rotation_matrix, [int(x_min), int(point_up)])
 892 |                 p2 = np.dot(rotation_matrix, [int(x_max), int(point_up)])
 893 |                 p3 = np.dot(rotation_matrix, [int(x_max), int(point_down)])
 894 |                 p4 = np.dot(rotation_matrix, [int(x_min), int(point_down)])
 895 | 
 896 |                 x_min_rot1, point_up_rot1 = p1[0] + x_d, p1[1] + y_d
 897 |                 x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d
 898 |                 x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d
 899 |                 x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d
 900 |                 
 901 |                 if x_min_rot1<0:
 902 |                     x_min_rot1=0
 903 |                 if x_min_rot4<0:
 904 |                     x_min_rot4=0
 905 |                 if point_up_rot1<0:
 906 |                     point_up_rot1=0
 907 |                 if point_up_rot2<0:
 908 |                     point_up_rot2=0
 909 | 
 910 |                 textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
 911 |                                                     [int(x_max_rot2), int(point_up_rot2)],
 912 |                                                     [int(x_max_rot3), int(point_down_rot3)],
 913 |                                                     [int(x_min_rot4), int(point_down_rot4)]]))
 914 | 
 915 |                 textline_boxes.append(np.array([[int(x_min), int(point_up)],
 916 |                                                 [int(x_max), int(point_up)],
 917 |                                                 [int(x_max), int(point_down)],
 918 |                                                 [int(x_min), int(point_down)]]))
 919 |         else:
 920 |             for jj in range(len(peaks)):
 921 | 
 922 |                 if jj == 0:
 923 |                     dis_to_next = peaks[jj + 1] - peaks[jj]
 924 |                     # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next)
 925 |                     point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next)
 926 |                     if point_up < 0:
 927 |                         point_up = 1
 928 |                     # point_down=peaks[jj]+first_nonzero+int(1./3*dis_to_next)
 929 |                     point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next)
 930 |                 elif jj == len(peaks) - 1:
 931 |                     dis_to_next = peaks[jj] - peaks[jj - 1]
 932 |                     # point_down=peaks[jj]+first_nonzero+int(1./3*dis_to_next)
 933 |                     point_down = peaks[jj] + first_nonzero + int(1. / 1.7 * dis_to_next)
 934 |                     if point_down >= img_patch.shape[0]:
 935 |                         point_down = img_patch.shape[0] - 2
 936 |                     # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next)
 937 |                     point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next)
 938 |                 else:
 939 |                     dis_to_next_down = peaks[jj + 1] - peaks[jj]
 940 |                     dis_to_next_up = peaks[jj] - peaks[jj - 1]
 941 | 
 942 |                     point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next_up)
 943 |                     point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next_down)
 944 | 
 945 |                 distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True)
 946 |                              for mj in range(len(xv))]
 947 |                 distances = np.array(distances)
 948 | 
 949 |                 xvinside = xv[distances >= 0]
 950 | 
 951 |                 if len(xvinside) == 0:
 952 |                     x_min = x_min_cont
 953 |                     x_max = x_max_cont
 954 |                 else:
 955 |                     x_min = np.min(xvinside)  # max(x_min_interest,x_min_cont)
 956 |                     x_max = np.max(xvinside)  # min(x_max_interest,x_max_cont)
 957 | 
 958 |                 p1 = np.dot(rotation_matrix, [int(x_min), int(point_up)])
 959 |                 p2 = np.dot(rotation_matrix, [int(x_max), int(point_up)])
 960 |                 p3 = np.dot(rotation_matrix, [int(x_max), int(point_down)])
 961 |                 p4 = np.dot(rotation_matrix, [int(x_min), int(point_down)])
 962 | 
 963 |                 x_min_rot1, point_up_rot1 = p1[0] + x_d, p1[1] + y_d
 964 |                 x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d
 965 |                 x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d
 966 |                 x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d
 967 |                 
 968 |                 
 969 |                 if x_min_rot1<0:
 970 |                     x_min_rot1=0
 971 |                 if x_min_rot4<0:
 972 |                     x_min_rot4=0
 973 |                 if point_up_rot1<0:
 974 |                     point_up_rot1=0
 975 |                 if point_up_rot2<0:
 976 |                     point_up_rot2=0
 977 |                     
 978 | 
 979 | 
 980 |                 textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
 981 |                                                     [int(x_max_rot2), int(point_up_rot2)],
 982 |                                                     [int(x_max_rot3), int(point_down_rot3)],
 983 |                                                     [int(x_min_rot4), int(point_down_rot4)]]))
 984 | 
 985 |                 textline_boxes.append(np.array([[int(x_min), int(point_up)],
 986 |                                                 [int(x_max), int(point_up)],
 987 |                                                 [int(x_max), int(point_down)],
 988 |                                                 [int(x_min), int(point_down)]]))
 989 | 
 990 | 
 991 |         return peaks, textline_boxes_rot
 992 | 
 993 |     def seperate_lines_vertical(self, img_patch, contour_text_interest, thetha):
 994 |         
 995 |         
 996 |         thetha=thetha+90
 997 | 
 998 |         (h, w) = img_patch.shape[:2]
 999 |         center = (w // 2, h // 2)
1000 |         M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
1001 |         x_d = M[0, 2]
1002 |         y_d = M[1, 2]
1003 | 
1004 |         thetha = thetha / 180. * np.pi
1005 |         rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]])
1006 |         contour_text_interest_copy = contour_text_interest.copy()
1007 | 
1008 |         x_cont = contour_text_interest[:, 0, 0]
1009 |         y_cont = contour_text_interest[:, 0, 1]
1010 |         x_cont = x_cont - np.min(x_cont)
1011 |         y_cont = y_cont - np.min(y_cont)
1012 | 
1013 |         x_min_cont = 0
1014 |         x_max_cont = img_patch.shape[1]
1015 |         y_min_cont = 0
1016 |         y_max_cont = img_patch.shape[0]
1017 | 
1018 |         xv = np.linspace(x_min_cont, x_max_cont, 1000)
1019 | 
1020 |         textline_patch_sum_along_width = img_patch.sum(axis=0)
1021 | 
1022 |         first_nonzero = 0  # (next((i for i, x in enumerate(mada_n) if x), None))
1023 | 
1024 |         y = textline_patch_sum_along_width[:]  # [first_nonzero:last_nonzero]
1025 |         y_padded = np.zeros(len(y) + 40)
1026 |         y_padded[20:len(y) + 20] = y
1027 |         x = np.array(range(len(y)))
1028 | 
1029 |         peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
1030 |         if 1>0:
1031 | 
1032 |             try:
1033 | 
1034 |                 y_padded_smoothed_e= gaussian_filter1d(y_padded, 2)
1035 |                 y_padded_up_to_down_e=-y_padded+np.max(y_padded)
1036 |                 y_padded_up_to_down_padded_e=np.zeros(len(y_padded_up_to_down_e)+40)
1037 |                 y_padded_up_to_down_padded_e[20:len(y_padded_up_to_down_e)+20]=y_padded_up_to_down_e
1038 |                 y_padded_up_to_down_padded_e= gaussian_filter1d(y_padded_up_to_down_padded_e, 2)
1039 |                 
1040 | 
1041 |                 peaks_e, _ = find_peaks(y_padded_smoothed_e, height=0)
1042 |                 peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
1043 |                 neg_peaks_max=np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
1044 | 
1045 |                 arg_neg_must_be_deleted= np.array(range(len(peaks_neg_e)))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3  ] 
1046 |                 diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted)
1047 |                 
1048 | 
1049 |                 
1050 |                 arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
1051 |                 arg_diff_cluster=arg_diff[diff_arg_neg_must_be_deleted>1]
1052 |                 
1053 | 
1054 |                 peaks_new=peaks_e[:]
1055 |                 peaks_neg_new=peaks_neg_e[:]
1056 | 
1057 |                 clusters_to_be_deleted=[]
1058 |                 if len(arg_diff_cluster)>0:
1059 |                     
1060 |                     clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0]+1])
1061 |                     for i in range(len(arg_diff_cluster)-1):
1062 |                         clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1:arg_diff_cluster[i+1]+1])
1063 |                     clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster)-1]+1:])
1064 |                     
1065 | 
1066 |                 if len(clusters_to_be_deleted)>0:
1067 |                     peaks_new_extra=[]
1068 |                     for m in range(len(clusters_to_be_deleted)):
1069 |                         min_cluster=np.min(peaks_e[clusters_to_be_deleted[m]])
1070 |                         max_cluster=np.max(peaks_e[clusters_to_be_deleted[m]])
1071 |                         peaks_new_extra.append( int( (min_cluster+max_cluster)/2.0) )
1072 |                         for m1 in range(len(clusters_to_be_deleted[m])):
1073 |                             peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]-1]]
1074 |                             peaks_new=peaks_new[peaks_new!=peaks_e[clusters_to_be_deleted[m][m1]]]
1075 |                             
1076 |                             peaks_neg_new=peaks_neg_new[peaks_neg_new!=peaks_neg_e[clusters_to_be_deleted[m][m1]]]
1077 |                     peaks_new_tot=[]
1078 |                     for i1 in peaks_new:
1079 |                         peaks_new_tot.append(i1)
1080 |                     for i1 in peaks_new_extra:
1081 |                         peaks_new_tot.append(i1)
1082 |                     peaks_new_tot=np.sort(peaks_new_tot)
1083 |                     
1084 |                     
1085 |                 else:
1086 |                     peaks_new_tot=peaks_e[:]
1087 | 
1088 | 
1089 |                 textline_con,hierachy=self.return_contours_of_image(img_patch)
1090 |                 textline_con_fil=self.filter_contours_area_of_image(img_patch,textline_con,hierachy,max_area=1,min_area=0.0008)
1091 |                 y_diff_mean=np.mean(np.diff(peaks_new_tot))#self.find_contours_mean_y_diff(textline_con_fil)
1092 | 
1093 |                 sigma_gaus=int(  y_diff_mean * (7./40.0) )
1094 |                 #print(sigma_gaus,'sigma_gaus')
1095 |             except:
1096 |                 sigma_gaus=12
1097 |             if sigma_gaus<3:
1098 |                 sigma_gaus=3
1099 |             #print(sigma_gaus,'sigma')
1100 |     
1101 |     
1102 |         y_padded_smoothed= gaussian_filter1d(y_padded, sigma_gaus)
1103 |         y_padded_up_to_down=-y_padded+np.max(y_padded)
1104 |         y_padded_up_to_down_padded=np.zeros(len(y_padded_up_to_down)+40)
1105 |         y_padded_up_to_down_padded[20:len(y_padded_up_to_down)+20]=y_padded_up_to_down
1106 |         y_padded_up_to_down_padded= gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus)
1107 |         
1108 | 
1109 |         peaks, _ = find_peaks(y_padded_smoothed, height=0)
1110 |         peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0)
1111 |         
1112 |         
1113 |         #plt.plot(y_padded_up_to_down_padded)
1114 |         #plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*')
1115 |         #plt.title('negs')
1116 |         #plt.show()
1117 |         
1118 | 
1119 |         
1120 |         #plt.plot(y_padded_smoothed)
1121 |         #plt.plot(peaks,y_padded_smoothed[peaks],'*')
1122 |         #plt.title('poss')
1123 |         #plt.show()
1124 | 
1125 |             
1126 | 
1127 |         neg_peaks_max=np.max(y_padded_up_to_down_padded[peaks_neg])
1128 |         
1129 | 
1130 |         arg_neg_must_be_deleted= np.array(range(len(peaks_neg)))[y_padded_up_to_down_padded[peaks_neg]/float(neg_peaks_max)<0.42  ] 
1131 | 
1132 | 
1133 |         diff_arg_neg_must_be_deleted=np.diff(arg_neg_must_be_deleted)
1134 |         
1135 | 
1136 |         
1137 |         arg_diff=np.array(range(len(diff_arg_neg_must_be_deleted)))
1138 |         arg_diff_cluster=arg_diff[diff_arg_neg_must_be_deleted>1]
1139 |         
1140 | 
1141 |         peaks_new=peaks[:]
1142 |         peaks_neg_new=peaks_neg[:]
1143 |         clusters_to_be_deleted=[]
1144 |         
1145 | 
1146 |         if len(arg_diff_cluster)>=2 and len(arg_diff_cluster)>0:
1147 |         
1148 |             clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0]+1])
1149 |             for i in range(len(arg_diff_cluster)-1):
1150 |                 clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1:arg_diff_cluster[i+1]+1])
1151 |             clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster)-1]+1:])
1152 |         elif len(arg_neg_must_be_deleted)>=2 and len(arg_diff_cluster)==0:
1153 |             clusters_to_be_deleted.append(arg_neg_must_be_deleted[:])
1154 |             
1155 |     
1156 |     
1157 |         if  len(arg_neg_must_be_deleted)==1:
1158 |             clusters_to_be_deleted.append(arg_neg_must_be_deleted)
1159 |             
1160 | 
1161 |         if len(clusters_to_be_deleted)>0:
1162 |             peaks_new_extra=[]
1163 |             for m in range(len(clusters_to_be_deleted)):
1164 |                 min_cluster=np.min(peaks[clusters_to_be_deleted[m]])
1165 |                 max_cluster=np.max(peaks[clusters_to_be_deleted[m]])
1166 |                 peaks_new_extra.append( int( (min_cluster+max_cluster)/2.0) )
1167 |                 for m1 in range(len(clusters_to_be_deleted[m])):
1168 |                     peaks_new=peaks_new[peaks_new!=peaks[clusters_to_be_deleted[m][m1]-1]]
1169 |                     peaks_new=peaks_new[peaks_new!=peaks[clusters_to_be_deleted[m][m1]]]
1170 |                     
1171 |                     peaks_neg_new=peaks_neg_new[peaks_neg_new!=peaks_neg[clusters_to_be_deleted[m][m1]]]
1172 |             peaks_new_tot=[]
1173 |             for i1 in peaks_new:
1174 |                 peaks_new_tot.append(i1)
1175 |             for i1 in peaks_new_extra:
1176 |                 peaks_new_tot.append(i1)
1177 |             peaks_new_tot=np.sort(peaks_new_tot)
1178 |             
1179 | 
1180 |             peaks=peaks_new_tot[:]
1181 |             peaks_neg=peaks_neg_new[:]
1182 |             
1183 |             
1184 |         else:
1185 |             peaks_new_tot=peaks[:]
1186 |             peaks=peaks_new_tot[:]
1187 |             peaks_neg=peaks_neg_new[:]
1188 |         
1189 |         mean_value_of_peaks=np.mean(y_padded_smoothed[peaks])
1190 |         std_value_of_peaks=np.std(y_padded_smoothed[peaks])
1191 |         peaks_values=y_padded_smoothed[peaks]
1192 |         
1193 | 
1194 |         peaks_neg = peaks_neg - 20 - 20
1195 |         peaks = peaks - 20
1196 | 
1197 |         for jj in range(len(peaks_neg)):
1198 |             if peaks_neg[jj] > len(x) - 1:
1199 |                 peaks_neg[jj] = len(x) - 1
1200 | 
1201 |         for jj in range(len(peaks)):
1202 |             if peaks[jj] > len(x) - 1:
1203 |                 peaks[jj] = len(x) - 1
1204 |                 
1205 |         
1206 | 
1207 |         textline_boxes = []
1208 |         textline_boxes_rot = []
1209 | 
1210 |         if len(peaks_neg) == len(peaks) + 1 and len(peaks) >= 3:
1211 |             #print('11')
1212 |             for jj in range(len(peaks)):
1213 |                 
1214 |                 if jj==(len(peaks)-1):
1215 |                     dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
1216 |                     dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
1217 |                     
1218 |                     if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
1219 |                         point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
1220 |                         point_down =x_max_cont-1##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
1221 |                     else:
1222 |                         point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
1223 |                         point_down =x_max_cont-1##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
1224 | 
1225 |                     point_down_narrow = peaks[jj] + first_nonzero + int(
1226 |                         1.4 * dis_to_next_down)  ###-int(dis_to_next_down*1./2)
1227 |                 else:
1228 |                     dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
1229 |                     dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
1230 |                     
1231 |                     if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
1232 |                         point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
1233 |                         point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
1234 |                     else:
1235 |                         point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
1236 |                         point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
1237 | 
1238 |                     point_down_narrow = peaks[jj] + first_nonzero + int(
1239 |                         1.1 * dis_to_next_down)  ###-int(dis_to_next_down*1./2)
1240 | 
1241 | 
1242 | 
1243 |                 if point_down_narrow >= img_patch.shape[0]:
1244 |                     point_down_narrow = img_patch.shape[0] - 2
1245 | 
1246 |                 distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True)
1247 |                              for mj in range(len(xv))]
1248 |                 distances = np.array(distances)
1249 | 
1250 |                 xvinside = xv[distances >= 0]
1251 | 
1252 |                 if len(xvinside) == 0:
1253 |                     x_min = x_min_cont
1254 |                     x_max = x_max_cont
1255 |                 else:
1256 |                     x_min = np.min(xvinside)  # max(x_min_interest,x_min_cont)
1257 |                     x_max = np.max(xvinside)  # min(x_max_interest,x_max_cont)
1258 | 
1259 |                 p1 = np.dot(rotation_matrix, [int(point_up), int(y_min_cont)])
1260 |                 p2 = np.dot(rotation_matrix, [int(point_down), int(y_min_cont)])
1261 |                 p3 = np.dot(rotation_matrix, [int(point_down), int(y_max_cont)])
1262 |                 p4 = np.dot(rotation_matrix, [int(point_up), int(y_max_cont)])
1263 | 
1264 |                 x_min_rot1, point_up_rot1 = p1[0] + x_d, p1[1] + y_d
1265 |                 x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d
1266 |                 x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d
1267 |                 x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d
1268 |                 
1269 |                 if x_min_rot1<0:
1270 |                     x_min_rot1=0
1271 |                 if x_min_rot4<0:
1272 |                     x_min_rot4=0
1273 |                 if point_up_rot1<0:
1274 |                     point_up_rot1=0
1275 |                 if point_up_rot2<0:
1276 |                     point_up_rot2=0
1277 | 
1278 |                 textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
1279 |                                                     [int(x_max_rot2), int(point_up_rot2)],
1280 |                                                     [int(x_max_rot3), int(point_down_rot3)],
1281 |                                                     [int(x_min_rot4), int(point_down_rot4)]]))
1282 | 
1283 |                 textline_boxes.append(np.array([[int(x_min), int(point_up)],
1284 |                                                 [int(x_max), int(point_up)],
1285 |                                                 [int(x_max), int(point_down)],
1286 |                                                 [int(x_min), int(point_down)]]))
1287 | 
1288 |         elif len(peaks) < 1:
1289 |             pass
1290 | 
1291 |         elif len(peaks) == 1:
1292 |             x_min = x_min_cont
1293 |             x_max = x_max_cont
1294 | 
1295 |             y_min = y_min_cont
1296 |             y_max = y_max_cont
1297 | 
1298 |             p1 = np.dot(rotation_matrix, [int(point_up), int(y_min_cont)])
1299 |             p2 = np.dot(rotation_matrix, [int(point_down), int(y_min_cont)])
1300 |             p3 = np.dot(rotation_matrix, [int(point_down), int(y_max_cont)])
1301 |             p4 = np.dot(rotation_matrix, [int(point_up), int(y_max_cont)])
1302 | 
1303 |             x_min_rot1, point_up_rot1 = p1[0] + x_d, p1[1] + y_d
1304 |             x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d
1305 |             x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d
1306 |             x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d
1307 |             
1308 |             
1309 |             if x_min_rot1<0:
1310 |                 x_min_rot1=0
1311 |             if x_min_rot4<0:
1312 |                 x_min_rot4=0
1313 |             if point_up_rot1<0:
1314 |                 point_up_rot1=0
1315 |             if point_up_rot2<0:
1316 |                 point_up_rot2=0
1317 | 
1318 |             textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
1319 |                                                 [int(x_max_rot2), int(point_up_rot2)],
1320 |                                                 [int(x_max_rot3), int(point_down_rot3)],
1321 |                                                 [int(x_min_rot4), int(point_down_rot4)]]))
1322 | 
1323 |             textline_boxes.append(np.array([[int(x_min), int(y_min)],
1324 |                                             [int(x_max), int(y_min)],
1325 |                                             [int(x_max), int(y_max)],
1326 |                                             [int(x_min), int(y_max)]]))
1327 | 
1328 | 
1329 | 
1330 |         elif len(peaks) == 2:
1331 |             dis_to_next = np.abs(peaks[1] - peaks[0])
1332 |             for jj in range(len(peaks)):
1333 |                 if jj == 0:
1334 |                     point_up = 0#peaks[jj] + first_nonzero - int(1. / 1.7 * dis_to_next)
1335 |                     if point_up < 0:
1336 |                         point_up = 1
1337 |                     point_down = peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next)
1338 |                 elif jj == 1:
1339 |                     point_down = peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next)
1340 |                     if point_down >= img_patch.shape[0]:
1341 |                         point_down = img_patch.shape[0] - 2
1342 |                     point_up = peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next)
1343 | 
1344 |                 distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True)
1345 |                              for mj in range(len(xv))]
1346 |                 distances = np.array(distances)
1347 | 
1348 |                 xvinside = xv[distances >= 0]
1349 | 
1350 |                 if len(xvinside) == 0:
1351 |                     x_min = x_min_cont
1352 |                     x_max = x_max_cont
1353 |                 else:
1354 |                     x_min = np.min(xvinside)
1355 |                     x_max = np.max(xvinside)
1356 | 
1357 |                 p1 = np.dot(rotation_matrix, [int(point_up), int(y_min_cont)])
1358 |                 p2 = np.dot(rotation_matrix, [int(point_down), int(y_min_cont)])
1359 |                 p3 = np.dot(rotation_matrix, [int(point_down), int(y_max_cont)])
1360 |                 p4 = np.dot(rotation_matrix, [int(point_up), int(y_max_cont)])
1361 | 
1362 |                 x_min_rot1, point_up_rot1 = p1[0] + x_d, p1[1] + y_d
1363 |                 x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d
1364 |                 x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d
1365 |                 x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d
1366 |                 
1367 |                 if x_min_rot1<0:
1368 |                     x_min_rot1=0
1369 |                 if x_min_rot4<0:
1370 |                     x_min_rot4=0
1371 |                 if point_up_rot1<0:
1372 |                     point_up_rot1=0
1373 |                 if point_up_rot2<0:
1374 |                     point_up_rot2=0
1375 | 
1376 |                 textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
1377 |                                                     [int(x_max_rot2), int(point_up_rot2)],
1378 |                                                     [int(x_max_rot3), int(point_down_rot3)],
1379 |                                                     [int(x_min_rot4), int(point_down_rot4)]]))
1380 | 
1381 |                 textline_boxes.append(np.array([[int(x_min), int(point_up)],
1382 |                                                 [int(x_max), int(point_up)],
1383 |                                                 [int(x_max), int(point_down)],
1384 |                                                 [int(x_min), int(point_down)]]))
1385 |         else:
1386 |             for jj in range(len(peaks)):
1387 | 
1388 |                 if jj == 0:
1389 |                     dis_to_next = peaks[jj + 1] - peaks[jj]
1390 |                     # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next)
1391 |                     point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next)
1392 |                     if point_up < 0:
1393 |                         point_up = 1
1394 |                     # point_down=peaks[jj]+first_nonzero+int(1./3*dis_to_next)
1395 |                     point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next)
1396 |                 elif jj == len(peaks) - 1:
1397 |                     dis_to_next = peaks[jj] - peaks[jj - 1]
1398 |                     # point_down=peaks[jj]+first_nonzero+int(1./3*dis_to_next)
1399 |                     point_down = peaks[jj] + first_nonzero + int(1. / 1.7 * dis_to_next)
1400 |                     if point_down >= img_patch.shape[0]:
1401 |                         point_down = img_patch.shape[0] - 2
1402 |                     # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next)
1403 |                     point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next)
1404 |                 else:
1405 |                     dis_to_next_down = peaks[jj + 1] - peaks[jj]
1406 |                     dis_to_next_up = peaks[jj] - peaks[jj - 1]
1407 | 
1408 |                     point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next_up)
1409 |                     point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next_down)
1410 | 
1411 |                 distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True)
1412 |                              for mj in range(len(xv))]
1413 |                 distances = np.array(distances)
1414 | 
1415 |                 xvinside = xv[distances >= 0]
1416 | 
1417 |                 if len(xvinside) == 0:
1418 |                     x_min = x_min_cont
1419 |                     x_max = x_max_cont
1420 |                 else:
1421 |                     x_min = np.min(xvinside)  # max(x_min_interest,x_min_cont)
1422 |                     x_max = np.max(xvinside)  # min(x_max_interest,x_max_cont)
1423 | 
1424 |                 p1 = np.dot(rotation_matrix, [int(point_up), int(y_min_cont)])
1425 |                 p2 = np.dot(rotation_matrix, [int(point_down), int(y_min_cont)])
1426 |                 p3 = np.dot(rotation_matrix, [int(point_down), int(y_max_cont)])
1427 |                 p4 = np.dot(rotation_matrix, [int(point_up), int(y_max_cont)])
1428 | 
1429 |                 x_min_rot1, point_up_rot1 = p1[0] + x_d, p1[1] + y_d
1430 |                 x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d
1431 |                 x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d
1432 |                 x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d
1433 |                 
1434 |                 
1435 |                 if x_min_rot1<0:
1436 |                     x_min_rot1=0
1437 |                 if x_min_rot4<0:
1438 |                     x_min_rot4=0
1439 |                 if point_up_rot1<0:
1440 |                     point_up_rot1=0
1441 |                 if point_up_rot2<0:
1442 |                     point_up_rot2=0
1443 |                     
1444 | 
1445 | 
1446 |                 textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
1447 |                                                     [int(x_max_rot2), int(point_up_rot2)],
1448 |                                                     [int(x_max_rot3), int(point_down_rot3)],
1449 |                                                     [int(x_min_rot4), int(point_down_rot4)]]))
1450 | 
1451 |                 textline_boxes.append(np.array([[int(x_min), int(point_up)],
1452 |                                                 [int(x_max), int(point_up)],
1453 |                                                 [int(x_max), int(point_down)],
1454 |                                                 [int(x_min), int(point_down)]]))
1455 | 
1456 | 
1457 |         return peaks, textline_boxes_rot
1458 |     
1459 |     def return_rotated_contours(self,slope,img_patch):
1460 |             dst = self.rotate_image(img_patch, slope)
1461 |             dst = dst.astype(np.uint8)
1462 |             dst = dst[:, :, 0]
1463 |             dst[dst != 0] = 1
1464 |             
1465 |             imgray = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY)
1466 |             _, thresh = cv2.threshold(imgray, 0, 255, 0)
1467 |             thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
1468 |             thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
1469 |             contours, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
1470 |             return contours
1471 |             
1472 |     def textline_contours_postprocessing(self, textline_mask, slope, contour_text_interest, box_ind):
1473 |         
1474 | 
1475 |         textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255
1476 |         textline_mask = textline_mask.astype(np.uint8)
1477 |         kernel = np.ones((5, 5), np.uint8)
1478 |         textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_OPEN, kernel)
1479 |         textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, kernel)
1480 |         ###textline_mask = cv2.erode(textline_mask, kernel, iterations=2)##should be changed
1481 |         
1482 |         
1483 |         try:
1484 | 
1485 |             dst = self.rotate_image(textline_mask, slope)
1486 |             dst = dst[:, :, 0]
1487 |             dst[dst != 0] = 1
1488 |             
1489 |             #plt.imshow(dst)
1490 |             #plt.show()
1491 | 
1492 |             contour_text_copy = contour_text_interest.copy()
1493 | 
1494 |             contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[
1495 |                 0]
1496 |             contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1]
1497 | 
1498 |             img_contour = np.zeros((box_ind[3], box_ind[2], 3))
1499 |             img_contour = cv2.fillPoly(img_contour, pts=[contour_text_copy], color=(255, 255, 255))
1500 | 
1501 | 
1502 |  
1503 |             img_contour_rot = self.rotate_image(img_contour, slope)
1504 | 
1505 |             img_contour_rot = img_contour_rot.astype(np.uint8)
1506 |             imgrayrot = cv2.cvtColor(img_contour_rot, cv2.COLOR_BGR2GRAY)
1507 |             _, threshrot = cv2.threshold(imgrayrot, 0, 255, 0)
1508 |             contours_text_rot, _ = cv2.findContours(threshrot.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
1509 | 
1510 |             len_con_text_rot = [len(contours_text_rot[ib]) for ib in range(len(contours_text_rot))]
1511 |             ind_big_con = np.argmax(len_con_text_rot)
1512 | 
1513 | 
1514 |             if abs(slope)>45:
1515 |                 _, contours_rotated_clean = self.seperate_lines_vertical(dst, contours_text_rot[ind_big_con], slope)
1516 |             else:
1517 |                 _, contours_rotated_clean = self.seperate_lines(dst, contours_text_rot[ind_big_con], slope)
1518 | 
1519 | 
1520 |         except:
1521 | 
1522 |             contours_rotated_clean = []
1523 | 
1524 |         return contours_rotated_clean
1525 | 
1526 | 
1527 |     def return_contours_of_image(self,image_box_tabels_1):
1528 |         
1529 |         image_box_tabels=np.repeat(image_box_tabels_1[:, :, np.newaxis], 3, axis=2)
1530 |         image_box_tabels=image_box_tabels.astype(np.uint8)
1531 |         imgray = cv2.cvtColor(image_box_tabels, cv2.COLOR_BGR2GRAY)
1532 |         ret, thresh = cv2.threshold(imgray, 0, 255, 0)
1533 |         contours,hierachy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
1534 |         return contours,hierachy
1535 |     
1536 |     def find_contours_mean_y_diff(self,contours_main):
1537 |         M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))]
1538 |         cy_main=[(M_main[j]['m01']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))]
1539 |         return np.mean( np.diff( np.sort( np.array(cy_main) ) ) )
1540 |     
1541 |     
1542 |     def isNaN(self,num):
1543 |         return num != num
1544 |     
1545 |     def get_standard_deviation_of_summed_textline_patch_along_width(self,img_patch,sigma_,multiplier=3.8 ):
1546 |         img_patch_sum_along_width=img_patch[:,:].sum(axis=1)
1547 | 
1548 |         img_patch_sum_along_width_updown=img_patch_sum_along_width[len(img_patch_sum_along_width)::-1]
1549 | 
1550 |         first_nonzero=(next((i for i, x in enumerate(img_patch_sum_along_width) if x), 0))
1551 |         last_nonzero=(next((i for i, x in enumerate(img_patch_sum_along_width_updown) if x), 0))
1552 | 
1553 |         last_nonzero=len(img_patch_sum_along_width)-last_nonzero
1554 | 
1555 | 
1556 |         y=img_patch_sum_along_width#[first_nonzero:last_nonzero]
1557 | 
1558 |         y_help=np.zeros(len(y)+20)
1559 | 
1560 |         y_help[10:len(y)+10]=y
1561 | 
1562 |         x=np.array( range(len(y)) )
1563 | 
1564 | 
1565 | 
1566 | 
1567 |         zneg_rev=-y_help+np.max(y_help)
1568 | 
1569 |         zneg=np.zeros(len(zneg_rev)+20)
1570 | 
1571 |         zneg[10:len(zneg_rev)+10]=zneg_rev
1572 | 
1573 |         z=gaussian_filter1d(y, sigma_)
1574 |         zneg= gaussian_filter1d(zneg, sigma_)
1575 | 
1576 | 
1577 |         peaks_neg, _ = find_peaks(zneg, height=0)
1578 |         peaks, _ = find_peaks(z, height=0)
1579 | 
1580 |         peaks_neg=peaks_neg-10-10
1581 |         
1582 |         interest_pos=z[peaks]
1583 |         
1584 |         interest_pos=interest_pos[interest_pos>10]
1585 |         
1586 |         interest_neg=z[peaks_neg]
1587 |         
1588 |         min_peaks_pos=np.mean(interest_pos)
1589 |         min_peaks_neg=0#np.min(interest_neg)
1590 |         
1591 |         dis_talaei=(min_peaks_pos-min_peaks_neg)/multiplier
1592 |         #print(interest_pos)
1593 |         grenze=min_peaks_pos-dis_talaei#np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0
1594 | 
1595 |         interest_neg_fin=interest_neg[(interest_neg<grenze)]
1596 |         peaks_neg_fin=peaks_neg[(interest_neg<grenze)]
1597 |         interest_neg_fin=interest_neg[(interest_neg<grenze)]
1598 |         
1599 |         return interest_neg_fin,np.std(z)
1600 |     
1601 |     def return_deskew_slope(self,img_patch,sigma_des):
1602 |         max_x_y=max(img_patch.shape[0],img_patch.shape[1])
1603 |         
1604 |         ##img_patch=self.resize_image(img_patch,max_x_y,max_x_y)
1605 |         
1606 | 
1607 |         
1608 |         img_patch_copy=np.zeros((img_patch.shape[0],img_patch.shape[1]))
1609 |         img_patch_copy[:,:]=img_patch[:,:]#img_patch_org[:,:,0]
1610 |         
1611 |         
1612 |         img_patch_padded=np.zeros((int( max_x_y*(1.4) ) , int( max_x_y*(1.4) ) ))
1613 |         
1614 |         img_patch_padded_center_p=int(img_patch_padded.shape[0]/2.)
1615 |         len_x_org_patch_half=int(img_patch_copy.shape[1]/2.)
1616 |         len_y_org_patch_half=int(img_patch_copy.shape[0]/2.)
1617 |         
1618 |         img_patch_padded[img_patch_padded_center_p-len_y_org_patch_half:img_patch_padded_center_p-len_y_org_patch_half+img_patch_copy.shape[0],img_patch_padded_center_p-len_x_org_patch_half:img_patch_padded_center_p-len_x_org_patch_half+img_patch_copy.shape[1] ]=img_patch_copy[:,:]
1619 |         #img_patch_padded[ int( img_patch_copy.shape[0]*(.1)):int( img_patch_copy.shape[0]*(.1))+img_patch_copy.shape[0] , int( img_patch_copy.shape[1]*(.8)):int( img_patch_copy.shape[1]*(.8))+img_patch_copy.shape[1] ]=img_patch_copy[:,:]
1620 |         angles=np.linspace(-25,25,80)
1621 | 
1622 |         res=[]
1623 |         num_of_peaks=[]
1624 |         index_cor=[]
1625 |         var_res=[]
1626 |         
1627 |         #plt.imshow(img_patch)
1628 |         #plt.show()
1629 |         indexer=0
1630 |         for rot in angles:
1631 |             #print(rot,'rot')
1632 |             img_rotated=self.rotate_image(img_patch_padded,rot)
1633 |             img_rotated[img_rotated!=0]=1
1634 |             
1635 |             #plt.imshow(img_rotated)
1636 |             #plt.show()
1637 | 
1638 |             try:
1639 |                 neg_peaks,var_spectrum=self.get_standard_deviation_of_summed_textline_patch_along_width(img_rotated,sigma_des,20.3  )
1640 |                 res_me=np.mean(neg_peaks)
1641 |                 if res_me==0:
1642 |                     res_me=1000000000000000000000
1643 |                 else:
1644 |                     pass
1645 |                     
1646 |                 res_num=len(neg_peaks)
1647 |             except:
1648 |                 res_me=1000000000000000000000
1649 |                 res_num=0
1650 |                 var_spectrum=0
1651 |             if self.isNaN(res_me):
1652 |                 pass
1653 |             else:
1654 |                 res.append( res_me )
1655 |                 var_res.append(var_spectrum)
1656 |                 num_of_peaks.append( res_num )
1657 |                 index_cor.append(indexer)
1658 |             indexer=indexer+1
1659 | 
1660 | 
1661 |         try:
1662 |             var_res=np.array(var_res)
1663 |             #print(var_res)
1664 |             
1665 |             ang_int=angles[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
1666 |         except:
1667 |             ang_int=0
1668 |             
1669 |         if abs(ang_int)>15:
1670 |             angles=np.linspace(-90,-50,30)
1671 |             res=[]
1672 |             num_of_peaks=[]
1673 |             index_cor=[]
1674 |             var_res=[]
1675 |             
1676 |             #plt.imshow(img_patch)
1677 |             #plt.show()
1678 |             indexer=0
1679 |             for rot in angles:
1680 |                 #print(rot,'rot')
1681 |                 img_rotated=self.rotate_image(img_patch_padded,rot)
1682 |                 img_rotated[img_rotated!=0]=1
1683 |                 
1684 |                 #plt.imshow(img_rotated)
1685 |                 #plt.show()
1686 | 
1687 |                 try:
1688 |                     neg_peaks,var_spectrum=self.get_standard_deviation_of_summed_textline_patch_along_width(img_rotated,sigma_des,20.3  )
1689 |                     res_me=np.mean(neg_peaks)
1690 |                     if res_me==0:
1691 |                         res_me=1000000000000000000000
1692 |                     else:
1693 |                         pass
1694 |                         
1695 |                     res_num=len(neg_peaks)
1696 |                 except:
1697 |                     res_me=1000000000000000000000
1698 |                     res_num=0
1699 |                     var_spectrum=0
1700 |                 if self.isNaN(res_me):
1701 |                     pass
1702 |                 else:
1703 |                     res.append( res_me )
1704 |                     var_res.append(var_spectrum)
1705 |                     num_of_peaks.append( res_num )
1706 |                     index_cor.append(indexer)
1707 |                 indexer=indexer+1
1708 | 
1709 | 
1710 |             try:
1711 |                 var_res=np.array(var_res)
1712 |                 #print(var_res)
1713 |                 
1714 |                 ang_int=angles[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
1715 |             except:
1716 |                 ang_int=0
1717 | 
1718 |         return ang_int
1719 | 
1720 |                           
1721 |     def do_work_of_slopes(self,queue_of_all_params,boxes_per_process,textline_mask_tot,contours_per_process):
1722 |         
1723 |         slopes_per_each_subprocess = []
1724 |         bounding_box_of_textregion_per_each_subprocess=[]
1725 |         textlines_rectangles_per_each_subprocess=[]
1726 |         contours_textregion_per_each_subprocess=[]
1727 | 
1728 |         for mv in range(len(boxes_per_process)):
1729 |             
1730 |             contours_textregion_per_each_subprocess.append(contours_per_process[mv])
1731 |             crop_img, _ = self.crop_image_inside_box(boxes_per_process[mv],
1732 |                                                                         np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2))
1733 |             crop_img=crop_img[:,:,0]
1734 |             crop_img=cv2.erode(crop_img,self.kernel,iterations = 2)
1735 |             
1736 |             try:
1737 |                 sigma_des=2
1738 |                 slope_corresponding_textregion=self.return_deskew_slope(crop_img,sigma_des)
1739 |             except:
1740 |                 slope_corresponding_textregion=999
1741 |             #print(slope_corresponding_textregion,'slope_corresponding_textregion')
1742 |                 
1743 |         
1744 |             if np.abs(slope_corresponding_textregion)>120.5 and slope_corresponding_textregion!=999:
1745 |                 slope_corresponding_textregion=0
1746 |             elif slope_corresponding_textregion==999:
1747 |                 slope_corresponding_textregion=0
1748 |             slopes_per_each_subprocess.append(slope_corresponding_textregion)
1749 |             
1750 |             bounding_rectangle_of_textlines = self.textline_contours_postprocessing(crop_img
1751 |                                                                                         , slope_corresponding_textregion,
1752 |                                                                                         contours_per_process[mv], boxes_per_process[mv])
1753 |             
1754 |             textlines_rectangles_per_each_subprocess.append(bounding_rectangle_of_textlines)
1755 |             bounding_box_of_textregion_per_each_subprocess.append(boxes_per_process[mv] )
1756 | 
1757 |         
1758 |         queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess])
1759 | 
1760 |     def get_slopes_and_deskew(self, contours,textline_mask_tot):
1761 |         num_cores =cpu_count()
1762 |         queue_of_all_params = Queue()
1763 |         
1764 |         processes = []
1765 |         nh=np.linspace(0, len(self.boxes), num_cores+1)
1766 |         
1767 |         
1768 |         for i in range(num_cores):
1769 |             boxes_per_process=self.boxes[int(nh[i]):int(nh[i+1])]
1770 |             contours_per_process=contours[int(nh[i]):int(nh[i+1])]
1771 |                                                                           
1772 |             processes.append(Process(target=self.do_work_of_slopes, args=(queue_of_all_params,  boxes_per_process, textline_mask_tot, contours_per_process)))
1773 |         
1774 |         for i in range(num_cores):
1775 |             processes[i].start()
1776 |             
1777 |         self.slopes = []
1778 |         self.all_found_texline_polygons=[]
1779 |         all_found_text_regions=[]
1780 |         self.boxes=[]
1781 |         
1782 |         for i in range(num_cores):
1783 |             list_all_par=queue_of_all_params.get(True)
1784 |             
1785 |             slopes_for_sub_process=list_all_par[0]
1786 |             polys_for_sub_process=list_all_par[1]
1787 |             boxes_for_sub_process=list_all_par[2]
1788 |             contours_for_subprocess=list_all_par[3]
1789 |             
1790 |             for j in range(len(slopes_for_sub_process)):
1791 |                 self.slopes.append(slopes_for_sub_process[j])
1792 |                 self.all_found_texline_polygons.append(polys_for_sub_process[j])
1793 |                 self.boxes.append(boxes_for_sub_process[j])
1794 |                 all_found_text_regions.append(contours_for_subprocess[j])
1795 |                 
1796 |         for i in range(num_cores):
1797 |             processes[i].join()
1798 | 
1799 |         return all_found_text_regions
1800 |             
1801 |         
1802 |     def order_of_regions(self, textline_mask,contours_main):
1803 |         textline_sum_along_width=textline_mask.sum(axis=1)
1804 |         
1805 |         y=textline_sum_along_width[:]
1806 |         y_padded=np.zeros(len(y)+40)
1807 |         y_padded[20:len(y)+20]=y
1808 |         x=np.array( range(len(y)) )
1809 | 
1810 | 
1811 |         peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
1812 |         
1813 | 
1814 |         sigma_gaus=8
1815 | 
1816 |         z= gaussian_filter1d(y_padded, sigma_gaus)
1817 |         zneg_rev=-y_padded+np.max(y_padded)
1818 | 
1819 |         zneg=np.zeros(len(zneg_rev)+40)
1820 |         zneg[20:len(zneg_rev)+20]=zneg_rev
1821 |         zneg= gaussian_filter1d(zneg, sigma_gaus)
1822 | 
1823 | 
1824 |         peaks, _ = find_peaks(z, height=0)
1825 |         peaks_neg, _ = find_peaks(zneg, height=0)
1826 | 
1827 |         peaks_neg=peaks_neg-20-20
1828 |         peaks=peaks-20
1829 |         
1830 | 
1831 |         
1832 |         if contours_main!=None:
1833 |             areas_main=np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
1834 |             M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))]
1835 |             cx_main=[(M_main[j]['m10']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))]
1836 |             cy_main=[(M_main[j]['m01']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))]
1837 |             x_min_main=np.array([np.min(contours_main[j][:,0,0]) for j in range(len(contours_main))])
1838 |             x_max_main=np.array([np.max(contours_main[j][:,0,0]) for j in range(len(contours_main))])
1839 | 
1840 |             y_min_main=np.array([np.min(contours_main[j][:,0,1]) for j in range(len(contours_main))])
1841 |             y_max_main=np.array([np.max(contours_main[j][:,0,1]) for j in range(len(contours_main))])
1842 | 
1843 | 
1844 |         
1845 |         
1846 |         if contours_main!=None:
1847 |             indexer_main=np.array(range(len(contours_main)))
1848 | 
1849 |         
1850 |         if contours_main!=None:
1851 |             len_main=len(contours_main)
1852 |         else:
1853 |             len_main=0
1854 | 
1855 |         
1856 |         matrix_of_orders=np.zeros((len_main,5))
1857 |         
1858 |         matrix_of_orders[:,0]=np.array( range( len_main ) )
1859 |         
1860 |         matrix_of_orders[:len_main,1]=1
1861 |         matrix_of_orders[len_main:,1]=2
1862 |         
1863 |         matrix_of_orders[:len_main,2]=cx_main
1864 |         matrix_of_orders[:len_main,3]=cy_main
1865 | 
1866 |         matrix_of_orders[:len_main,4]=np.array( range( len_main ) )
1867 | 
1868 |         peaks_neg_new=[]
1869 |         peaks_neg_new.append(0)
1870 |         for iii in range(len(peaks_neg)):
1871 |             peaks_neg_new.append(peaks_neg[iii])
1872 |         peaks_neg_new.append(textline_mask.shape[0])
1873 |         
1874 |         final_indexers_sorted=[]
1875 |         for i in range(len(peaks_neg_new)-1):
1876 |             top=peaks_neg_new[i]
1877 |             down=peaks_neg_new[i+1]
1878 |             
1879 |             indexes_in=matrix_of_orders[:,0][(matrix_of_orders[:,3]>=top) & ((matrix_of_orders[:,3]<down))]
1880 |             cxs_in=matrix_of_orders[:,2][(matrix_of_orders[:,3]>=top) & ((matrix_of_orders[:,3]<down))]
1881 |             
1882 |             sorted_inside=np.argsort(cxs_in)
1883 |             
1884 |             ind_in_int=indexes_in[sorted_inside]
1885 |             
1886 |             for j in range(len(ind_in_int)):
1887 |                 final_indexers_sorted.append(int(ind_in_int[j]) )
1888 |         
1889 |         return final_indexers_sorted, matrix_of_orders
1890 | 
1891 |             
1892 | 
1893 |     
1894 |     def order_and_id_of_texts(self, found_polygons_text_region ,matrix_of_orders ,indexes_sorted ):
1895 |         id_of_texts=[]
1896 |         order_of_texts=[]
1897 |         index_b=0
1898 |         for mm in range(len(found_polygons_text_region)):
1899 |             id_of_texts.append('r'+str(index_b) )
1900 |             index_matrix=matrix_of_orders[:,0][( matrix_of_orders[:,1]==1 ) & ( matrix_of_orders[:,4]==mm ) ]
1901 |             order_of_texts.append(np.where(indexes_sorted == index_matrix)[0][0])
1902 | 
1903 |             index_b+=1
1904 |             
1905 |         order_of_texts
1906 |         return order_of_texts, id_of_texts
1907 |     
1908 |     def write_into_page_xml(self,contours,page_coord,dir_of_image,order_of_texts , id_of_texts):
1909 | 
1910 |         found_polygons_text_region=contours
1911 | 
1912 | 
1913 |         # create the file structure
1914 |         data = ET.Element('PcGts')
1915 | 
1916 |         data.set('xmlns',"http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")
1917 |         data.set('xmlns:xsi',"http://www.w3.org/2001/XMLSchema-instance")
1918 |         data.set('xsi:schemaLocation',"http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")
1919 | 
1920 | 
1921 | 
1922 |         metadata=ET.SubElement(data,'Metadata')
1923 | 
1924 |         author=ET.SubElement(metadata, 'Creator')
1925 |         author.text = 'SBB_QURATOR'
1926 | 
1927 | 
1928 |         created=ET.SubElement(metadata, 'Created')
1929 |         created.text = datetime.datetime.now().isoformat()
1930 |         changetime=ET.SubElement(metadata, 'LastChange')
1931 |         changetime.text = datetime.datetime.now().isoformat()
1932 | 
1933 | 
1934 |         page=ET.SubElement(data,'Page')
1935 | 
1936 |         page.set('imageFilename', self.image_dir)
1937 |         page.set('imageHeight',str(self.height_org) ) 
1938 |         page.set('imageWidth',str(self.width_org) )
1939 |         page.set('type',"content")
1940 |         page.set('readingDirection',"left-to-right")
1941 |         page.set('textLineOrder',"top-to-bottom" )
1942 | 
1943 | 
1944 |         
1945 |         page_print_sub=ET.SubElement(page, 'Border')
1946 |         coord_page = ET.SubElement(page_print_sub, 'Coords')
1947 |         points_page_print=''
1948 | 
1949 |         for lmm in range(len(self.cont_page[0])):
1950 |             if len(self.cont_page[0][lmm])==2:
1951 |                 points_page_print=points_page_print+str( int( (self.cont_page[0][lmm][0])/self.scale_x ) )
1952 |                 points_page_print=points_page_print+','
1953 |                 points_page_print=points_page_print+str( int( (self.cont_page[0][lmm][1])/self.scale_y ) )
1954 |             else:
1955 |                 points_page_print=points_page_print+str( int((self.cont_page[0][lmm][0][0])/self.scale_x) )
1956 |                 points_page_print=points_page_print+','
1957 |                 points_page_print=points_page_print+str( int((self.cont_page[0][lmm][0][1])/self.scale_y) )
1958 | 
1959 |             if lmm<(len(self.cont_page[0])-1):
1960 |                 points_page_print=points_page_print+' '
1961 |         coord_page.set('points',points_page_print)
1962 |         
1963 | 
1964 |         if len(contours)>0:
1965 |             region_order=ET.SubElement(page, 'ReadingOrder')
1966 |             region_order_sub = ET.SubElement(region_order, 'OrderedGroup')
1967 |             
1968 |             region_order_sub.set('id',"ro357564684568544579089")
1969 |     
1970 |             args_sort=np.argsort(order_of_texts)
1971 |             for vj in args_sort:
1972 |                 name="coord_text_"+str(vj)
1973 |                 name = ET.SubElement(region_order_sub, 'RegionRefIndexed')
1974 |                 name.set('index',str(order_of_texts[vj]) )
1975 |                 name.set('regionRef',id_of_texts[vj])
1976 |     
1977 |     
1978 |             id_indexer=0
1979 |             id_indexer_l=0
1980 |     
1981 |             for mm in range(len(found_polygons_text_region)):
1982 |                 textregion=ET.SubElement(page, 'TextRegion')
1983 |     
1984 |                 textregion.set('id','r'+str(id_indexer))
1985 |                 id_indexer+=1
1986 |                 
1987 |                 textregion.set('type','paragraph')
1988 |                 #if mm==0:
1989 |                 #    textregion.set('type','heading')
1990 |                 #else:
1991 |                 #    textregion.set('type','paragraph')
1992 |                 coord_text = ET.SubElement(textregion, 'Coords')
1993 |                 
1994 |                 points_co=''
1995 |                 for lmm in range(len(found_polygons_text_region[mm])):
1996 |                     if len(found_polygons_text_region[mm][lmm])==2:
1997 |                         points_co=points_co+str( int( (found_polygons_text_region[mm][lmm][0] +page_coord[2])/self.scale_x ) )
1998 |                         points_co=points_co+','
1999 |                         points_co=points_co+str( int( (found_polygons_text_region[mm][lmm][1] +page_coord[0])/self.scale_y ) )
2000 |                     else:
2001 |                         points_co=points_co+str( int((found_polygons_text_region[mm][lmm][0][0] +page_coord[2])/self.scale_x) )
2002 |                         points_co=points_co+','
2003 |                         points_co=points_co+str( int((found_polygons_text_region[mm][lmm][0][1] +page_coord[0])/self.scale_y) )
2004 |     
2005 |                     if lmm<(len(found_polygons_text_region[mm])-1):
2006 |                         points_co=points_co+' '
2007 |                 #print(points_co)
2008 |                 coord_text.set('points',points_co)
2009 |                 
2010 |                 
2011 |                 
2012 |                 for j in range(len(self.all_found_texline_polygons[mm])):
2013 |     
2014 |                     textline=ET.SubElement(textregion, 'TextLine')
2015 |                     
2016 |                     textline.set('id','l'+str(id_indexer_l))
2017 |                     
2018 |                     id_indexer_l+=1
2019 |                     
2020 |     
2021 |                     coord = ET.SubElement(textline, 'Coords')
2022 |                     #points = ET.SubElement(coord, 'Points') 
2023 |     
2024 |                     points_co=''
2025 |                     for l in range(len(self.all_found_texline_polygons[mm][j])):
2026 |                         #point = ET.SubElement(coord, 'Point') 
2027 |     
2028 |     
2029 |     
2030 |                         #point.set('x',str(found_polygons[j][l][0]))  
2031 |                         #point.set('y',str(found_polygons[j][l][1]))
2032 |                         if len(self.all_found_texline_polygons[mm][j][l])==2:
2033 |                             points_co=points_co+str( int( (self.all_found_texline_polygons[mm][j][l][0] +page_coord[2]
2034 |                                                     +self.all_box_coord[mm][2])/self.scale_x) )
2035 |                             points_co=points_co+','
2036 |                             points_co=points_co+str( int( (self.all_found_texline_polygons[mm][j][l][1] +page_coord[0]
2037 |                                                     +self.all_box_coord[mm][0])/self.scale_y) )
2038 |                         else:
2039 |                             points_co=points_co+str( int( ( self.all_found_texline_polygons[mm][j][l][0][0] +page_coord[2]
2040 |                                                     +self.all_box_coord[mm][2])/self.scale_x ) )
2041 |                             points_co=points_co+','
2042 |                             points_co=points_co+str( int( ( self.all_found_texline_polygons[mm][j][l][0][1] +page_coord[0]
2043 |                                                     +self.all_box_coord[mm][0])/self.scale_y) ) 
2044 |     
2045 |                         if l<(len(self.all_found_texline_polygons[mm][j])-1):
2046 |                             points_co=points_co+' '
2047 |                     #print(points_co)
2048 |                     coord.set('points',points_co)
2049 | 
2050 | 
2051 | 
2052 |         tree = ET.ElementTree(data)
2053 |         tree.write(os.path.join(self.dir_out, self.f_name) + ".xml")
2054 |  
2055 |     
2056 |     def run(self):
2057 |         
2058 |         #get image and scales, then extract the page of scanned image
2059 |         t1=time.time()
2060 |         self.get_image_and_scales()
2061 |         image_page,page_coord=self.extract_page()
2062 | 
2063 |         
2064 |         ##########  
2065 |         K.clear_session()
2066 |         gc.collect()
2067 |         t2=time.time()
2068 |         
2069 |         try:
2070 |             try:
2071 |                 # extract text regions and corresponding contours and surrounding box
2072 |                 text_regions=self.extract_text_regions(image_page)
2073 |                 
2074 |                 text_regions = cv2.erode(text_regions, self.kernel, iterations=3)
2075 |                 text_regions = cv2.dilate(text_regions, self.kernel, iterations=4)
2076 |                 
2077 |                 #plt.imshow(text_regions[:,:,0])
2078 |                 #plt.show()
2079 | 
2080 |                 contours=self.get_text_region_contours_and_boxes(text_regions)
2081 |             
2082 |             
2083 |             
2084 |                 ##########  
2085 |                 K.clear_session()
2086 |                 gc.collect()
2087 |             
2088 |             
2089 |             except:
2090 |                 text_regions=None
2091 |                 contours=[]
2092 |                 
2093 |                 
2094 |             t3=time.time()
2095 | 
2096 |             
2097 |             if len(contours)>0:
2098 |                 
2099 | 
2100 |                 
2101 |                 # extracting textlines using segmentation
2102 |                 textline_mask_tot=self.textline_contours(image_page)
2103 |                 ##########  
2104 |                 K.clear_session()
2105 |                 gc.collect()
2106 |                 
2107 |                 t4=time.time()
2108 |                 
2109 |                 
2110 |                 # calculate the slope for deskewing for each box of text region.
2111 |                 contours=self.get_slopes_and_deskew(contours,textline_mask_tot)
2112 |                 
2113 |                 gc.collect()
2114 |                 t5=time.time()
2115 |                 
2116 |                 
2117 |                 # get orders of each textregion. This method by now only works for one column documents. 
2118 |                 indexes_sorted, matrix_of_orders=self.order_of_regions(textline_mask_tot,contours)
2119 |                 order_of_texts, id_of_texts=self.order_and_id_of_texts(contours ,matrix_of_orders ,indexes_sorted )
2120 |                 
2121 |                 
2122 |                 ##########  
2123 |                 gc.collect()
2124 |                 t6=time.time()
2125 |                 
2126 |                 
2127 |                 self.get_all_image_patches_coordination(image_page)
2128 |                 
2129 |                 ########## 
2130 |                 ##########  
2131 |                 gc.collect()
2132 |                 
2133 |                 t7=time.time()
2134 | 
2135 |             else:
2136 |                 contours=[]
2137 |                 order_of_texts=None
2138 |                 id_of_texts=None
2139 |             self.write_into_page_xml(contours,page_coord,self.dir_out , order_of_texts , id_of_texts)
2140 | 
2141 |             # Destroy the current Keras session/graph to free memory
2142 |             K.clear_session()
2143 |             
2144 |             print( "time total = "+"{0:.2f}".format(time.time()-t1) )
2145 |             print( "time needed for page extraction = "+"{0:.2f}".format(t2-t1) )
2146 |             print( "time needed for text region extraction and get contours = "+"{0:.2f}".format(t3-t2) )
2147 |             if len(contours)>0:
2148 |                 print( "time needed for textlines = "+"{0:.2f}".format(t4-t3) )
2149 |                 print( "time needed to get slopes of regions (deskewing) = "+"{0:.2f}".format(t5-t4) )
2150 |                 print( "time needed to get order of regions = "+"{0:.2f}".format(t6-t5) )
2151 |                 print( "time needed to implement deskewing = "+"{0:.2f}".format(t7-t6) )
2152 |         except:
2153 |             contours=[]
2154 |             order_of_texts=None
2155 |             id_of_texts=None
2156 |             self.write_into_page_xml(contours,page_coord,self.dir_out , order_of_texts , id_of_texts)
2157 |             print( "time total = "+"{0:.2f}".format(time.time()-t1) )
2158 |         
2159 | 
2160 |         
2161 | 
2162 | @click.command()
2163 | @click.option('--image', '-i', help='image filename',
2164 |               type=click.Path(exists=True, dir_okay=False), required=True)
2165 | @click.option('--out', '-o', help='directory to write output xml data',
2166 |               type=click.Path(exists=True, file_okay=False), required=True)
2167 | @click.option('--model', '-m', help='directory of models',
2168 |               type=click.Path(exists=True, file_okay=False), required=True)
2169 | def main(image, out, model):
2170 |     x = textline_detector(image, out, None, model)
2171 |     x.run()
2172 | 
2173 | 
2174 | if __name__ == "__main__":
2175 |     main()
2176 |  
2177 | 


--------------------------------------------------------------------------------
/qurator/sbb_textline_detector/ocrd-tool.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "0.0.1",
 3 |   "git_url": "https://github.com/qurator-spk/sbb_textline_detection",
 4 |   "tools": {
 5 |     "ocrd-sbb-textline-detector": {
 6 |       "executable": "ocrd-sbb-textline-detector",
 7 |       "categories": ["Layout analysis"],
 8 |       "description": "Printspace, region and textline detection",
 9 |       "steps": ["layout/segmentation/region", "layout/segmentation/line"],
10 |       "input_file_grp": [
11 |         "OCR-D-IMG"
12 |       ],
13 |       "output_file_grp": [
14 |         "OCR-D-SBB-SEG-LINE"
15 |       ],
16 |       "parameters": {
17 |         "model": {
18 |           "type": "string",
19 |           "format": "uri",
20 |           "content-type": "text/directory",
21 |           "cacheable": true,
22 |           "description": "Path to directory containing models to be used (See https://qurator-data.de/sbb_textline_detector/)"
23 |         }
24 |       }
25 |     }
26 |   }
27 | }
28 | 


--------------------------------------------------------------------------------
/qurator/sbb_textline_detector/ocrd_cli.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | import tempfile
  4 | 
  5 | import click
  6 | import ocrd_models.ocrd_page
  7 | from ocrd import Processor
  8 | from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
  9 | from ocrd_modelfactory import page_from_file
 10 | from ocrd_models.ocrd_page_generateds import CoordsType, PageType
 11 | from ocrd_utils import (
 12 |     assert_file_grp_cardinality,
 13 |     getLogger,
 14 |     make_file_id,
 15 |     coordinates_for_segment,
 16 |     polygon_from_points, points_from_polygon,
 17 | )
 18 | import numpy as np
 19 | from shapely.geometry import Polygon, asPolygon
 20 | from shapely.ops import unary_union
 21 | 
 22 | from pkg_resources import resource_string
 23 | 
 24 | from qurator.sbb_textline_detector import textline_detector
 25 | 
 26 | OCRD_TOOL = json.loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8'))
 27 | 
 28 | 
 29 | @click.command()
 30 | @ocrd_cli_options
 31 | def ocrd_sbb_textline_detector(*args, **kwargs):
 32 |     return ocrd_cli_wrap_processor(OcrdSbbTextlineDetectorRecognize, *args, **kwargs)
 33 | 
 34 | 
 35 | TOOL = 'ocrd-sbb-textline-detector'
 36 | 
 37 | class OcrdSbbTextlineDetectorRecognize(Processor):
 38 | 
 39 |     def __init__(self, *args, **kwargs):
 40 |         kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL]
 41 |         kwargs['version'] = OCRD_TOOL['version']
 42 |         super(OcrdSbbTextlineDetectorRecognize, self).__init__(*args, **kwargs)
 43 | 
 44 | 
 45 |     def process(self):
 46 |         log = getLogger('processor.OcrdSbbTextlineDetectorRecognize')
 47 |         assert_file_grp_cardinality(self.input_file_grp, 1)
 48 |         assert_file_grp_cardinality(self.output_file_grp, 1)
 49 |         model = self.resolve_resource(self.parameter['model'])
 50 | 
 51 |         for (n, input_file) in enumerate(self.input_files):
 52 |             page_id = input_file.pageId or input_file.ID
 53 |             log.info("INPUT FILE %i / %s", n, input_file)
 54 | 
 55 |             file_id = make_file_id(input_file, self.output_file_grp)
 56 | 
 57 |             # Process the files
 58 |             try:
 59 |                 os.mkdir(self.output_file_grp)
 60 |             except FileExistsError:
 61 |                 pass
 62 | 
 63 |             pcgts = page_from_file(self.workspace.download_file(input_file))
 64 |             page = pcgts.get_Page()
 65 |             page_image, page_coords, page_image_info = \
 66 |                 self.workspace.image_from_page(
 67 |                     page, page_id,
 68 |                     feature_filter='cropped,binarized,grayscale_normalized'
 69 |                 )
 70 | 
 71 |             with tempfile.TemporaryDirectory() as tmp_dirname:
 72 |                 # Save the image
 73 |                 image_file = tempfile.mkstemp(dir=tmp_dirname, suffix='.png')[1]
 74 |                 page_image.save(image_file)
 75 | 
 76 |                 # Segment the image
 77 |                 x = textline_detector(image_file, tmp_dirname, file_id, model)
 78 |                 x.run()
 79 | 
 80 |                 # Read segmentation results
 81 |                 tmp_filename = os.path.join(tmp_dirname, file_id) + '.xml'
 82 |                 tmp_pcgts = ocrd_models.ocrd_page.parse(tmp_filename, silence=True)
 83 |                 tmp_page = tmp_pcgts.get_Page()
 84 | 
 85 |             # Create a new PAGE file from the input file
 86 |             pcgts.set_pcGtsId(file_id)
 87 | 
 88 |             # Merge results → PAGE file
 89 | 
 90 |             # 1. Border
 91 |             if page.get_Border():
 92 |                 log.warning("Removing existing page border")
 93 |             page.set_Border(None)
 94 |             # We need to translate the coordinates:
 95 |             text_border = adapt_coords(tmp_page.get_Border(), page, page_coords)
 96 |             if text_border is None:
 97 |                 # intersection is empty (border outside of rotated original image)
 98 |                 log.warning("new border would be empty, skipping")
 99 |             else:
100 |                 page.set_Border(text_border)
101 | 
102 |             # 2. ReadingOrder
103 |             if page.get_ReadingOrder():
104 |                 log.warning("Removing existing regions' reading order")
105 |             page.set_ReadingOrder(tmp_page.get_ReadingOrder())
106 | 
107 |             # 3. TextRegion
108 |             # FIXME: what about table and image regions?
109 |             if page.get_TextRegion():
110 |                 log.warning("Removing existing text regions")
111 |             # We need to translate the coordinates:
112 |             text_regions_new = []
113 |             for text_region in tmp_page.get_TextRegion():
114 |                 text_region = adapt_coords(text_region, page, page_coords)
115 |                 if text_region is None:
116 |                     # intersection is empty (polygon outside of above border)
117 |                     log.warning("new text region polygon would be empty, skipping")
118 |                     continue
119 |                 text_regions_new.append(text_region)
120 |                 text_lines_new = []
121 |                 for text_line in text_region.get_TextLine():
122 |                     text_line = adapt_coords(text_line, text_region, page_coords)
123 |                     if text_line is None:
124 |                         # intersection is empty (polygon outside of region)
125 |                         log.warning("new text line polygon would be empty, skipping")
126 |                         continue
127 |                     text_lines_new.append(text_line)
128 |                 text_region.set_TextLine(text_lines_new)
129 |             page.set_TextRegion(text_regions_new)
130 | 
131 |             # Save metadata about this operation
132 |             self.add_metadata(pcgts)
133 | 
134 |             self.workspace.add_file(
135 |                 ID=file_id,
136 |                 file_grp=self.output_file_grp,
137 |                 pageId=page_id,
138 |                 mimetype='application/vnd.prima.page+xml',
139 |                 local_filename=os.path.join(self.output_file_grp, file_id) + '.xml',
140 |                 content=ocrd_models.ocrd_page.to_xml(pcgts)
141 |             )
142 | 
143 | 
144 | def adapt_coords(segment, parent, transform):
145 |     points = segment.get_Coords().get_points()
146 |     polygon = polygon_from_points(points)
147 |     # polygon absolute coords (after transforming back from page coords, e.g. deskewing)
148 |     polygon_new = coordinates_for_segment(polygon, None, transform)
149 |     # intersection with parent polygon
150 |     polygon_new = polygon_for_parent(polygon_new, parent)
151 |     if polygon_new is None:
152 |         return None
153 |     points_new = points_from_polygon(polygon_new)
154 |     segment.set_Coords(CoordsType(points=points_new))
155 |     return segment
156 | 
157 | # from ocrd_tesserocr, to be integrated into core (somehow)...
158 | def polygon_for_parent(polygon, parent):
159 |     """Clip polygon to parent polygon range.
160 | 
161 |     (Should be moved to ocrd_utils.coordinates_for_segment.)
162 |     """
163 |     childp = Polygon(polygon)
164 |     if isinstance(parent, PageType):
165 |         if parent.get_Border():
166 |             parentp = Polygon(polygon_from_points(parent.get_Border().get_Coords().points))
167 |         else:
168 |             parentp = Polygon([[0, 0], [0, parent.get_imageHeight()],
169 |                                [parent.get_imageWidth(), parent.get_imageHeight()],
170 |                                [parent.get_imageWidth(), 0]])
171 |     else:
172 |         parentp = Polygon(polygon_from_points(parent.get_Coords().points))
173 |     # check if clipping is necessary
174 |     if childp.within(parentp):
175 |         return polygon
176 |     # ensure input coords have valid paths (without self-intersection)
177 |     # (this can happen when shapes valid in floating point are rounded)
178 |     childp = make_valid(childp)
179 |     parentp = make_valid(parentp)
180 |     # clip to parent
181 |     interp = childp.intersection(parentp)
182 |     if interp.is_empty or interp.area == 0.0:
183 |         # this happens if Tesseract "finds" something
184 |         # outside of the valid Border of a deskewed/cropped page
185 |         # (empty corners created by masking); will be ignored
186 |         return None
187 |     if interp.type == 'GeometryCollection':
188 |         # heterogeneous result: filter zero-area shapes (LineString, Point)
189 |         interp = unary_union([geom for geom in interp.geoms if geom.area > 0])
190 |     if interp.type == 'MultiPolygon':
191 |         # homogeneous result: construct convex hull to connect
192 |         # FIXME: construct concave hull / alpha shape
193 |         interp = interp.convex_hull
194 |     if interp.minimum_clearance < 1.0:
195 |         # follow-up calculations will necessarily be integer;
196 |         # so anticipate rounding here and then ensure validity
197 |         interp = asPolygon(np.round(interp.exterior.coords))
198 |         interp = make_valid(interp)
199 |     return interp.exterior.coords[:-1] # keep open
200 | 
201 | # from ocrd_tesserocr, to be integrated into core (somehow)...
202 | def make_valid(polygon):
203 |     for split in range(1, len(polygon.exterior.coords)-1):
204 |         if polygon.is_valid or polygon.simplify(polygon.area).is_valid:
205 |             break
206 |         # simplification may not be possible (at all) due to ordering
207 |         # in that case, try another starting point
208 |         polygon = Polygon(polygon.exterior.coords[-split:]+polygon.exterior.coords[:-split])
209 |     for tolerance in range(1, int(polygon.area)):
210 |         if polygon.is_valid:
211 |             break
212 |         # simplification may require a larger tolerance
213 |         polygon = polygon.simplify(tolerance)
214 |     return polygon
215 | 
216 | if __name__ == '__main__':
217 |     ocrd_sbb_textline_detector()
218 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | opencv-python-headless == 4.5.1.48  # XXX https://github.com/qurator-spk/sbb_textline_detection/issues/60
 2 | matplotlib
 3 | seaborn
 4 | tqdm
 5 | keras == 2.3.*
 6 | h5py < 3
 7 | shapely
 8 | scikit-learn
 9 | numpy == 1.18.*  # XXX for tensorflow-gpu 1.15
10 | tensorflow-gpu == 1.15.*
11 | scipy
12 | ocrd >= 2.22.3
13 | shapely >= 1.7.1
14 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from io import open
 2 | from setuptools import find_namespace_packages, setup
 3 | 
 4 | with open('requirements.txt') as fp:
 5 |     install_requires = fp.read()
 6 | 
 7 | setup(
 8 |     name="qurator-sbb-textline",
 9 |     version="0.0.1",
10 |     author="The Qurator Team",
11 |     author_email="qurator@sbb.spk-berlin.de",
12 |     description="Qurator",
13 |     long_description=open("README.md", "r", encoding='utf-8').read(),
14 |     long_description_content_type="text/markdown",
15 |     keywords='qurator',
16 |     license='Apache',
17 |     url="https://qurator.ai",
18 |     packages=find_namespace_packages(include=['qurator']),
19 |     install_requires=install_requires,
20 |     package_data={
21 |         '': ['*.json'],
22 |     },
23 |     entry_points={
24 |       'console_scripts': [
25 |         "sbb_textline_detector=qurator.sbb_textline_detector:main",
26 |         "ocrd-sbb-textline-detector=qurator.sbb_textline_detector:ocrd_sbb_textline_detector",
27 |       ]
28 |     },
29 |     python_requires='>=3.6.0',
30 |     tests_require=['pytest'],
31 |     classifiers=[
32 |           'Intended Audience :: Science/Research',
33 |           'License :: OSI Approved :: Apache Software License',
34 |           'Programming Language :: Python :: 3',
35 |           'Topic :: Scientific/Engineering :: Artificial Intelligence',
36 |     ],
37 | )
38 | 


--------------------------------------------------------------------------------