├── .gitignore ├── LICENSE ├── README.md ├── lineaug ├── __init__.py ├── libs │ ├── __init__.py │ └── ocrodeg.py ├── scripts │ ├── __init__.py │ └── augment.py └── src │ ├── Augmentation.py │ ├── __init__.py │ └── utils.py ├── requirements.txt ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .idea -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Maximilian Nöth 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LineAug 2 | Augment line images for improving OCR datasets 3 | 4 | ## Getting Started 5 | ### PyPi 6 | `pip install lineaug` 7 | ### Manually 8 | Run `python setup.py install` inside the clone repository. 9 | 10 | ## CLI 11 | ``` 12 | usage: augment.py [-h] -i [IMAGES [IMAGES ...]] [-o OUTPUT] [-gt GROUND_TRUTH] [-n N] [-bg] [-e] 13 | 14 | Augment OCR in the form of line images. 15 | 16 | optional arguments: 17 | -h, --help show this help message and exit 18 | -i [IMAGES [IMAGES ...]], --images [IMAGES [IMAGES ...]] 19 | Path to line image(s). 20 | -o OUTPUT, --output OUTPUT 21 | Output path where augmented images will be saved. 22 | -gt GROUND_TRUTH, --ground_truth GROUND_TRUTH 23 | Extension of the ground truth text files. Will create new ground truth files for the augmented line images containing the existing ground truth for the associated 24 | line (optional). 25 | -n N Number of augmented line image variants to create for each input. 26 | -bg, --background Whether to add noise to the background of the line image. 27 | -e, --enumerate Enumerate output file names instead of using input file names. 28 | ``` 29 | -------------------------------------------------------------------------------- /lineaug/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxnth/LineAug/32704faa8b861ff7d91b0fd70cc7dc34ad84ccc5/lineaug/__init__.py -------------------------------------------------------------------------------- /lineaug/libs/__init__.py: -------------------------------------------------------------------------------- 1 | from .ocrodeg import * 2 | -------------------------------------------------------------------------------- /lineaug/libs/ocrodeg.py: -------------------------------------------------------------------------------- 1 | import random as pyr 2 | import warnings 3 | from random import randint 4 | 5 | import scipy.ndimage as ndi 6 | import numpy as np 7 | 8 | """ 9 | The code below is a modified version of ocrodeg by NVlabs which was taken from Ocropus-OCR 10 | (see: https://github.com/Calamari-OCR/calamari/blob/master/calamari_ocr/ocr/augmentation/data_augmenter.py). 11 | """ 12 | 13 | 14 | def autoinvert(image): 15 | assert np.amin(image) >= 0 16 | assert np.amax(image) <= 1 17 | if np.sum(image > 0.9) > np.sum(image < 0.1): 18 | return 1 - image 19 | else: 20 | return image 21 | 22 | 23 | def random_transform(translation=(-0.05, 0.05), rotation=(-2, 2), scale=(-0.1, 0.1), aniso=(-0.1, 0.1)): 24 | dx = pyr.uniform(*translation) 25 | dy = pyr.uniform(*translation) 26 | angle = pyr.uniform(*rotation) 27 | angle = angle * np.pi / 180.0 28 | scale = 10**pyr.uniform(*scale) 29 | aniso = 10**pyr.uniform(*aniso) 30 | return dict(angle=angle, scale=scale, aniso=aniso, translation=(dx, dy)) 31 | 32 | 33 | def transform_image(image, angle=0.0, scale=1.0, aniso=1.0, translation=(0, 0), order=1): 34 | dx, dy = translation 35 | scale = 1.0/scale 36 | c = np.cos(angle) 37 | s = np.sin(angle) 38 | sm = np.array([[scale / aniso, 0], [0, scale * aniso]], 'f') 39 | m = np.array([[c, -s], [s, c]], 'f') 40 | m = np.dot(sm, m) 41 | w, h = image.shape 42 | c = np.array([w, h]) / 2.0 43 | d = c - np.dot(m, c) + np.array([dx * w, dy * h]) 44 | return ndi.affine_transform(image, m, offset=d, order=order, mode="nearest", output=np.dtype("f")) 45 | 46 | 47 | def random_pad(image, horizontal=(0, 100)): 48 | l, r = np.random.randint(*horizontal, size=1), np.random.randint(*horizontal, size=1) 49 | return np.pad(image, ((l[0], r[0]), (0, 0)), mode="constant") 50 | 51 | 52 | def bounded_gaussian_noise(shape, sigma, maxdelta): 53 | n, m = shape 54 | deltas = np.random.rand(2, n, m) 55 | deltas = ndi.gaussian_filter(deltas, (0, sigma, sigma)) 56 | deltas -= np.amin(deltas) 57 | deltas /= np.amax(deltas) 58 | deltas = (2*deltas-1) * maxdelta 59 | return deltas 60 | 61 | 62 | def distort_with_noise(image, deltas, order=1): 63 | assert deltas.shape[0] == 2 64 | assert image.shape == deltas.shape[1:], (image.shape, deltas.shape) 65 | n, m = image.shape 66 | xy = np.transpose(np.array(np.meshgrid( 67 | range(n), range(m))), axes=[0, 2, 1]) 68 | deltas += xy 69 | return ndi.map_coordinates(image, deltas, order=order, mode="reflect") 70 | 71 | 72 | def noise_distort1d(shape, sigma=100.0, magnitude=100.0): 73 | h, w = shape 74 | noise = ndi.gaussian_filter(np.random.randn(w), sigma) 75 | noise *= magnitude / np.amax(abs(noise)) 76 | dys = np.array([noise]*h) 77 | deltas = np.array([dys, np.zeros((h, w))]) 78 | return deltas 79 | 80 | 81 | def percent_black(image): 82 | n = np.prod(image.shape) 83 | k = sum(image < 0.5) 84 | return k * 100.0 / n 85 | 86 | 87 | def binary_blur(image, sigma, noise=0.0): 88 | p = percent_black(image) 89 | blurred = ndi.gaussian_filter(image, sigma) 90 | if noise > 0: 91 | blurred += np.random.randn(*blurred.shape) * noise 92 | t = np.percentile(blurred, p) 93 | return np.array(blurred > t, 'f') 94 | 95 | 96 | def make_noise_at_scale(shape, scale): 97 | h, w = shape 98 | h0, w0 = int(h/scale+1), int(w/scale+1) 99 | data = np.random.rand(h0, w0) 100 | with warnings.catch_warnings(): 101 | warnings.simplefilter("ignore") 102 | result = ndi.zoom(data, scale) 103 | return result[:h, :w] 104 | 105 | 106 | def make_multiscale_noise(shape, scales, weights=None, span=(0.0, 1.0)): 107 | if weights is None: weights = [1.0] * len(scales) 108 | result = make_noise_at_scale(shape, scales[0]) * weights[0] 109 | for s, w in zip(scales, weights): 110 | result += make_noise_at_scale(shape, s) * w 111 | lo, hi = span 112 | result -= np.amin(result) 113 | result /= np.amax(result) 114 | result *= (hi-lo) 115 | result += lo 116 | return result 117 | 118 | 119 | def make_multiscale_noise_uniform(shape, srange=(1.0, 100.0), nscales=4, span=(0.0, 1.0)): 120 | lo, hi = np.log10(srange[0]), np.log10(srange[1]) 121 | scales = np.random.uniform(size=nscales) 122 | scales = np.add.accumulate(scales) 123 | scales -= np.amin(scales) 124 | scales /= np.amax(scales) 125 | scales *= hi-lo 126 | scales += lo 127 | scales = 10**scales 128 | weights = 2.0 * np.random.uniform(size=nscales) 129 | return make_multiscale_noise(shape, scales, weights=weights, span=span) 130 | 131 | 132 | def random_blobs(shape, blobdensity, size, roughness=2.0): 133 | from random import randint 134 | from builtins import range # python2 compatible 135 | h, w = shape 136 | numblobs = int(blobdensity * w * h) 137 | mask = np.zeros((h, w), 'i') 138 | for i in range(numblobs): 139 | mask[randint(0, h-1), randint(0, w-1)] = 1 140 | dt = ndi.distance_transform_edt(1-mask) 141 | mask = np.array(dt < size, 'f') 142 | mask = ndi.gaussian_filter(mask, size/(2*roughness)) 143 | mask -= np.amin(mask) 144 | mask /= np.amax(mask) 145 | noise = np.random.rand(h, w) 146 | noise = ndi.gaussian_filter(noise, size/(2*roughness)) 147 | noise -= np.amin(noise) 148 | noise /= np.amax(noise) 149 | return np.array(mask * noise > 0.5, 'f') 150 | 151 | 152 | def random_blotches(image, fgblobs, bgblobs, fgscale=10, bgscale=10): 153 | fg = random_blobs(image.shape, fgblobs, fgscale) 154 | bg = random_blobs(image.shape, bgblobs, bgscale) 155 | return np.minimum(np.maximum(image, fg), 1-bg) 156 | 157 | 158 | def make_fiber(l, a, stepsize=0.5): 159 | angles = np.random.standard_cauchy(l) * a 160 | angles[0] += 2 * np.pi * np.random.rand() 161 | angles = np.add.accumulate(angles) 162 | coss = np.add.accumulate(np.cos(angles)*stepsize) 163 | sins = np.add.accumulate(np.sin(angles)*stepsize) 164 | return np.array([coss, sins]).transpose(1, 0) 165 | 166 | 167 | def make_fibrous_image(shape, nfibers=300, l=300, a=0.2, stepsize=0.5, span=(0.1, 1.0), blur=1.0): 168 | from builtins import range # python2 compatible 169 | h, w = shape 170 | lo, hi = span 171 | result = np.zeros(shape) 172 | for i in range(nfibers): 173 | v = np.random.rand() * (hi-lo) + lo 174 | fiber = make_fiber(l, a, stepsize=stepsize) 175 | y, x = randint(0, h-1), randint(0, w-1) 176 | fiber[:, 0] += y 177 | fiber[:, 0] = np.clip(fiber[:, 0], 0, h-.1) 178 | fiber[:, 1] += x 179 | fiber[:, 1] = np.clip(fiber[:, 1], 0, w-.1) 180 | for y, x in fiber: 181 | result[int(y), int(x)] = v 182 | result = ndi.gaussian_filter(result, blur) 183 | result -= np.amin(result) 184 | result /= np.amax(result) 185 | result *= (hi-lo) 186 | result += lo 187 | return result 188 | 189 | 190 | def printlike_multiscale(image, blur=1.0, blotches=5e-5, inverted=None): 191 | if inverted: 192 | selector = image 193 | elif inverted is None: 194 | selector = autoinvert(image) 195 | else: 196 | selector = 1 - image 197 | 198 | selector = random_blotches(selector, 3*blotches, blotches) 199 | paper = make_multiscale_noise_uniform(image.shape, span=(0.8, 1.0)) 200 | ink = make_multiscale_noise_uniform(image.shape, span=(0.0, 0.2)) 201 | blurred = (ndi.gaussian_filter(selector, blur) + selector) / 2 202 | printed = blurred * ink + (1-blurred) * paper 203 | if inverted: 204 | return 1 - printed 205 | else: 206 | return printed 207 | 208 | 209 | def printlike_fibrous(image, blur=1.0, blotches=5e-5, inverted=None): 210 | if inverted: 211 | selector = image 212 | elif inverted is None: 213 | selector = autoinvert(image) 214 | else: 215 | selector = 1 - image 216 | 217 | selector = random_blotches(selector, 3*blotches, blotches) 218 | paper = make_multiscale_noise(image.shape, [1.0, 5.0, 10.0, 50.0], weights=[1.0, 0.3, 0.5, 0.3], span=(0.7, 1.0)) 219 | paper -= make_fibrous_image(image.shape, 300, 500, 0.01, span=(0.0, 0.25), blur=0.5) 220 | ink = make_multiscale_noise(image.shape, [1.0, 5.0, 10.0, 50.0], span=(0.0, 0.5)) 221 | blurred = ndi.gaussian_filter(selector, blur) 222 | printed = blurred * ink + (1-blurred) * paper 223 | if inverted: 224 | return 1 - printed 225 | else: 226 | return printed 227 | -------------------------------------------------------------------------------- /lineaug/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxnth/LineAug/32704faa8b861ff7d91b0fd70cc7dc34ad84ccc5/lineaug/scripts/__init__.py -------------------------------------------------------------------------------- /lineaug/scripts/augment.py: -------------------------------------------------------------------------------- 1 | from lineaug.src import Augmentation 2 | from lineaug.src.utils import get_gt, data_from_image 3 | 4 | from pathlib import Path 5 | import shutil 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Augment OCR in the form of line images.") 9 | 10 | parser.add_argument("-i", "--images", required=True, type=str, nargs="*", help="Path to line image(s).") 11 | parser.add_argument("-o", "--output", default=Path("../.."), type=str, 12 | help="Output path where augmented images will be saved.") 13 | parser.add_argument("-gt", "--ground_truth", type=str, help="Extension of the ground truth text files. " 14 | "Will create new ground truth files for the augmented " 15 | "line images containing the existing ground truth for the " 16 | "associated line (optional).") 17 | parser.add_argument("-n", type=int, default=1, help="Number of augmented line image variants to create for each input.") 18 | parser.add_argument("-bg", "--background", action="store_true", 19 | default=False, help="Whether to add noise to the background of the line image.") 20 | parser.add_argument("-e", "--enumerate", action="store_true", 21 | default=False, help="Enumerate output file names instead of using input file names.") 22 | 23 | args = parser.parse_args() 24 | 25 | if __name__ == "__main__": 26 | counter = 1 27 | 28 | for image in args.images: 29 | img_counter = 1 30 | image = Path(image) 31 | 32 | img = data_from_image(image) 33 | 34 | for n in range(args.n): 35 | out_filename = Path(args.output, str(counter).zfill(5)).with_suffix("".join(image.suffixes)) if args.enumerate else \ 36 | Path(args.output, f"{image.name.split('.')[0]}_{str(img_counter).zfill(4)}").with_suffix("".join(image.suffixes)) 37 | 38 | aug = Augmentation(img, args.background) 39 | aug.export(out_filename) 40 | 41 | if args.ground_truth: 42 | gt = get_gt(image, args.ground_truth) 43 | shutil.copyfile(str(gt), str(Path(out_filename.parent, 44 | out_filename.stem).with_suffix("".join(gt.suffixes)))) 45 | 46 | counter += 1 47 | img_counter += 1 48 | -------------------------------------------------------------------------------- /lineaug/src/Augmentation.py: -------------------------------------------------------------------------------- 1 | import lineaug.libs as ocrodeg 2 | 3 | import numpy as np 4 | from PIL import Image 5 | 6 | 7 | class Augmentation: 8 | """Represents a single augmentation of a line image. 9 | 10 | Args: 11 | img_data (np.ndarray): Numpy array representation of an image. 12 | bg (bool): Whether to add background noise during the augmentation. 13 | 14 | Attributes: 15 | img_data (np.ndarray): Numpy array representation of an image. 16 | bg (bool): Whether to add background noise during the augmentation. 17 | aug_data (np.ndarray): Numpy array representation of the augmented image. Basis for augmentation calculations. 18 | """ 19 | def __init__(self, img_data: np.ndarray, bg: False): 20 | self.img_data = img_data.T 21 | self.bg = bg 22 | 23 | self.aug_data = self.augment_line() 24 | 25 | def augment_line(self) -> np.ndarray: 26 | """Calculate augmented line image based on the original image data. 27 | 28 | Returns: 29 | np.ndarray: Numpy array representation of the augmented image. 30 | """ 31 | original_dtype = self.img_data.dtype 32 | data = self.img_data.astype(np.float) 33 | m = data.max() 34 | data = data / (1 if m == 0 else m) 35 | data = ocrodeg.random_pad(data, (0, data.shape[1] * 2)) 36 | 37 | for sigma in [2, 5]: 38 | noise = ocrodeg.bounded_gaussian_noise(data.shape, sigma, 3.0) 39 | data = ocrodeg.distort_with_noise(data, noise) 40 | 41 | if self.bg: 42 | data = ocrodeg.printlike_multiscale(data, blur=1, inverted=True) 43 | data = (data * 255 / data.max()).astype(original_dtype) 44 | return data.T 45 | 46 | def export(self, out_filename: str): 47 | """Exports the augmented image to image file. 48 | 49 | Args: 50 | out_filename (str): Output filename for the generated line image. 51 | """ 52 | formatted = (255 - self.aug_data * 255 / np.max(255 - self.aug_data)).astype('uint8') 53 | img = Image.fromarray(formatted).convert("RGB") 54 | img.save(out_filename) 55 | -------------------------------------------------------------------------------- /lineaug/src/__init__.py: -------------------------------------------------------------------------------- 1 | from lineaug.src import Augmentation 2 | 3 | -------------------------------------------------------------------------------- /lineaug/src/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Union 3 | 4 | import numpy as np 5 | from PIL import Image 6 | 7 | 8 | def get_gt(path: Path, suffix: str) -> Union[Path, None]: 9 | """Gets ground truth file associated with line image. 10 | 11 | Args: 12 | path (Path): Path to the line image. 13 | suffix (str): Suffix for the ground truth text files. 14 | 15 | Returns: 16 | Union[Path, None]: Path to the ground truth text file or None if not ground truth text file exists. 17 | """ 18 | gt_path = Path(path.parent, path.stem).with_suffix(suffix) 19 | if gt_path.is_file(): 20 | return gt_path 21 | return None 22 | 23 | 24 | def data_from_image(image: Path) -> np.ndarray: 25 | """Converts an image file into a numpy array for augmentation calculations. 26 | 27 | Args: 28 | image (Path): Path to the image file. 29 | Returns: 30 | np.ndarray: Numpy array representing the image. 31 | """ 32 | data = 255 - np.mean(np.array(Image.open(str(image)).convert("RGB"))[:, :, 0:2], axis=-1) 33 | return data 34 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Pillow==9.3.0 2 | numpy==1.22.0 3 | scipy==1.4.1 4 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | licence-file = LICENSE -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r", encoding="utf-8") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="lineaug", 8 | version="0.1", 9 | license="MIT License", 10 | author="Maximilian Nöth", 11 | author_email="maximilian.noeth@protonmail.com", 12 | description="Augment line images for improving OCR datasets ", 13 | long_description=long_description, 14 | long_description_content_type="text/markdown", 15 | url="https://github.com/maxnth/lineaug", 16 | packages=setuptools.find_packages(), 17 | install_requires=[ 18 | "numpy>=1.18.2", 19 | "Pillow>=7.1.1", 20 | "scipy>=1.4.1" 21 | ], 22 | classifiers=[ 23 | "Programming Language :: Python :: 3", 24 | "License :: OSI Approved :: MIT License", 25 | "Operating System :: OS Independent", 26 | ], 27 | entry_points={ 28 | "console_scripts": [ 29 | "lineaug-augment = lineaug.scripts.augment:main" 30 | ] 31 | }, 32 | keywords=["Augmentation", "OCR", "optical character recognition"], 33 | python_requires='>=3.6', 34 | ) 35 | --------------------------------------------------------------------------------