├── kraken ├── lib │ ├── __init__.py │ ├── sl.py │ ├── exceptions.py │ ├── log.py │ ├── util.py │ ├── lineest.py │ ├── lstm.py │ ├── morph.py │ ├── models.py │ ├── ctc_decoder.py │ ├── train.py │ ├── codec.py │ ├── clstm_pb2.py │ └── pyrnn_pb2.py ├── script.clstm ├── __init__.py ├── templates │ ├── report │ ├── hocr │ ├── abbyyxml │ ├── style.css │ ├── alto │ └── layout.html ├── contrib │ ├── recognition_boxes.py │ └── generate_scripts.py ├── iso15924.json ├── binarization.py ├── transcribe.py ├── repo.py └── serialization.py ├── docs ├── _static │ └── kraken.png ├── gpu.rst ├── _templates │ └── sidebarintro.html ├── api.rst ├── models.rst ├── index.rst ├── vgsl.rst ├── Makefile ├── make.bat ├── advanced.rst └── conf.py ├── tests ├── resources │ ├── bw.png │ ├── input.jpg │ ├── input.tif │ ├── toy.clstm │ ├── model.pronn │ ├── model.pyrnn.gz │ └── segmentation.json ├── test_train.py ├── test_rpred.py ├── test_transcribe.py ├── test_pageseg.py ├── test_vgsl.py ├── test_models.py ├── test_binarization.py ├── test_serialization.py ├── test_layers.py └── test_codec.py ├── requirements.txt ├── setup.py ├── environment_cuda.yml ├── environment.yml ├── setup.cfg ├── .gitignore ├── .travis.yml ├── README.rst └── LICENSE /kraken/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /kraken/script.clstm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/kraken/master/kraken/script.clstm -------------------------------------------------------------------------------- /docs/_static/kraken.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/kraken/master/docs/_static/kraken.png -------------------------------------------------------------------------------- /tests/resources/bw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/bw.png -------------------------------------------------------------------------------- /tests/resources/input.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/input.jpg -------------------------------------------------------------------------------- /tests/resources/input.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/input.tif -------------------------------------------------------------------------------- /tests/resources/toy.clstm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/toy.clstm -------------------------------------------------------------------------------- /tests/resources/model.pronn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/model.pronn -------------------------------------------------------------------------------- /tests/resources/model.pyrnn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/model.pyrnn.gz 
-------------------------------------------------------------------------------- /kraken/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | entry point for kraken functionality 3 | """ 4 | 5 | from __future__ import absolute_import, division, print_function 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | future 3 | requests 4 | click>=7.0 5 | numpy 6 | Pillow 7 | regex 8 | scipy 9 | protobuf>=3.0.0 10 | jinja2 11 | python-bidi 12 | torchvision 13 | torch>=0.4.1 14 | coremltools 15 | -------------------------------------------------------------------------------- /docs/gpu.rst: -------------------------------------------------------------------------------- 1 | .. _gpu: 2 | 3 | GPU Acceleration 4 | ================ 5 | 6 | The latest version of kraken uses a new pytorch backend which enables GPU 7 | acceleration both for training and recognition. Apart from a compatible Nvidia 8 | GPU, CUDA and cuDNN have to be installed so pytorch can run computation on it. 9 | 10 | 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | from setuptools import setup 6 | 7 | setup( 8 | include_package_data=True, 9 | test_suite="nose.collector", 10 | tests_require=['nose', 'hocr-spec'], 11 | setup_requires=['pbr'], 12 | pbr=True, 13 | ) 14 | -------------------------------------------------------------------------------- /docs/_templates/sidebarintro.html: -------------------------------------------------------------------------------- 1 |

Useful Links

2 | 8 | -------------------------------------------------------------------------------- /environment_cuda.yml: -------------------------------------------------------------------------------- 1 | name: kraken 2 | channels: 3 | - pytorch 4 | - fastai 5 | - defaults 6 | dependencies: 7 | - python>=3.6 8 | - lxml 9 | - future 10 | - regex 11 | - requests 12 | - click>=7.0 13 | - numpy 14 | - pillow 15 | - scipy 16 | - protobuf>=3.0.0 17 | - jinja2 18 | - torchvision-nightly 19 | - pytorch-nightly 20 | - pip: 21 | - coremltools 22 | - python-bidi 23 | - git+https://github.com/mittagessen/kraken.git@master 24 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: kraken 2 | channels: 3 | - pytorch 4 | - fastai 5 | - defaults 6 | dependencies: 7 | - python>=3.6 8 | - lxml 9 | - future 10 | - regex 11 | - requests 12 | - click>=7.0 13 | - numpy 14 | - pillow 15 | - scipy 16 | - protobuf>=3.0.0 17 | - jinja2 18 | - torchvision-nightly-cpu 19 | - pytorch-nightly-cpu 20 | - pip: 21 | - coremltools 22 | - python-bidi 23 | - git+https://github.com/mittagessen/kraken.git@master 24 | -------------------------------------------------------------------------------- /kraken/templates/report: -------------------------------------------------------------------------------- 1 | === report {{ report.name }} === 2 | 3 | {{ report.chars }} Characters 4 | {{ report.errors }} Errors 5 | {{ '%0.2f'| format(report.accuracy) }}% Accuracy 6 | 7 | {{ report.insertions }} Insertions 8 | {{ report.deletions }} Deletions 9 | {{ report.substitutions }} Substitutions 10 | 11 | Count Missed %Right 12 | {% for script in report.scripts %} 13 | {{ script.count }} {{ script.errors }} {{'%0.2f'| format(script.accuracy) }}% {{ script.script }} 14 | {% endfor %} 15 | 16 | Errors Correct-Generated 17 | {% for count in report.counts %} 18 | {{ count.errors }} {{ '{ ' }}{{ count.correct }}{{ ' }' }} - {{ '{ ' }}{{ count.generated }}{{ ' }' }} 19 | {% endfor %} 20 | -------------------------------------------------------------------------------- /kraken/lib/sl.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def dim0(s): 5 | """Dimension of the slice list for dimension 0.""" 6 | return s[0].stop-s[0].start 7 | 8 | 9 | def dim1(s): 10 | """Dimension of the slice list for dimension 1.""" 11 | return s[1].stop-s[1].start 12 | 13 | 14 | def area(a): 15 | """Return the area of the slice list (ignores anything past a[:2].""" 16 | return np.prod([max(x.stop-x.start, 0) for x in a[:2]]) 17 | 18 | 19 | def width(s): 20 | return s[1].stop-s[1].start 21 | 22 | 23 | def height(s): 24 | return s[0].stop-s[0].start 25 | 26 | 27 | def aspect(a): 28 | return height(a)*1.0/width(a) 29 | 30 | 31 | def xcenter(s): 32 | return np.mean([s[1].stop, s[1].start]) 33 | 34 | 35 | def ycenter(s): 36 | return np.mean([s[0].stop, s[0].start]) 37 | 38 | 39 | def center(s): 40 | return (ycenter(s), xcenter(s)) 41 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = kraken 3 | author = Benjamin Kiessling 4 | author-email = mittagessen@l.unchti.me 5 | summary = OCR/HTR engine for all the languages 6 | home-page = http://kraken.re 7 | description-file = README.rst 8 | license = Apache 9 | classifier = 10 | Development 
Status :: 5 - Stable 11 | Environment :: Console 12 | Intended Audience :: Science/Research 13 | License :: OSI Approved :: Apache Software License 14 | Operating System :: POSIX 15 | Programming Language :: Python :: 3.6 16 | Programming Language :: Python :: 3.7 17 | 18 | keywords = 19 | ocr 20 | ocropus 21 | 22 | [bdist_wheel] 23 | universal = 1 24 | 25 | [files] 26 | packages = kraken 27 | 28 | [entry_points] 29 | console_scripts = 30 | kraken = kraken.kraken:cli 31 | ketos = kraken.ketos:cli 32 | 33 | [flake8] 34 | max-line-length = 160 35 | exclude = tests/* 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | [._]*.s[a-w][a-z] 2 | [._]s[a-w][a-z] 3 | *.un~ 4 | Session.vim 5 | .netrwhist 6 | *~ 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *,cover 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | -------------------------------------------------------------------------------- /kraken/templates/hocr: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | {% if page.scripts %} 8 | 9 | {% endif %} 10 | 11 | 12 |
13 | {% for line in page.lines %} 14 | 15 | {% for segment in line.recognition %} 16 | {{ segment.text }} 17 | {% endfor %} 18 | 19 |
20 | {% endfor %} 21 | 22 |
23 | 24 | 25 | -------------------------------------------------------------------------------- /tests/test_train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | 4 | from nose.tools import raises 5 | 6 | from kraken.lib import train 7 | from itertools import cycle 8 | 9 | class TestTrain(unittest.TestCase): 10 | """ 11 | Testing model trainer interrupter classes 12 | """ 13 | def test_early_stopping(self): 14 | """ 15 | Tests early stopping interrupter. 16 | """ 17 | it = train.EarlyStopping(cycle('a'), min_delta = 1, lag = 5) 18 | for epoch, _ in enumerate(it): 19 | it.update(epoch if epoch < 10 else 10) 20 | self.assertEqual(15, epoch) 21 | self.assertEqual(it.best_epoch, 10) 22 | self.assertEqual(it.best_loss, 10) 23 | 24 | def test_epoch_stopping(self): 25 | """ 26 | Tests stopping after n epochs. 27 | """ 28 | it = train.EpochStopping(cycle('a'), epochs = 57) 29 | for epoch, _ in enumerate(it): 30 | it.update(epoch) 31 | self.assertEqual(56, epoch) 32 | self.assertEqual(it.best_epoch, 56) 33 | self.assertEqual(it.best_loss, 56) 34 | -------------------------------------------------------------------------------- /kraken/contrib/recognition_boxes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import sys 5 | 6 | from PIL import Image, ImageDraw 7 | 8 | from kraken.pageseg import segment 9 | from kraken.binarization import nlbin 10 | from kraken.rpred import rpred 11 | from itertools import cycle 12 | from kraken.lib import models 13 | 14 | cmap = cycle([(230, 25, 75, 127), 15 | (60, 180, 75, 127), 16 | (255, 225, 25, 127), 17 | (0, 130, 200, 127), 18 | (245, 130, 48, 127), 19 | (145, 30, 180, 127), 20 | (70, 240, 240, 127)]) 21 | 22 | net = models.load_any(sys.argv[1]) 23 | 24 | for fname in sys.argv[2:]: 25 | im = Image.open(fname) 26 | print(fname) 27 | im = nlbin(im) 28 | res = segment(im, maxcolseps=0) 29 | pred = rpred(net, im, res) 30 | im = im.convert('RGBA') 31 | tmp = Image.new('RGBA', im.size, (0, 0, 0, 0)) 32 | draw = ImageDraw.Draw(tmp) 33 | for line in pred: 34 | for box in line.cuts: 35 | draw.rectangle(box, fill=next(cmap)) 36 | im = Image.alpha_composite(im, tmp) 37 | im.save('high_{}'.format(os.path.basename(fname))) 38 | -------------------------------------------------------------------------------- /tests/test_rpred.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | import os 6 | import unittest 7 | 8 | from PIL import Image 9 | from nose.tools import raises 10 | 11 | from kraken.lib.models import load_any 12 | from kraken.rpred import rpred 13 | from kraken.lib.exceptions import KrakenInputException 14 | 15 | thisfile = os.path.abspath(os.path.dirname(__file__)) 16 | resources = os.path.abspath(os.path.join(thisfile, 'resources')) 17 | 18 | class TestRecognition(unittest.TestCase): 19 | 20 | """ 21 | Tests of the recognition facility and associated routines. 22 | """ 23 | def setUp(self): 24 | self.im = Image.open(os.path.join(resources, 'bw.png')) 25 | 26 | def tearDown(self): 27 | self.im.close() 28 | 29 | @raises(KrakenInputException) 30 | def test_rpred_outbounds(self): 31 | """ 32 | Tests correct handling of invalid line coordinates. 
33 | """ 34 | nn = load_any(os.path.join(resources, 'toy.clstm')) 35 | pred = rpred(nn, self.im, {'boxes': [[-1, -1, 10000, 10000]], 'text_direction': 'horizontal'}, True) 36 | next(pred) 37 | -------------------------------------------------------------------------------- /tests/test_transcribe.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | import os 6 | import json 7 | import unittest 8 | 9 | from PIL import Image 10 | from lxml import etree 11 | from io import BytesIO 12 | from kraken.transcribe import TranscriptionInterface 13 | 14 | thisfile = os.path.abspath(os.path.dirname(__file__)) 15 | resources = os.path.abspath(os.path.join(thisfile, 'resources')) 16 | 17 | class TestTranscriptionInterface(unittest.TestCase): 18 | 19 | """ 20 | Test of the transcription interface generation 21 | """ 22 | 23 | def test_transcription_generation(self): 24 | """ 25 | Tests creation of transcription interfaces with segmentation. 26 | """ 27 | tr = TranscriptionInterface() 28 | with open(os.path.join(resources, 'segmentation.json')) as fp: 29 | seg = json.load(fp) 30 | with Image.open(os.path.join(resources, 'input.jpg')) as im: 31 | tr.add_page(im, seg) 32 | fp = BytesIO() 33 | tr.write(fp) 34 | # this will not throw an exception ever so we need a better validator 35 | etree.HTML(fp.getvalue()) 36 | -------------------------------------------------------------------------------- /tests/resources/segmentation.json: -------------------------------------------------------------------------------- 1 | {"boxes": [[0, 29, 518, 56], [25, 54, 122, 82], [9, 74, 95, 119], [103, 75, 146, 131], [7, 138, 136, 231], [10, 228, 122, 348], [13, 230, 65, 285], [74, 304, 121, 354], [12, 353, 143, 405], [15, 450, 109, 521], [17, 511, 147, 574], [108, 544, 151, 597], [30, 591, 143, 694], [21, 696, 149, 838], [13, 832, 155, 900], [3, 880, 93, 970], [20, 989, 60, 1036], [13, 1096, 67, 1152], [87, 1502, 126, 1558], [7, 1866, 132, 1949], [21, 1978, 93, 2051], [26, 2048, 120, 2091], [518, 297, 580, 337], [654, 293, 1088, 332], [514, 353, 1294, 398], [519, 407, 1294, 447], [515, 453, 1292, 499], [518, 505, 1290, 546], [517, 553, 1292, 594], [514, 603, 1292, 647], [518, 652, 1293, 693], [519, 700, 1296, 742], [518, 750, 1296, 797], [518, 799, 1292, 841], [514, 848, 1296, 897], [515, 895, 885, 944], [517, 943, 1294, 990], [514, 995, 1351, 1043], [513, 1043, 1294, 1094], [513, 1094, 1293, 1141], [512, 1143, 1294, 1192], [512, 1192, 1293, 1240], [513, 1241, 1294, 1284], [517, 1290, 1292, 1331], [515, 1340, 1291, 1383], [514, 1388, 1295, 1438], [517, 1436, 1292, 1487], [516, 1483, 1291, 1539], [1078, 1546, 1283, 1584], [530, 1581, 1291, 1636], [514, 1639, 1291, 1689], [512, 1680, 859, 1716], [1389, 24, 1453, 45]], "text_direction": "horizontal-lr", "script_detection": false} -------------------------------------------------------------------------------- /kraken/contrib/generate_scripts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Script fetching the latest unicode Scripts.txt and dumping it as json. 
4 | """ 5 | from urllib import request 6 | import json 7 | import regex 8 | 9 | uri = 'http://www.unicode.org/Public/UNIDATA/Scripts.txt' 10 | 11 | re = regex.compile('^(?P[0-9A-F]{4,6})(..(?P[0-9A-F]{4,6}))?\s+; (?P[A-Za-z]+)') 12 | 13 | with open('scripts.json', 'w') as fp, request.urlopen(uri) as req: 14 | d = [] 15 | for line in req: 16 | line = line.decode('utf-8') 17 | if line.startswith('#') or line.strip() == '': 18 | continue 19 | m = re.match(line) 20 | if m: 21 | print(line) 22 | start = int(m.group('start'), base=16) 23 | end = start 24 | if m.group('end'): 25 | end = int(m.group('end'), base=16) 26 | name = m.group('name') 27 | if len(d) > 0 and d[-1][2] == name and (start - 1 == d[-1][1] or start -1 == d[-1][0]): 28 | print('merging {} and ({}, {}, {})'.format(d[-1], start, end, name)) 29 | d[-1] = (d[-1][0], end, name) 30 | else: 31 | d.append((start, end if end != start else None, name)) 32 | json.dump(d, fp) 33 | -------------------------------------------------------------------------------- /kraken/templates/abbyyxml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | {% for line in page.lines %} 7 | 8 | {% for segment in line.recognition %} 9 | {% for char in segment.recognition %} 10 | {% if loop.first %} 11 | {{ char.text }} 12 | {% else %} 13 | {{ char.text }} 14 | {% endif %} 15 | {% endfor %} 16 | {% endfor %} 17 | 18 | 19 | {% endfor %} 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /kraken/lib/exceptions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | kraken.lib.exceptions 4 | ~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | All custom exceptions raised by kraken's modules and packages. Packages should 7 | always define their exceptions here. 8 | """ 9 | 10 | 11 | class KrakenEncodeException(Exception): 12 | 13 | def __init__(self, message=None): 14 | Exception.__init__(self, message) 15 | 16 | 17 | class KrakenRecordException(Exception): 18 | 19 | def __init__(self, message=None): 20 | Exception.__init__(self, message) 21 | 22 | 23 | class KrakenInvalidModelException(Exception): 24 | 25 | def __init__(self, message=None): 26 | Exception.__init__(self, message) 27 | 28 | 29 | class KrakenInputException(Exception): 30 | 31 | def __init__(self, message=None): 32 | Exception.__init__(self, message) 33 | 34 | 35 | class KrakenRepoException(Exception): 36 | 37 | def __init__(self, message=None): 38 | Exception.__init__(self, message) 39 | 40 | 41 | class KrakenCairoSurfaceException(Exception): 42 | """ 43 | Raised when the Cairo surface couldn't be created. 
44 | 45 | Attributes: 46 | message (str): Error message 47 | width (int): Width of the surface 48 | height (int): Height of the surface 49 | """ 50 | def __init__(self, message: str, width: int, height: int) -> None: 51 | self.message = message 52 | self.width = width 53 | self.height = height 54 | 55 | def __repr__(self) -> str: 56 | return repr(self.message) 57 | -------------------------------------------------------------------------------- /tests/test_pageseg.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | import unittest 6 | import os 7 | 8 | from PIL import Image 9 | from nose.tools import raises 10 | 11 | from kraken.pageseg import segment 12 | from kraken.lib.exceptions import KrakenInputException 13 | 14 | thisfile = os.path.abspath(os.path.dirname(__file__)) 15 | resources = os.path.abspath(os.path.join(thisfile, 'resources')) 16 | 17 | 18 | class TestPageSeg(unittest.TestCase): 19 | 20 | """ 21 | Tests of the page segmentation functionality 22 | """ 23 | @raises(KrakenInputException) 24 | def test_segment_color(self): 25 | """ 26 | Test correct handling of color input. 27 | """ 28 | with Image.open(os.path.join(resources, 'input.jpg')) as im: 29 | segment(im) 30 | 31 | def test_segment_bw(self): 32 | """ 33 | Tests segmentation of bi-level input. 34 | """ 35 | with Image.open(os.path.join(resources, 'bw.png')) as im: 36 | lines = segment(im) 37 | # test if line count is roughly correct 38 | self.assertAlmostEqual(len(lines['boxes']), 30, msg='Segmentation differs ' 39 | 'wildly from true line count', delta=5) 40 | # check if lines do not extend beyond image 41 | for box in lines['boxes']: 42 | self.assertLess(0, box[0], msg='Line x0 < 0') 43 | self.assertLess(0, box[1], msg='Line y0 < 0') 44 | self.assertGreater(im.size[0], box[2], msg='Line x1 > {}'.format(im.size[0])) 45 | self.assertGreater(im.size[1], box[3], msg='Line y1 > {}'.format(im.size[1])) 46 | -------------------------------------------------------------------------------- /kraken/templates/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | background: #f3f3f3; 3 | {% if font.family %} 4 | font-family: {{ font.family }}; 5 | {% endif %} 6 | {% if font.style %} 7 | font-style: {{ font.style }}; 8 | {% endif %} 9 | {% if font.weight %} 10 | font-style: {{ font.weight }}; 11 | {% endif %} 12 | } 13 | 14 | [contenteditable=true]:empty:before { 15 | content: attr(data-placeholder); 16 | display: block; /* For Firefox */ 17 | } 18 | 19 | li[contenteditable=true]:hover, li[contenteditable=true].hovered, span[contenteditable=true]:hover, span[contenteditable=true].hovered { 20 | border: 1px solid #ff0000; 21 | } 22 | 23 | .rect:hover, a.hovered { 24 | box-shadow: inset 0 0 0 1px #ff0000; 25 | } 26 | 27 | li[contenteditable=true]{ 28 | border: 1px dashed #000; 29 | width: 100%; 30 | padding: 2px; 31 | margin: 0 0 5px 0; 32 | } 33 | 34 | ul { 35 | list-style-type:none; 36 | } 37 | 38 | nav { 39 | background: #444; 40 | position: fixed; 41 | top: 0; 42 | left: 0; 43 | height: 100%; 44 | width: 10%; 45 | font-family: "Helvetica Neue", Arial, sans-serif; 46 | } 47 | 48 | nav ul { 49 | list-style: none; 50 | margin-right: 1em; 51 | } 52 | 53 | nav li { 54 | display : inline-block; 55 | } 56 | 57 | nav a { 58 | color: white; 59 | text-decoration: none; 60 | } 61 | 62 | nav a:hover { 63 | text-decoration: underline; 64 | } 65 | 66 | 
.container { 67 | position: relative; 68 | margin-left: 15%; 69 | display: table; 70 | height: 100%; 71 | width: 85%; 72 | } 73 | 74 | .img_container { 75 | position: relative; 76 | } 77 | 78 | .column { 79 | display: table-cell; 80 | vertical-align: top; 81 | width: 50%; 82 | height: 100%; 83 | padding: 1rem; 84 | } 85 | 86 | #download_button { 87 | position: fixed; 88 | padding: 0; 89 | text-align: center; 90 | width: 10%; 91 | bottom: 50px; 92 | } 93 | 94 | .corrected { 95 | background-color: #73AD21; 96 | } 97 | 98 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | matrix: 3 | include: 4 | - python: 3.6 5 | - python: 3.7 6 | dist: xenial 7 | sudo: required 8 | notifications: 9 | email: false 10 | sudo: false 11 | install: 12 | - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; 13 | - bash miniconda.sh -b -p $HOME/miniconda 14 | - export PATH="$HOME/miniconda/bin:$PATH" 15 | - conda config --set always_yes yes --set changeps1 no --set show_channel_urls yes 16 | - conda update conda 17 | - conda create -n test-environment python=$TRAVIS_PYTHON_VERSION 18 | - source activate test-environment 19 | - conda install pbr nose pip 20 | - sed '/coremltools\|python-bidi\|regex\|torch/d' requirements.txt | xargs conda install 21 | - conda install torchvision-cpu -c pytorch 22 | - conda uninstall pytorch-cpu -c pytorch 23 | - conda install pytorch-nightly-cpu -c pytorch 24 | - pip install -r requirements.txt 25 | - conda list 26 | - pip freeze 27 | - python setup.py install 28 | script: 29 | - python setup.py nosetests 30 | deploy: 31 | provider: pypi 32 | username: ogl-iris 33 | distributions: sdist bdist_wheel 34 | skip_cleanup: true 35 | skip_upload_docs: true 36 | on: 37 | tags: true 38 | password: 39 | secure: i/TwRgfux3ebFtTgg8Od/7KGHr1AZgHJ/9r4Yop7HoZhKsgSW8Q3e65K/LJ9aQFxmggeneAdOZFboStl9li48FpfFTqJy9TioSyaDoxDv5oPmUDFKHzbjExlupa7BzeL/OaNYSzkD8S2CIcnaiQspFASCWy0pHvveTU0MvdeaFbZ+lEdwH7Kb4DotzRA2p0wOwuq84P6Vunqi9UEvVP4e/f2j1Hin+zGs08nnxfC8A1XXkKZlnnRtbaGqKkzcSyeYFDcHfFENU1E3KEbeR6xqpWgZla/WIxnQTjUaZy9/RVLja8JLoPI86WofYScKcvYRUBPX74RBgjQhpNusuZ1umGxG+1C5TzF705YqWdYCM96qqUA/hBlDSngk+ZjraPJAtSPlJCx6VaiuIu8VPgP2jcazKaMduq5C6NT0XJtNUS22cdoox3Fzhhf/f6mLPMeBxQJewYo3Qbj86Ll5M8O5SmGdwAnmGDEwL0+cqb5oULXQcK1fJMnqR68KqSoFq89zNdTEEHTjMCLJO9Yfjmpd6iY33nOXhCEWNFRKEQVbeyFcudQemDxSSGTq2LNrgzMjJj4O3chjqbU9y5KiQF5lpH28/S/ele7VrbpX9bbn3/QmSQnJhByiypOQ2vEricn3aEoToE8Ws//OCmqItoOYTzRNHs/EUST0Zah2W/LTX8= 40 | -------------------------------------------------------------------------------- /tests/test_vgsl.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | 4 | from nose.tools import raises 5 | 6 | import os 7 | import torch 8 | import tempfile 9 | from kraken.lib import vgsl 10 | 11 | 12 | class TestVGSL(unittest.TestCase): 13 | """ 14 | Testing VGSL module 15 | """ 16 | def test_helper_train(self): 17 | """ 18 | Tests train/eval mode helper methods 19 | """ 20 | rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]') 21 | rnn.train() 22 | self.assertTrue(torch.is_grad_enabled()) 23 | self.assertTrue(rnn.nn.training) 24 | rnn.eval() 25 | self.assertFalse(torch.is_grad_enabled()) 26 | self.assertFalse(rnn.nn.training) 27 | 28 | def test_helper_threads(self): 29 | """ 30 | Test openmp threads helper method. 
31 | """ 32 | rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]') 33 | rnn.set_num_threads(4) 34 | self.assertEqual(torch.get_num_threads(), 4) 35 | 36 | def test_save_model(self): 37 | """ 38 | Test model serialization. 39 | """ 40 | rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]') 41 | with tempfile.TemporaryDirectory() as dir: 42 | rnn.save_model(dir + '/foo.mlmodel') 43 | self.assertTrue(os.path.exists(dir + '/foo.mlmodel')) 44 | 45 | def test_resize(self): 46 | """ 47 | Tests resizing of output layers. 48 | """ 49 | rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]') 50 | rnn.resize_output(80) 51 | self.assertEqual(rnn.nn[-1].lin.out_features, 80) 52 | 53 | def test_del_resize(self): 54 | """ 55 | Tests resizing of output layers with entry deletion. 56 | """ 57 | rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]') 58 | rnn.resize_output(80, [2, 4, 5, 6, 7, 12, 25]) 59 | self.assertEqual(rnn.nn[-1].lin.out_features, 80) 60 | -------------------------------------------------------------------------------- /tests/test_models.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | import os 4 | import tempfile 5 | import pickle 6 | 7 | from nose.tools import raises 8 | 9 | import kraken.lib.lstm 10 | 11 | from kraken.lib import models 12 | from kraken.lib.exceptions import KrakenInvalidModelException 13 | 14 | thisfile = os.path.abspath(os.path.dirname(__file__)) 15 | resources = os.path.abspath(os.path.join(thisfile, 'resources')) 16 | 17 | class TestModels(unittest.TestCase): 18 | """ 19 | Testing model loading routines 20 | """ 21 | 22 | def setUp(self): 23 | self.temp = tempfile.NamedTemporaryFile(delete=False) 24 | 25 | def tearDown(self): 26 | self.temp.close() 27 | os.unlink(self.temp.name) 28 | 29 | @raises(KrakenInvalidModelException) 30 | def test_load_invalid(self): 31 | """ 32 | Tests correct handling of invalid files. 33 | """ 34 | models.load_any(self.temp.name) 35 | 36 | def test_load_clstm(self): 37 | """ 38 | Tests loading of valid clstm files. 39 | """ 40 | rnn = models.load_any(os.path.join(resources, 'toy.clstm').encode('utf-8')) 41 | self.assertIsInstance(rnn, models.TorchSeqRecognizer) 42 | 43 | @raises(KrakenInvalidModelException) 44 | def test_load_pyrnn_no_seqrecognizer(self): 45 | """ 46 | Test correct handling of non-SeqRecognizer pickles. 47 | """ 48 | pickle.dump(u'Iámnõtãrécðçnízer', self.temp) 49 | self.temp.close() 50 | models.load_any(self.temp.name) 51 | 52 | @raises(KrakenInvalidModelException) 53 | def test_load_any_pyrnn_py3(self): 54 | """ 55 | Test load_any doesn't load pickled models on python 3 56 | """ 57 | rnn = models.load_any(os.path.join(resources, 'model.pyrnn.gz')) 58 | 59 | def test_load_any_proto(self): 60 | """ 61 | Test load_any loads protobuf models. 
62 | """ 63 | rnn = models.load_any(os.path.join(resources, 'model.pronn')) 64 | self.assertIsInstance(rnn, kraken.lib.models.TorchSeqRecognizer) 65 | -------------------------------------------------------------------------------- /tests/test_binarization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | import unittest 6 | import os 7 | 8 | from PIL import Image 9 | from kraken.binarization import nlbin 10 | 11 | thisfile = os.path.abspath(os.path.dirname(__file__)) 12 | resources = os.path.abspath(os.path.join(thisfile, 'resources')) 13 | 14 | class TestBinarization(unittest.TestCase): 15 | 16 | """ 17 | Tests of the nlbin function for binarization of images 18 | """ 19 | def test_not_binarize_bw(self): 20 | """ 21 | Test that mode '1' images aren't binarized again. 22 | """ 23 | with Image.new('1', (1000,1000)) as im: 24 | self.assertEqual(im, nlbin(im)) 25 | 26 | def test_binarize_no_bw(self): 27 | """ 28 | Tests binarization of image formats without a 1bpp mode (JPG). 29 | """ 30 | with Image.open(os.path.join(resources, 'input.jpg')) as im: 31 | res = nlbin(im) 32 | # calculate histogram and check if only pixels of value 0/255 exist 33 | self.assertEqual(254, res.histogram().count(0), msg='Output not ' 34 | 'binarized') 35 | 36 | def test_binarize_tif(self): 37 | """ 38 | Tests binarization of RGB TIFF images. 39 | """ 40 | with Image.open(os.path.join(resources, 'input.tif')) as im: 41 | res = nlbin(im) 42 | # calculate histogram and check if only pixels of value 0/255 exist 43 | self.assertEqual(254, res.histogram().count(0), msg='Output not ' 44 | 'binarized') 45 | 46 | def test_binarize_grayscale(self): 47 | """ 48 | Test binarization of mode 'L' images. 49 | """ 50 | with Image.open(os.path.join(resources, 'input.tif')) as im: 51 | res = nlbin(im.convert('L')) 52 | # calculate histogram and check if only pixels of value 0/255 exist 53 | self.assertEqual(254, res.histogram().count(0), msg='Output not ' 54 | 'binarized') 55 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | kraken API 2 | ========== 3 | 4 | .. module:: kraken 5 | 6 | Kraken provides routines which are usable by third party tools. In general 7 | you can expect function in the ``kraken`` package to remain stable. We will try 8 | to keep these backward compatible, but as kraken is still in an early 9 | development stage and the API is still quite rudimentary nothing can be 10 | garantueed. 11 | 12 | kraken.binarization module 13 | -------------------------- 14 | 15 | .. automodule:: kraken.binarization 16 | :members: 17 | :show-inheritance: 18 | 19 | kraken.serialization module 20 | --------------------------- 21 | 22 | .. automodule:: kraken.serialization 23 | :members: 24 | :show-inheritance: 25 | 26 | kraken.pageseg module 27 | --------------------- 28 | 29 | .. automodule:: kraken.pageseg 30 | :members: 31 | :show-inheritance: 32 | 33 | kraken.rpred module 34 | ------------------- 35 | 36 | .. automodule:: kraken.rpred 37 | :members: 38 | :show-inheritance: 39 | 40 | kraken.transcribe module 41 | ------------------------ 42 | 43 | .. automodule:: kraken.transcribe 44 | :members: 45 | :show-inheritance: 46 | 47 | kraken.linegen module 48 | --------------------- 49 | 50 | .. 
automodule:: kraken.linegen 51 | :members: 52 | :show-inheritance: 53 | 54 | kraken.lib.models module 55 | ------------------------ 56 | 57 | .. automodule:: kraken.lib.models 58 | :members: 59 | :show-inheritance: 60 | 61 | kraken.lib.vgsl module 62 | ---------------------- 63 | 64 | .. automodule:: kraken.lib.vgsl 65 | :members: 66 | :show-inheritance: 67 | 68 | kraken.lib.codec 69 | ---------------- 70 | 71 | .. automodule:: kraken.lib.codec 72 | :members: 73 | :show-inheritance: 74 | 75 | kraken.lib.train module 76 | ----------------------- 77 | 78 | .. automodule:: kraken.lib.train 79 | :members: 80 | :show-inheritance: 81 | 82 | kraken.lib.dataset module 83 | ------------------------- 84 | 85 | .. automodule:: kraken.lib.dataset 86 | :members: 87 | :show-inheritance: 88 | 89 | kraken.lib.ctc_decoder 90 | ---------------------- 91 | 92 | .. automodule:: kraken.lib.ctc_decoder 93 | :members: 94 | :show-inheritance: 95 | -------------------------------------------------------------------------------- /docs/models.rst: -------------------------------------------------------------------------------- 1 | .. _models: 2 | 3 | Models 4 | ====== 5 | 6 | There are currently three kinds of models containing the recurrent neural 7 | networks doing all the character recognition supported by kraken: ``pronn`` 8 | files serializing old pickled ``pyrnn`` models as protobuf, clstm's native 9 | serialization, and versatile `Core ML 10 | `_ models. 11 | 12 | .. _pyrnn: 13 | 14 | pyrnn 15 | ----- 16 | 17 | These are serialized instances of python ``lstm.SeqRecognizer`` objects. Using 18 | such a model just entails loading the pickle and calling the appropriate 19 | functions to perform recognition much like a shared library in other 20 | programming languages. 21 | 22 | Support for these models has been dropped with kraken 1.0 as python 2.7 is 23 | phased out. 24 | 25 | pronn 26 | ----- 27 | 28 | Legacy python models can be converted to a protobuf based serialization. These 29 | are loadable by kraken 1.0 and will be automatically converted to Core ML. 30 | 31 | Protobuf models have several advantages over pickled ones. They are noticeably 32 | smaller (80Mb vs 1.8Mb for the default model), don't allow arbitrary code 33 | execution, and are upward compatible with python 3. Because they are so much 34 | more lightweight they are also loaded much faster. 35 | 36 | clstm 37 | ----- 38 | 39 | `clstm `_, a small and fast implementation of 40 | LSTM networks that was used in previous kraken versions. The model files can be 41 | loaded with pytorch-based kraken and will be converted to Core ML. 42 | 43 | CoreML 44 | ------ 45 | 46 | Core ML allows arbitrary network architectures in a compact serialization with 47 | metadata. This is the default format in pytorch-based kraken. 48 | 49 | Conversion 50 | ---------- 51 | 52 | Per default pronn/clstm models are automatically converted to the new Core ML 53 | format when explicitely defined using the ``-m`` option to the ``ocr`` utility 54 | on the command line. They are stored in the user kraken directory (default is 55 | ~/.kraken) and will be automatically substituted in future runs. 56 | 57 | If conversion is not desired, e.g. because there is a bug in the conversion 58 | routine, it can be disabled using the ``--disable-autoconversion`` switch. 
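As a sketch (the model and image names are purely illustrative), a recognition run with a legacy clstm model that triggers the one-time conversion could look like:

.. code-block:: console

   $ kraken -i bw.tif image.txt ocr -m toy.clstm

On later runs the converted Core ML model stored in the user kraken directory is substituted automatically; the ``--disable-autoconversion`` switch mentioned above skips this step.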
59 | -------------------------------------------------------------------------------- /kraken/lib/log.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2018 Benjamin Kiessling 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 14 | # or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 16 | """ 17 | kraken.lib.log 18 | ~~~~~~~~~~~~~~~~~ 19 | 20 | Handlers and formatters for logging. 21 | """ 22 | import time 23 | import click 24 | import logging 25 | 26 | 27 | class LogHandler(logging.Handler): 28 | def emit(self, record): 29 | msg = self.format(record) 30 | level = record.levelname.lower() 31 | err = level in ('warning', 'error', 'exception', 'critical') 32 | click.echo(msg, err=err) 33 | 34 | 35 | class LogFormatter(logging.Formatter): 36 | colors = { 37 | 'error': dict(fg='red'), 38 | 'exception': dict(fg='red'), 39 | 'critical': dict(fg='red'), 40 | 'warning': dict(fg='yellow'), 41 | } 42 | 43 | st_time = time.time() 44 | 45 | def format(self, record): 46 | if not record.exc_info: 47 | level = record.levelname.lower() 48 | msg = record.msg 49 | if level in self.colors: 50 | style = self.colors[level] 51 | else: 52 | style = {} 53 | msg = click.style(u'[{:2.4f}] {} '.format(time.time() - self.st_time, str(msg)), **style) 54 | return msg 55 | return logging.Formatter.format(self, record) 56 | 57 | 58 | def progressbar(*args, **kwargs): 59 | """ 60 | Slight extension to click's progressbar disabling output on when log level 61 | is set below 30. 
62 | """ 63 | import logging 64 | logger = logging.getLogger(__name__) 65 | bar = click.progressbar(*args, **kwargs) 66 | if logger.getEffectiveLevel() < 30: 67 | bar.is_hidden = True # type: ignore 68 | return bar 69 | 70 | 71 | def set_logger(logger=None, level=logging.ERROR): 72 | handler = LogHandler() 73 | handler.setFormatter(LogFormatter()) 74 | logger.addHandler(handler) 75 | logger.setLevel(level) 76 | -------------------------------------------------------------------------------- /tests/test_serialization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | import unittest 6 | import json 7 | import os 8 | 9 | from lxml import etree 10 | from io import StringIO 11 | from hocr_spec import HocrValidator 12 | 13 | from kraken import rpred 14 | from kraken import serialization 15 | 16 | thisfile = os.path.abspath(os.path.dirname(__file__)) 17 | resources = os.path.abspath(os.path.join(thisfile, 'resources')) 18 | 19 | class TestSerializations(unittest.TestCase): 20 | """ 21 | Tests for output serialization 22 | """ 23 | def setUp(self): 24 | with open(os.path.join(resources, 'records.json'), 'r') as fp: 25 | self.records = [rpred.ocr_record(**x) for x in json.load(fp)] 26 | self.validator = HocrValidator('standard') 27 | 28 | def test_vertical_hocr_serialization(self): 29 | """ 30 | Test vertical line hOCR serialization 31 | """ 32 | fp = StringIO() 33 | 34 | fp.write(serialization.serialize(self.records, image_name='foo.png', writing_mode='vertical-lr', template='hocr')) 35 | fp.seek(0) 36 | 37 | report = self.validator.validate(fp, parse_strict=True) 38 | self.assertTrue(report.is_valid()) 39 | 40 | def test_hocr_serialization(self): 41 | """ 42 | Test hOCR serialization 43 | """ 44 | fp = StringIO() 45 | 46 | fp.write(serialization.serialize(self.records, image_name='foo.png', template='hocr')) 47 | fp.seek(0) 48 | 49 | report = self.validator.validate(fp, parse_strict=True) 50 | self.assertTrue(report.is_valid()) 51 | 52 | def test_alto_serialization_validation(self): 53 | """ 54 | Validates output against ALTO schema 55 | """ 56 | fp = StringIO() 57 | 58 | fp.write(serialization.serialize(self.records, image_name='foo.png', template='alto')) 59 | doc = etree.fromstring(fp.getvalue().encode('utf-8')) 60 | with open(os.path.join(resources, 'alto-4-0.xsd')) as schema_fp: 61 | alto_schema = etree.XMLSchema(etree.parse(schema_fp)) 62 | alto_schema.assertValid(doc) 63 | 64 | def test_abbyyxml_serialization_validation(self): 65 | """ 66 | Validates output against abbyyXML schema 67 | """ 68 | fp = StringIO() 69 | 70 | fp.write(serialization.serialize(self.records, image_name='foo.png', template='abbyyxml')) 71 | doc = etree.fromstring(fp.getvalue().encode('utf-8')) 72 | with open(os.path.join(resources, 'FineReader10-schema-v1.xml')) as schema_fp: 73 | abbyy_schema = etree.XMLSchema(etree.parse(schema_fp)) 74 | abbyy_schema.assertValid(doc) 75 | -------------------------------------------------------------------------------- /kraken/iso15924.json: -------------------------------------------------------------------------------- 1 | {"520": "Tang", "20": "Xsux", "30": "Xpeo", "550": "Blis", "40": "Ugar", "50": "Egyp", "570": "Brai", "60": "Egyh", "437": "Loma", "70": "Egyd", "80": "Hluw", "90": "Maya", "95": "Sgnw", "610": "Inds", "100": "Mero", "101": "Merc", "105": "Sarb", "106": "Narb", "620": "Roro", "115": "Phnx", "116": "Lydi", "120": "Tfng", 
"123": "Samr", "124": "Armi", "125": "Hebr", "126": "Palm", "127": "Hatr", "130": "Prti", "131": "Phli", "132": "Phlp", "133": "Phlv", "134": "Avst", "135": "Syrc", "136": "Syrn", "137": "Syrj", "138": "Syre", "139": "Mani", "140": "Mand", "145": "Mong", "159": "Nbat", "160": "Arab", "161": "Aran", "165": "Nkoo", "166": "Adlm", "170": "Thaa", "175": "Orkh", "176": "Hung", "200": "Grek", "201": "Cari", "202": "Lyci", "204": "Copt", "206": "Goth", "210": "Ital", "211": "Runr", "212": "Ogam", "215": "Latn", "216": "Latg", "217": "Latf", "218": "Moon", "219": "Osge", "220": "Cyrl", "221": "Cyrs", "225": "Glag", "226": "Elba", "227": "Perm", "230": "Armn", "239": "Aghb", "240": "Geor", "241": "Geok", "755": "Dupl", "250": "Dsrt", "259": "Bass", "260": "Osma", "261": "Olck", "262": "Wara", "263": "Pauc", "264": "Mroo", "265": "Medf", "280": "Visp", "281": "Shaw", "282": "Plrd", "284": "Jamo", "285": "Bopo", "286": "Hang", "287": "Kore", "288": "Kits", "290": "Teng", "291": "Cirt", "292": "Sara", "293": "Piqd", "300": "Brah", "302": "Sidd", "305": "Khar", "310": "Guru", "312": "Gong", "313": "Gonm", "314": "Mahj", "315": "Deva", "316": "Sylo", "317": "Kthi", "318": "Sind", "319": "Shrd", "320": "Gujr", "321": "Takr", "322": "Khoj", "323": "Mult", "324": "Modi", "325": "Beng", "326": "Tirh", "327": "Orya", "328": "Dogr", "329": "Soyo", "330": "Tibt", "331": "Phag", "332": "Marc", "333": "Newa", "334": "Bhks", "335": "Lepc", "336": "Limb", "337": "Mtei", "338": "Ahom", "339": "Zanb", "340": "Telu", "343": "Gran", "344": "Saur", "345": "Knda", "346": "Taml", "347": "Mlym", "348": "Sinh", "349": "Cakm", "350": "Mymr", "351": "Lana", "352": "Thai", "353": "Tale", "354": "Talu", "355": "Khmr", "356": "Laoo", "357": "Kali", "358": "Cham", "359": "Tavt", "360": "Bali", "361": "Java", "362": "Sund", "363": "Rjng", "364": "Leke", "365": "Batk", "366": "Maka", "367": "Bugi", "370": "Tglg", "371": "Hano", "372": "Buhd", "373": "Tagb", "900": "Qaaa", "398": "Sora", "399": "Lisu", "400": "Lina", "401": "Linb", "403": "Cprt", "410": "Hira", "411": "Kana", "412": "Hrkt", "413": "Jpan", "420": "Nkgb", "430": "Ethi", "435": "Bamu", "436": "Kpel", "949": "Qabx", "438": "Mend", "439": "Afak", "440": "Cans", "445": "Cher", "450": "Hmng", "460": "Yiii", "470": "Vaii", "480": "Wole", "993": "Zsye", "994": "Zinh", "995": "Zmth", "996": "Zsym", "997": "Zxxx", "998": "Zyyy", "999": "Zzzz", "499": "Nshu", "500": "Hani", "501": "Hans", "502": "Hant", "503": "Hanb", "505": "Kitl", "510": "Jurc"} 2 | -------------------------------------------------------------------------------- /kraken/templates/alto: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | pixel 7 | 8 | {{ page.name }} 9 | 10 | 11 | 12 | 13 | kraken 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | {% for line in page.lines %} 24 | 29 | {% for segment in line.recognition %} 30 | {# ALTO forbids encoding whitespace before any String/Shape tags #} 31 | {% if segment.text is whitespace and loop.index > 1 %} 32 | 37 | {% else %} 38 | 45 | {% for char in segment.recognition %} 46 | 53 | 54 | {% endfor %} 55 | 56 | {% endif %} 57 | {% endfor %} 58 | 59 | {% endfor %} 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /kraken/lib/util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ocropus's magic PIL-numpy array conversion routines. They express slightly 3 | different behavior from PIL.Image.toarray(). 
4 | """ 5 | import unicodedata 6 | import numpy as np 7 | 8 | from PIL import Image 9 | 10 | __all__ = ['pil2array', 'array2pil'] 11 | 12 | 13 | def pil2array(im: Image, alpha: int = 0) -> np.array: 14 | if im.mode == '1': 15 | return np.array(im.convert('L')) 16 | return np.array(im) 17 | 18 | 19 | def array2pil(a: np.array) -> Image: 20 | if a.dtype == np.dtype("B"): 21 | if a.ndim == 2: 22 | return Image.frombytes("L", (a.shape[1], a.shape[0]), 23 | a.tostring()) 24 | elif a.ndim == 3: 25 | return Image.frombytes("RGB", (a.shape[1], a.shape[0]), 26 | a.tostring()) 27 | else: 28 | raise Exception("bad image rank") 29 | elif a.dtype == np.dtype('float32'): 30 | return Image.frombytes("F", (a.shape[1], a.shape[0]), a.tostring()) 31 | else: 32 | raise Exception("unknown image type") 33 | 34 | 35 | def is_bitonal(im: Image) -> bool: 36 | """ 37 | Tests a PIL.Image for bitonality. 38 | 39 | Args: 40 | im (PIL.Image): Image to test 41 | 42 | Returns: 43 | True if the image contains only two different color values. False 44 | otherwise. 45 | """ 46 | return im.getcolors(2) is not None 47 | 48 | 49 | def get_im_str(im: Image) -> str: 50 | return im.filename if hasattr(im, 'filename') else str(im) 51 | 52 | 53 | def is_printable(char: str) -> bool: 54 | """ 55 | Determines if a chode point is printable/visible when printed. 56 | 57 | Args: 58 | char (str): Input code point. 59 | 60 | Returns: 61 | True if printable, False otherwise. 62 | """ 63 | letters = ('LC', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu') 64 | numbers = ('Nd', 'Nl', 'No') 65 | punctuation = ('Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps') 66 | symbol = ('Sc', 'Sk', 'Sm', 'So') 67 | printable = letters + numbers + punctuation + symbol 68 | 69 | return unicodedata.category(char) in printable 70 | 71 | 72 | def make_printable(char: str) -> str: 73 | """ 74 | Takes a Unicode code point and return a printable representation of it. 75 | 76 | Args: 77 | char (str): Input code point 78 | 79 | Returns: 80 | Either the original code point, the name of the code point if it is a 81 | combining mark, whitespace etc., or the hex code if it is a control 82 | symbol. 83 | """ 84 | if not char or is_printable(char): 85 | return char 86 | elif unicodedata.category(char) in ('Cc', 'Cs', 'Co'): 87 | return '0x{:x}'.format(ord(char)) 88 | else: 89 | return unicodedata.name(char) 90 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Description 2 | =========== 3 | 4 | .. image:: https://travis-ci.org/mittagessen/kraken.svg?branch=master 5 | :target: https://travis-ci.org/mittagessen/kraken 6 | 7 | kraken is a fork of ocropus intended to rectify a number of issues while 8 | preserving (mostly) functional equivalence. Its main features are: 9 | 10 | - Script detection and multiscript recognition support 11 | - `Right-to-Left `_, `BiDi 12 | `_, and Top-to-Bottom 13 | script support 14 | - `ALTO `_, abbyXML, and hOCR output 15 | - Word bounding boxes and character cuts 16 | - `Public repository `_ of model files 17 | - Dynamic recognition model architectures and GPU acceleration 18 | - Clean public API 19 | 20 | Installation 21 | ============ 22 | 23 | When using a recent version of pip all dependencies will be installed from 24 | binary wheel packages, so installing build-essential or your distributions 25 | equivalent is often unnecessary. 
26 | 27 | Install the latest master version through `conda `_: 28 | 29 | :: 30 | 31 | $ wget https://raw.githubusercontent.com/mittagessen/kraken/master/environment.yml 32 | $ conda env create -f environment.yml 33 | 34 | or: 35 | 36 | :: 37 | 38 | $ wget https://raw.githubusercontent.com/mittagessen/kraken/master/environment_cuda.yml 39 | $ conda env create -f environment_cuda.yml 40 | 41 | for CUDA acceleration with the appropriate hardware. 42 | 43 | It is also possible to install the stable version with the old clstm backend from pypi: 44 | 45 | :: 46 | 47 | $ pip install kraken 48 | 49 | Finally you'll have to scrounge up a model to do the actual recognition of 50 | characters. To download the default model for printed English text and place it 51 | in the kraken directory for the current user: 52 | 53 | :: 54 | 55 | $ kraken get default 56 | 57 | A list of libre models available in the central repository can be retrieved by 58 | running: 59 | 60 | :: 61 | 62 | $ kraken list 63 | 64 | Quickstart 65 | ========== 66 | 67 | Recognizing text on an image using the default parameters including the 68 | prerequisite steps of binarization and page segmentation: 69 | 70 | :: 71 | 72 | $ kraken -i image.tif image.txt binarize segment ocr 73 | 74 | To binarize a single image using the nlbin algorithm: 75 | 76 | :: 77 | 78 | $ kraken -i image.tif bw.png binarize 79 | 80 | To segment a binarized image into reading-order sorted lines: 81 | 82 | :: 83 | 84 | $ kraken -i bw.png lines.json segment 85 | 86 | To OCR a binarized image using the default RNN and the previously generated 87 | page segmentation: 88 | 89 | :: 90 | 91 | $ kraken -i bw.png image.txt ocr --lines lines.json 92 | 93 | All subcommands and options are documented. Use the ``help`` option to get more 94 | information. 95 | 96 | Documentation 97 | ============= 98 | 99 | Have a look at the `docs `_ 100 | 101 | Funding 102 | ======= 103 | 104 | kraken is developed at `Université PSL `_. 
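In addition to the command line drivers shown above, the same pipeline is exposed as a Python API. A minimal sketch, closely following ``kraken/contrib/recognition_boxes.py``, is given below; the model file name is a placeholder and the ``prediction`` attribute on the returned records is an assumption based on current kraken versions:

::

    from PIL import Image

    from kraken.binarization import nlbin
    from kraken.pageseg import segment
    from kraken.rpred import rpred
    from kraken.lib import models

    # load a recognition model in any supported format (pronn, clstm, CoreML)
    net = models.load_any('en-default.mlmodel')

    im = Image.open('image.tif')
    bw = nlbin(im)             # binarization
    seg = segment(bw)          # reading-order sorted line bounding boxes
    for record in rpred(net, bw, seg):    # lazy per-line recognition
        print(record.prediction)          # recognized text of the line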
105 | -------------------------------------------------------------------------------- /kraken/lib/lineest.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import PIL 3 | import numpy as np 4 | 5 | from kraken.lib.util import pil2array, array2pil 6 | from scipy.ndimage import interpolation, filters 7 | 8 | __all__ = ['CenterNormalizer', 'dewarp'] 9 | 10 | 11 | def scale_to_h(img, target_height, order=1, dtype=np.dtype('f'), cval=0): 12 | h, w = img.shape 13 | scale = target_height*1.0/h 14 | target_width = int(scale*w) 15 | with warnings.catch_warnings(): 16 | warnings.simplefilter('ignore', UserWarning) 17 | output = interpolation.affine_transform(1.0*img, np.ones(2)/scale, 18 | order=order, 19 | output_shape=(target_height, 20 | target_width), 21 | mode='constant', cval=cval) 22 | output = np.array(output, dtype=dtype) 23 | return output 24 | 25 | 26 | class CenterNormalizer(object): 27 | def __init__(self, target_height=48, params=(4, 1.0, 0.3)): 28 | self.target_height = target_height 29 | self.range, self.smoothness, self.extra = params 30 | 31 | def setHeight(self, target_height): 32 | self.target_height = target_height 33 | 34 | def measure(self, line): 35 | h, w = line.shape 36 | # XXX: this filter is awfully slow 37 | smoothed = filters.gaussian_filter(line, (h*0.5, h*self.smoothness), 38 | mode='constant') 39 | smoothed += 0.001*filters.uniform_filter(smoothed, (h*0.5, w), 40 | mode='constant') 41 | self.shape = (h, w) 42 | a = np.argmax(smoothed, axis=0) 43 | a = filters.gaussian_filter(a, h*self.extra) 44 | self.center = np.array(a, 'i') 45 | deltas = np.abs(np.arange(h)[:, np.newaxis]-self.center[np.newaxis, :]) 46 | self.mad = np.mean(deltas[line != 0]) 47 | self.r = int(1+self.range*self.mad) 48 | 49 | def dewarp(self, img, cval=0, dtype=np.dtype('f')): 50 | if img.shape != self.shape: 51 | raise Exception('Measured and dewarp image shapes different') 52 | h, w = img.shape 53 | padded = np.vstack([cval*np.ones((h, w)), img, cval*np.ones((h, w))]) 54 | center = self.center+h 55 | dewarped = [padded[center[i]-self.r:center[i]+self.r, i] for i in 56 | range(w)] 57 | dewarped = np.array(dewarped, dtype=dtype).T 58 | return dewarped 59 | 60 | def normalize(self, img, order=1, dtype=np.dtype('f'), cval=0): 61 | dewarped = self.dewarp(img, cval=cval, dtype=dtype) 62 | h, w = dewarped.shape 63 | scaled = scale_to_h(dewarped, self.target_height, order=order, 64 | dtype=dtype, cval=cval) 65 | return scaled 66 | 67 | 68 | def dewarp(normalizer: CenterNormalizer, im: PIL.Image) -> PIL.Image: 69 | """ 70 | Dewarps an image of a line using a kraken.lib.lineest.CenterNormalizer 71 | instance. 72 | 73 | Args: 74 | normalizer (kraken.lib.lineest.CenterNormalizer): A line normalizer 75 | instance 76 | im (PIL.Image): Image to dewarp 77 | 78 | Returns: 79 | PIL.Image containing the dewarped image. 80 | """ 81 | line = pil2array(im) 82 | temp = np.amax(line)-line 83 | temp = temp*1.0/np.amax(temp) 84 | normalizer.measure(temp) 85 | line = normalizer.normalize(line, cval=np.amax(line)) 86 | return array2pil(line) 87 | -------------------------------------------------------------------------------- /kraken/templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 10 | 11 | 12 | 15 | 73 | 81 | 82 | {% for page in pages %} 83 |
84 |
85 |
86 | photo 87 | {% for line in page.lines %} 88 | 89 | {% endfor %} 90 |
91 |
92 |
93 |
    94 | {% for line in page.lines %} 95 |
96 | {% if line.text %} 97 | {{ line.text }} 98 | {% endif %} 99 |
100 | {% endfor %} 101 |
102 |
103 |
104 | {% endfor %} 105 | 106 | 107 | -------------------------------------------------------------------------------- /kraken/lib/lstm.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | import numpy as np 3 | 4 | from typing import Dict 5 | from scipy.ndimage import measurements 6 | from scipy.special import expit 7 | 8 | initial_range = 0.1 9 | 10 | 11 | class Codec(object): 12 | """Translate between integer codes and characters.""" 13 | def init(self, charset): 14 | charset = sorted(list(set(charset))) 15 | self.code2char = {} # type: Dict[int, str] 16 | self.char2code = {} # type: Dict[str, int] 17 | for code,char in enumerate(charset): 18 | self.code2char[code] = char 19 | self.char2code[char] = code 20 | return self 21 | def size(self): 22 | """The total number of codes (use this for the number of output 23 | classes when training a classifier.""" 24 | return len(list(self.code2char.keys())) 25 | def encode(self, s): 26 | "Encode the string `s` into a code sequence." 27 | tab = self.char2code 28 | dflt = self.char2code["~"] 29 | return [self.char2code.get(c,dflt) for c in s] 30 | def decode(self, l): 31 | "Decode a code sequence into a string." 32 | s = [self.code2char.get(c,"~") for c in l] 33 | return s 34 | 35 | class Network: 36 | def predict(self,xs): 37 | """Prediction is the same as forward propagation.""" 38 | return self.forward(xs) 39 | 40 | class Softmax(Network): 41 | """A logistic regression network.""" 42 | def __init__(self,Nh,No,initial_range=0.1,rand=None): 43 | pass 44 | def ninputs(self): 45 | pass 46 | def noutputs(self): 47 | pass 48 | def forward(self,ys): 49 | pass 50 | def backward(self,deltas): 51 | pass 52 | 53 | 54 | class LSTM(Network): 55 | """A standard LSTM network. This is a direct implementation of all the forward 56 | and backward propagation formulas, mainly for speed. 
(There is another, more 57 | abstract implementation as well, but that's significantly slower in Python 58 | due to function call overhead.)""" 59 | def __init__(self,ni,ns,initial=0.1,maxlen=5000): 60 | pass 61 | 62 | def init_weights(self,initial): 63 | pass 64 | 65 | def allocate(self,n): 66 | pass 67 | 68 | def reset(self,n): 69 | pass 70 | 71 | def forward(self,xs): 72 | pass 73 | 74 | ################################################################ 75 | # combination classifiers 76 | ################################################################ 77 | 78 | class Stacked(Network): 79 | """Stack two networks on top of each other.""" 80 | def __init__(self,nets): 81 | self.nets = nets 82 | def forward(self,xs): 83 | pass 84 | 85 | class Reversed(Network): 86 | """Run a network on the time-reversed input.""" 87 | def __init__(self,net): 88 | self.net = net 89 | def forward(self,xs): 90 | pass 91 | 92 | class Parallel(Network): 93 | """Run multiple networks in parallel on the same input.""" 94 | def __init__(self,*nets): 95 | self.nets = nets 96 | def forward(self,xs): 97 | pass 98 | 99 | def BIDILSTM(Ni,Ns,No): 100 | """A bidirectional LSTM, constructed from regular and reversed LSTMs.""" 101 | lstm1 = LSTM(Ni,Ns) 102 | lstm2 = Reversed(LSTM(Ni,Ns)) 103 | bidi = Parallel(lstm1,lstm2) 104 | logreg = Softmax(2*Ns,No) 105 | stacked = Stacked([bidi,logreg]) 106 | return stacked 107 | 108 | 109 | class SeqRecognizer(Network): 110 | """Perform sequence recognition using BIDILSTM and alignment.""" 111 | def __init__(self,ninput,nstates,noutput=-1,codec=None,normalize=None): 112 | self.Ni = ninput 113 | if codec: noutput = codec.size() 114 | self.No = noutput 115 | self.lstm = BIDILSTM(ninput,nstates,noutput) 116 | self.codec = codec 117 | def translate_back(self, output): 118 | pass 119 | def translate_back_locations(self, output): 120 | pass 121 | def predictSequence(self,xs): 122 | "Predict an integer sequence of codes." 123 | pass 124 | def l2s(self,l): 125 | "Convert a code sequence into a unicode string after recognition." 126 | l = self.codec.decode(l) 127 | return u"".join(l) 128 | def predictString(self,xs): 129 | "Predict output as a string. This uses codec and normalizer." 130 | pass 131 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | kraken 2 | ====== 3 | 4 | .. toctree:: 5 | :hidden: 6 | :maxdepth: 2 7 | 8 | advanced 9 | Training 10 | API 11 | Models 12 | 13 | kraken is a turn-key OCR system forked from `ocropus 14 | `_. It is intended to rectify a number of 15 | issues while preserving (mostly) functional equivalence. 16 | 17 | Features 18 | ======== 19 | 20 | kraken's main features are: 21 | 22 | - Script detection and multi-script recognition support 23 | - `Right-to-Left `_, `BiDi 24 | `_, and Top-to-Bottom 25 | script support 26 | - `ALTO `_, abbyXML, and hOCR output 27 | - Word bounding boxes and character cuts 28 | - `Public repository `_ of model files 29 | - :ref:`Lightweight model files ` 30 | - :ref:`Variable recognition network architectures ` 31 | 32 | All functionality not pertaining to OCR and prerequisite steps has been 33 | removed, i.e. no more error rate measuring, etc. 34 | 35 | Pull requests and code contributions are always welcome. 36 | 37 | Installation 38 | ============ 39 | 40 | kraken requires some external libraries to run. On Debian/Ubuntu they may be 41 | installed using: 42 | 43 | .. 
code-block:: console 44 | 45 | # apt install libpangocairo-1.0 libxml2 libblas3 liblapack3 python3-dev python3-pip 46 | 47 | pip 48 | --- 49 | 50 | .. code-block:: console 51 | 52 | $ pip3 install kraken 53 | 54 | or by running pip in the git repository: 55 | 56 | .. code-block:: console 57 | 58 | $ pip3 install . 59 | 60 | conda 61 | ----- 62 | 63 | If you are running `Anaconda `_/miniconda, use: 64 | 65 | .. code-block:: console 66 | 67 | $ conda install -c mittagessen kraken 68 | 69 | Models 70 | ------ 71 | 72 | Finally you'll have to scrounge up a recognition model to do the actual 73 | recognition of characters. To download the default English text recognition 74 | model and place it in the user's kraken directory: 75 | 76 | .. code-block:: console 77 | 78 | $ kraken get default 79 | 80 | A list of libre models available in the central repository can be retrieved by 81 | running: 82 | 83 | .. code-block:: console 84 | 85 | $ kraken list 86 | 87 | Model metadata can be extracted using: 88 | 89 | .. code-block:: console 90 | 91 | $ kraken show arabic-alam-al-kutub 92 | name: arabic-alam-al-kutub.clstm 93 | 94 | An experimental model for Classical Arabic texts. 95 | 96 | Network trained on 889 lines of [0] as a test case for a general Classical 97 | Arabic model. Ground truth was prepared by Sarah Savant 98 | and Maxim Romanov . 99 | 100 | Vocalization was omitted in the ground truth. Training was stopped at ~35000 101 | iterations with an accuracy of 97%. 102 | 103 | [0] Ibn al-Faqīh (d. 365 AH). Kitāb al-buldān. Edited by Yūsuf al-Hādī, 1st 104 | edition. Bayrūt: ʿĀlam al-kutub, 1416 AH/1996 CE. 105 | alphabet: !()-.0123456789:[] «»،؟ءابةتثجحخدذرزسشصضطظعغفقكلمنهوىي ARABIC 106 | MADDAH ABOVE, ARABIC HAMZA ABOVE, ARABIC HAMZA BELOW 107 | 108 | Quickstart 109 | ========== 110 | 111 | Recognizing text on an image using the default parameters including the 112 | prerequisite steps of binarization and page segmentation: 113 | 114 | .. code-block:: console 115 | 116 | $ kraken -i image.tif image.txt binarize segment ocr 117 | Loading RNN ✓ 118 | Processing ⣻ 119 | 120 | To binarize a single image using the nlbin algorithm: 121 | 122 | .. code-block:: console 123 | 124 | $ kraken -i image.tif bw.tif binarize 125 | 126 | To segment a binarized image into reading-order sorted lines: 127 | 128 | .. code-block:: console 129 | 130 | $ kraken -i bw.tif lines.json segment 131 | 132 | To OCR a binarized image using the default RNN and the previously generated 133 | page segmentation: 134 | 135 | .. code-block:: console 136 | 137 | $ kraken -i bw.tif image.txt ocr --lines lines.json 138 | 139 | All commands and their parameters are documented, just add the standard 140 | ``--help`` flag for further information. 141 | 142 | Training Tutorial 143 | ================= 144 | 145 | There is a training tutorial at :doc:`training`. 146 | 147 | .. _license: 148 | 149 | License 150 | ======= 151 | 152 | ``Kraken`` is provided under the terms and conditions of the `Apache 2.0 153 | License `_ retained 154 | from the original ``ocropus`` distribution. 155 | -------------------------------------------------------------------------------- /kraken/binarization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2015 Benjamin Kiessling 4 | # 2014 Thomas M. Breuel 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 15 | # or implied. See the License for the specific language governing 16 | # permissions and limitations under the License. 17 | """ 18 | kraken.binarization 19 | ~~~~~~~~~~~~~~~~~~~ 20 | 21 | An adaptive binarization algorithm. 22 | """ 23 | import warnings 24 | import logging 25 | import numpy as np 26 | 27 | from PIL import Image 28 | from kraken.lib.util import pil2array, array2pil, is_bitonal, get_im_str 29 | from scipy.ndimage import filters, interpolation, morphology 30 | 31 | from kraken.lib.exceptions import KrakenInputException 32 | 33 | __all__ = ['nlbin'] 34 | 35 | logger = logging.getLogger(__name__) 36 | 37 | 38 | def nlbin(im: Image, 39 | threshold: float = 0.5, 40 | zoom: float = 0.5, 41 | escale: float = 1.0, 42 | border: float = 0.1, 43 | perc: int = 80, 44 | range: int = 20, 45 | low: int = 5, 46 | high: int = 90) -> Image: 47 | """ 48 | Performs binarization using non-linear processing. 49 | 50 | Args: 51 | im (PIL.Image): 52 | threshold (float): 53 | zoom (float): Zoom for background page estimation 54 | escale (float): Scale for estimating a mask over the text region 55 | border (float): Ignore this much of the border 56 | perc (int): Percentage for filters 57 | range (int): Range for filters 58 | low (int): Percentile for black estimation 59 | high (int): Percentile for white estimation 60 | 61 | Returns: 62 | PIL.Image containing the binarized image 63 | 64 | Raises: 65 | KrakenInputException when trying to binarize an empty image. 
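    A minimal usage sketch (the input file name is illustrative; any
    PIL-loadable page image works)::

        from PIL import Image
        from kraken.binarization import nlbin

        im = Image.open('page.tif')
        bw = nlbin(im, threshold=0.5)   # returns a bitonal PIL.Image
        bw.save('page_bw.png')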
66 | """ 67 | im_str = get_im_str(im) 68 | logger.info(u'Binarizing {}'.format(im_str)) 69 | if is_bitonal(im): 70 | logger.info(u'Skipping binarization because {} is bitonal.'.format(im_str)) 71 | return im 72 | # convert to grayscale first 73 | logger.debug(u'Converting {} to grayscale'.format(im_str)) 74 | im = im.convert('L') 75 | raw = pil2array(im) 76 | logger.debug(u'Scaling and normalizing') 77 | # rescale image to between -1 or 0 and 1 78 | raw = raw/np.float(np.iinfo(raw.dtype).max) 79 | # perform image normalization 80 | if np.amax(raw) == np.amin(raw): 81 | logger.warning(u'Trying to binarize empty image {}'.format(im_str)) 82 | raise KrakenInputException('Image is empty') 83 | image = raw-np.amin(raw) 84 | image /= np.amax(image) 85 | 86 | logger.debug(u'Interpolation and percentile filtering') 87 | with warnings.catch_warnings(): 88 | warnings.simplefilter('ignore', UserWarning) 89 | m = interpolation.zoom(image, zoom) 90 | m = filters.percentile_filter(m, perc, size=(range, 2)) 91 | m = filters.percentile_filter(m, perc, size=(2, range)) 92 | m = interpolation.zoom(m, 1.0/zoom) 93 | w, h = np.minimum(np.array(image.shape), np.array(m.shape)) 94 | flat = np.clip(image[:w, :h]-m[:w, :h]+1, 0, 1) 95 | 96 | # estimate low and high thresholds 97 | d0, d1 = flat.shape 98 | o0, o1 = int(border*d0), int(border*d1) 99 | est = flat[o0:d0-o0, o1:d1-o1] 100 | logger.debug(u'Threshold estimates {}'.format(est)) 101 | # by default, we use only regions that contain 102 | # significant variance; this makes the percentile 103 | # based low and high estimates more reliable 104 | logger.debug(u'Refine estimates') 105 | v = est-filters.gaussian_filter(est, escale*20.0) 106 | v = filters.gaussian_filter(v**2, escale*20.0)**0.5 107 | v = (v > 0.3*np.amax(v)) 108 | v = morphology.binary_dilation(v, structure=np.ones((int(escale * 50), 1))) 109 | v = morphology.binary_dilation(v, structure=np.ones((1, int(escale * 50)))) 110 | est = est[v] 111 | lo = np.percentile(est.ravel(), low) 112 | hi = np.percentile(est.ravel(), high) 113 | 114 | flat -= lo 115 | flat /= (hi-lo) 116 | flat = np.clip(flat, 0, 1) 117 | logger.debug(u'Thresholding at {}'.format(threshold)) 118 | bin = np.array(255*(flat > threshold), 'B') 119 | return array2pil(bin) 120 | -------------------------------------------------------------------------------- /kraken/lib/morph.py: -------------------------------------------------------------------------------- 1 | """ 2 | Various add-ons to the SciPy morphology package 3 | """ 4 | import numpy as np 5 | from scipy.ndimage import morphology, measurements, filters 6 | 7 | 8 | def label(image: np.array, **kw) -> np.array: 9 | """ 10 | Redefine the scipy.ndimage.measurements.label function to work with a wider 11 | range of data types. The default function is inconsistent about the data 12 | types it accepts on different platforms. 13 | """ 14 | try: 15 | return measurements.label(image, **kw) 16 | except Exception: 17 | pass 18 | types = ["int32", "uint32", "int64", "uint64", "int16", "uint16"] 19 | for t in types: 20 | try: 21 | return measurements.label(np.array(image, dtype=t), **kw) 22 | except Exception: 23 | pass 24 | # let it raise the same exception as before 25 | return measurements.label(image, **kw) 26 | 27 | 28 | def find_objects(image: np.array, **kw) -> np.array: 29 | """ 30 | Redefine the scipy.ndimage.measurements.find_objects function to work with 31 | a wider range of data types. 
The default function is inconsistent about 32 | the data types it accepts on different platforms. 33 | """ 34 | try: 35 | return measurements.find_objects(image, **kw) 36 | except Exception: 37 | pass 38 | types = ["int32", "uint32", "int64", "uint64", "int16", "uint16"] 39 | for t in types: 40 | try: 41 | return measurements.find_objects(np.array(image, dtype=t), **kw) 42 | except Exception: 43 | pass 44 | # let it raise the same exception as before 45 | return measurements.find_objects(image, **kw) 46 | 47 | 48 | def r_dilation(image, size, origin=0): 49 | """Dilation with rectangular structuring element using maximum_filter""" 50 | return filters.maximum_filter(image, size, origin=origin) 51 | 52 | 53 | def r_erosion(image, size, origin=0): 54 | """Erosion with rectangular structuring element using maximum_filter""" 55 | return filters.minimum_filter(image, size, origin=origin) 56 | 57 | 58 | def rb_dilation(image, size, origin=0): 59 | """Binary dilation using linear filters.""" 60 | output = np.zeros(image.shape, 'f') 61 | filters.uniform_filter(image, size, output=output, origin=origin, 62 | mode='constant', cval=0) 63 | return np.array(output > 0, 'i') 64 | 65 | 66 | def rb_erosion(image, size, origin=0): 67 | """Binary erosion using linear filters.""" 68 | output = np.zeros(image.shape, 'f') 69 | filters.uniform_filter(image, size, output=output, origin=origin, 70 | mode='constant', cval=1) 71 | return np.array(output == 1, 'i') 72 | 73 | 74 | def rb_opening(image, size, origin=0): 75 | """Binary opening using linear filters.""" 76 | image = rb_erosion(image, size, origin=origin) 77 | return rb_dilation(image, size, origin=origin) 78 | 79 | 80 | def spread_labels(labels, maxdist=9999999): 81 | """Spread the given labels to the background""" 82 | distances, features = morphology.distance_transform_edt(labels == 0, 83 | return_distances=1, 84 | return_indices=1) 85 | indexes = features[0] * labels.shape[1] + features[1] 86 | spread = labels.ravel()[indexes.ravel()].reshape(*labels.shape) 87 | spread *= (distances < maxdist) 88 | return spread 89 | 90 | 91 | def correspondences(labels1, labels2): 92 | """Given two labeled images, compute an array giving the correspondences 93 | between labels in the two images.""" 94 | q = 100000 95 | combo = labels1 * q + labels2 96 | result = np.unique(combo) 97 | result = np.array([result // q, result % q]) 98 | return result 99 | 100 | 101 | def propagate_labels(image, labels, conflict=0): 102 | """Given an image and a set of labels, apply the labels 103 | to all the regions in the image that overlap a label. 
104 | Assign the value `conflict` to any labels that have a conflict.""" 105 | rlabels, _ = label(image) 106 | cors = correspondences(rlabels, labels) 107 | outputs = np.zeros(np.amax(rlabels) + 1, 'i') 108 | oops = -(1 << 30) 109 | for o, i in cors.T: 110 | if outputs[o] != 0: 111 | outputs[o] = oops 112 | else: 113 | outputs[o] = i 114 | outputs[outputs == oops] = conflict 115 | outputs[0] = 0 116 | return outputs[rlabels] 117 | 118 | 119 | def select_regions(binary, f, min=0, nbest=100000): 120 | """Given a scoring function f over slice tuples (as returned by 121 | find_objects), keeps at most nbest regions whose scores is higher 122 | than min.""" 123 | labels, n = label(binary) 124 | objects = find_objects(labels) 125 | scores = [f(o) for o in objects] 126 | best = np.argsort(scores) 127 | keep = np.zeros(len(objects) + 1, 'i') 128 | if nbest > 0: 129 | for i in best[-nbest:]: 130 | if scores[i] <= min: 131 | continue 132 | keep[i+1] = 1 133 | return keep[labels] 134 | -------------------------------------------------------------------------------- /kraken/transcribe.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2015 Benjamin Kiessling 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 14 | # or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 16 | """ 17 | Utility functions for ground truth transcription. 18 | """ 19 | from kraken.lib.exceptions import KrakenInputException 20 | from kraken.lib.util import get_im_str 21 | 22 | from typing import List 23 | 24 | from jinja2 import Environment, PackageLoader 25 | from io import BytesIO 26 | 27 | import uuid 28 | import base64 29 | import logging 30 | 31 | logger = logging.getLogger() 32 | 33 | 34 | class TranscriptionInterface(object): 35 | 36 | def __init__(self, font=None, font_style=None): 37 | logging.info(u'Initializing transcription object.') 38 | logger.debug(u'Initializing jinja environment.') 39 | env = Environment(loader=PackageLoader('kraken', 'templates'), autoescape=True) 40 | logger.debug(u'Loading transcription template.') 41 | self.tmpl = env.get_template('layout.html') 42 | self.pages = [] # type: List[dict] 43 | self.font = {'font': font, 'style': font_style} 44 | self.text_direction = 'horizontal-tb' 45 | self.page_idx = 1 46 | self.line_idx = 1 47 | self.seg_idx = 1 48 | 49 | def add_page(self, im, segmentation=None, records=None): 50 | """ 51 | Adds an image to the transcription interface, optionally filling in 52 | information from a list of ocr_record objects. 53 | 54 | Args: 55 | im (PIL.Image): Input image 56 | segmentation (dict): Output of the segment method. 57 | records (list): A list of ocr_record objects. 
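    A rough end-to-end sketch (the page segmentation step via
    kraken.pageseg and the file names are assumptions for illustration)::

        from PIL import Image
        from kraken import pageseg
        from kraken.transcribe import TranscriptionInterface

        im = Image.open('bw.png')              # binarized page image
        seg = pageseg.segment(im)              # dict with 'text_direction' and 'boxes'
        ti = TranscriptionInterface()
        ti.add_page(im, segmentation=seg)
        with open('transcription.html', 'wb') as fp:
            ti.write(fp)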
58 | """ 59 | im_str = get_im_str(im) 60 | logger.info(u'Adding page {} with {} lines'.format(im_str, len(segmentation) if segmentation else len(records))) 61 | page = {} 62 | fd = BytesIO() 63 | im.save(fd, format='png', optimize=True) 64 | page['index'] = self.page_idx 65 | self.page_idx += 1 66 | logger.debug(u'Base64 encoding image') 67 | page['img'] = 'data:image/png;base64,' + base64.b64encode(fd.getvalue()).decode('ascii') 68 | page['lines'] = [] 69 | if records: 70 | logger.debug(u'Adding records.') 71 | self.text_direction = segmentation['text_direction'] 72 | for record, bbox in zip(records, segmentation['boxes']): 73 | page['lines'].append({'index': self.line_idx, 'text': record.prediction, 74 | 'left': 100*int(bbox[0]) / im.size[0], 75 | 'top': 100*int(bbox[1]) / im.size[1], 76 | 'width': 100*(bbox[2] - bbox[0])/im.size[0], 77 | 'height': 100*(int(bbox[3]) - int(bbox[1]))/im.size[1], 78 | 'bbox': '{}, {}, {}, {}'.format(int(bbox[0]), 79 | int(bbox[1]), 80 | int(bbox[2]), 81 | int(bbox[3]))}) 82 | 83 | self.line_idx += 1 84 | elif segmentation: 85 | logger.debug(u'Adding segmentations.') 86 | self.text_direction = segmentation['text_direction'] 87 | for bbox in segmentation['boxes']: 88 | page['lines'].append({'index': self.line_idx, 89 | 'left': 100*int(bbox[0]) / im.size[0], 90 | 'top': 100*int(bbox[1]) / im.size[1], 91 | 'width': 100*(bbox[2] - bbox[0])/im.size[0], 92 | 'height': 100*(int(bbox[3]) - int(bbox[1]))/im.size[1], 93 | 'bbox': '{}, {}, {}, {}'.format(int(bbox[0]), 94 | int(bbox[1]), 95 | int(bbox[2]), 96 | int(bbox[3]))}) 97 | self.line_idx += 1 98 | else: 99 | raise KrakenInputException('Neither segmentations nor records given') 100 | self.pages.append(page) 101 | 102 | def write(self, fd): 103 | """ 104 | Writes the HTML file to a file descriptor. 105 | 106 | Args: 107 | fd (File): File descriptor (mode='rb') to write to. 108 | """ 109 | logger.info(u'Rendering and writing transcription.') 110 | fd.write(self.tmpl.render(uuid=str(uuid.uuid4()), pages=self.pages, 111 | font=self.font, 112 | text_direction=self.text_direction).encode('utf-8')) 113 | -------------------------------------------------------------------------------- /kraken/lib/models.py: -------------------------------------------------------------------------------- 1 | """ 2 | kraken.lib.models 3 | ~~~~~~~~~~~~~~~~~ 4 | 5 | Wrapper around TorchVGSLModel including a variety of forward pass helpers for 6 | sequence classification. 7 | """ 8 | from os.path import expandvars, expanduser, abspath 9 | 10 | import torch 11 | import numpy as np 12 | import kraken.lib.lineest 13 | import kraken.lib.ctc_decoder 14 | 15 | from typing import List, Tuple 16 | 17 | from kraken.lib.vgsl import TorchVGSLModel 18 | from kraken.lib.exceptions import KrakenInvalidModelException, KrakenInputException 19 | 20 | __all__ = ['TorchSeqRecognizer', 'load_any'] 21 | 22 | import logging 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | class TorchSeqRecognizer(object): 28 | """ 29 | A class wrapping a TorchVGSLModel with a more comfortable recognition interface. 30 | """ 31 | def __init__(self, nn, decoder=kraken.lib.ctc_decoder.greedy_decoder, train: bool = False, device: str = 'cpu') -> None: 32 | """ 33 | Constructs a sequence recognizer from a VGSL model and a decoder. 
34 | 35 | Args: 36 | nn (kraken.lib.vgsl.TorchVGSLModel): neural network used for recognition 37 | decoder (func): Decoder function used for mapping softmax 38 | activations to labels and positions 39 | train (bool): Enables or disables gradient calculation 40 | device (torch.Device): Device to run model on 41 | """ 42 | self.nn = nn 43 | self.kind = '' 44 | if train: 45 | self.nn.train() 46 | else: 47 | self.nn.eval() 48 | self.codec = self.nn.codec 49 | self.decoder = decoder 50 | self.train = train 51 | self.device = device 52 | self.nn.to(device) 53 | 54 | def to(self, device): 55 | """ 56 | Moves model to device and automatically loads input tensors onto it. 57 | """ 58 | self.device = device 59 | self.nn.to(device) 60 | 61 | def forward(self, line: torch.Tensor) -> np.array: 62 | """ 63 | Performs a forward pass on a torch tensor of a line with shape (C, H, W) 64 | and returns a numpy array (W, C). 65 | """ 66 | # make CHW -> 1CHW 67 | line = line.to(self.device) 68 | line = line.unsqueeze(0) 69 | o = self.nn.nn(line) 70 | if o.size(2) != 1: 71 | raise KrakenInputException('Expected dimension 3 to be 1, actual {}'.format(o.size())) 72 | self.outputs = o.detach().squeeze().cpu().numpy() 73 | return self.outputs 74 | 75 | def predict(self, line: torch.Tensor) -> List[Tuple[str, int, int, float]]: 76 | """ 77 | Performs a forward pass on a torch tensor of a line with shape (C, H, W) 78 | and returns the decoding as a list of tuples (string, start, end, 79 | confidence). 80 | """ 81 | o = self.forward(line) 82 | locs = self.decoder(o) 83 | return self.codec.decode(locs) 84 | 85 | def predict_string(self, line: torch.Tensor) -> str: 86 | """ 87 | Performs a forward pass on a torch tensor of a line with shape (C, H, W) 88 | and returns a string of the results. 89 | """ 90 | o = self.forward(line) 91 | locs = self.decoder(o) 92 | decoding = self.codec.decode(locs) 93 | return ''.join(x[0] for x in decoding) 94 | 95 | def predict_labels(self, line: torch.tensor) -> List[Tuple[int, int, int, float]]: 96 | """ 97 | Performs a forward pass on a torch tensor of a line with shape (C, H, W) 98 | and returns a list of tuples (class, start, end, max). Max is the 99 | maximum value of the softmax layer in the region. 100 | """ 101 | o = self.forward(line) 102 | return self.decoder(o) 103 | 104 | 105 | def load_any(fname: str, train: bool = False, device: str = 'cpu') -> TorchSeqRecognizer: 106 | """ 107 | Loads anything that was, is, and will be a valid ocropus model and 108 | instantiates a shiny new kraken.lib.lstm.SeqRecognizer from the RNN 109 | configuration in the file. 110 | 111 | Currently it recognizes the following kinds of models: 112 | 113 | * pyrnn models containing BIDILSTMs 114 | * protobuf models containing converted python BIDILSTMs 115 | * protobuf models containing CLSTM networks 116 | 117 | Additionally an attribute 'kind' will be added to the SeqRecognizer 118 | containing a string representation of the source kind. Current known values 119 | are: 120 | 121 | * pyrnn for pickled BIDILSTMs 122 | * clstm for protobuf models generated by clstm 123 | 124 | Args: 125 | fname (str): Path to the model 126 | train (bool): Enables gradient calculation and dropout layers in model. 127 | device (str): Target device 128 | 129 | Returns: 130 | A kraken.lib.models.TorchSeqRecognizer object. 
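    Usage sketch (the model path is illustrative; any model file in one of
    the formats listed above will do)::

        from kraken.lib.models import load_any

        rec = load_any('en-default.mlmodel', device='cpu')
        print(rec.kind)   # source format, e.g. 'vgsl', 'clstm' or 'pyrnn'
        # rec.predict_string(line) then decodes a (C, H, W) line tensor to text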
131 | """ 132 | nn = None 133 | kind = '' 134 | fname = abspath(expandvars(expanduser(fname))) 135 | logger.info(u'Loading model from {}'.format(fname)) 136 | try: 137 | nn = TorchVGSLModel.load_model(str(fname)) 138 | kind = 'vgsl' 139 | except Exception: 140 | try: 141 | nn = TorchVGSLModel.load_clstm_model(fname) 142 | kind = 'clstm' 143 | except Exception: 144 | nn = TorchVGSLModel.load_pronn_model(fname) 145 | kind = 'pronn' 146 | try: 147 | nn = TorchVGSLModel.load_pyrnn_model(fname) 148 | kind = 'pyrnn' 149 | except Exception: 150 | pass 151 | if not nn: 152 | raise KrakenInvalidModelException('File {} not loadable by any parser.'.format(fname)) 153 | seq = TorchSeqRecognizer(nn, train=train, device=device) 154 | seq.kind = kind 155 | return seq 156 | -------------------------------------------------------------------------------- /kraken/repo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2015 Benjamin Kiessling 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 14 | # or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 16 | 17 | # -*- coding: utf-8 -*- 18 | """ 19 | Access functions to the model repository on github. 20 | """ 21 | from collections import defaultdict 22 | from typing import Callable, Any 23 | from contextlib import closing 24 | 25 | from kraken.lib.exceptions import KrakenRepoException 26 | 27 | import base64 28 | import requests 29 | import json 30 | import os 31 | import logging 32 | 33 | __all__ = ['get_model', 'get_description', 'get_listing'] 34 | 35 | logger = logging.getLogger(__name__) 36 | 37 | MODEL_REPO = 'https://api.github.com/repos/mittagessen/kraken-models/' 38 | 39 | 40 | def get_model(model_id: str, path: str, callback: Callable[..., Any]) -> None: 41 | """ 42 | Retrieves a model and saves it to a path. 43 | 44 | Args: 45 | model_id (str): Identifier of the model 46 | path (str): Destination to write model to. 47 | callback (func): Function called for every 1024 octet chunk received. 
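    Call sketch (the model identifier and destination directory are
    illustrative; the callback only signals progress and takes no
    arguments)::

        from kraken import repo

        def tick():
            print('.', end='', flush=True)

        repo.get_model('en-default', path='models/', callback=tick)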
48 | """ 49 | logger.info(u'Saving model {} to {}'.format(model_id, path)) 50 | logger.debug(u'Retrieving head of model repository') 51 | r = requests.get('{}{}'.format(MODEL_REPO, 'git/refs/heads/master')) 52 | callback() 53 | resp = r.json() 54 | if 'object' not in resp: 55 | logger.error(u'No \'object\' field in repo head API response.') 56 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message'])) 57 | head = resp['object']['sha'] 58 | logger.debug(u'Retrieving tree of model repository') 59 | r = requests.get('{}{}{}'.format(MODEL_REPO, 'git/trees/', head), params={'recursive': 1}) 60 | callback() 61 | resp = r.json() 62 | if 'tree' not in resp: 63 | logger.error(u'No \'tree\' field in repo API response.') 64 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message'])) 65 | url = None 66 | for el in resp['tree']: 67 | components = el['path'].split('/') 68 | if len(components) > 2 and components[1] == model_id and components[2] == 'DESCRIPTION': 69 | logger.debug(u'Retrieving description for {}'.format(components[1])) 70 | raw = base64.b64decode(requests.get(el['url']).json()['content']).decode('utf-8') 71 | desc = json.loads(raw) 72 | spath = os.path.join(path, desc['name']) 73 | elif len(components) > 2 and components[1] == model_id: 74 | url = el['url'] 75 | break 76 | if not url: 77 | logger.error(u'Model {} not in repository.'.format(model_id)) 78 | raise KrakenRepoException('Modle {} not in repository'.format(model_id)) 79 | with closing(requests.get(url, headers={'Accept': 'application/vnd.github.v3.raw'}, 80 | stream=True)) as r: 81 | with open(spath, 'wb') as f: 82 | logger.debug(u'Downloading model') 83 | for chunk in r.iter_content(chunk_size=1024): 84 | callback() 85 | f.write(chunk) 86 | return 87 | 88 | 89 | def get_description(model_id: str) -> dict: 90 | logger.info('Retrieving metadata for {}'.format(model_id)) 91 | logger.debug('Retrieving head of model repository') 92 | r = requests.get('{}{}'.format(MODEL_REPO, 'git/refs/heads/master')) 93 | resp = r.json() 94 | if 'object' not in resp: 95 | logger.error('No \'object\' field in repo head API response.') 96 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message'])) 97 | head = resp['object']['sha'] 98 | logger.debug('Retrieving tree of model repository') 99 | r = requests.get('{}{}{}'.format(MODEL_REPO, 'git/trees/', head), params={'recursive': 1}) 100 | resp = r.json() 101 | if 'tree' not in resp: 102 | logger.error('No \'tree\' field in repo API response.') 103 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message'])) 104 | for el in resp['tree']: 105 | components = el['path'].split('/') 106 | if len(components) > 2 and components[1] == model_id and components[2] == 'DESCRIPTION': 107 | logger.debug('Retrieving description for {}'.format(components[1])) 108 | raw = base64.b64decode(requests.get(el['url']).json()['content']).decode('utf-8') 109 | return defaultdict(str, json.loads(raw)) 110 | raise KrakenRepoException('No description for {} found'.format(model_id)) 111 | 112 | 113 | def get_listing(callback: Callable[..., Any]) -> dict: 114 | logger.info(u'Retrieving model list') 115 | r = requests.get('{}{}'.format(MODEL_REPO, 'git/refs/heads/master')) 116 | callback() 117 | resp = r.json() 118 | if 'object' not in resp: 119 | logger.error(u'No \'object\' field in repo head API response.') 120 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message'])) 121 | head = resp['object']['sha'] 122 | logger.debug(u'Retrieving tree of 
model repository') 123 | r = requests.get('{}{}{}'.format(MODEL_REPO, 'git/trees/', head), params={'recursive': 1}) 124 | callback() 125 | resp = r.json() 126 | if 'tree' not in resp: 127 | logger.error(u'No \'tree\' field in repo API response.') 128 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message'])) 129 | models = {} 130 | for el in resp['tree']: 131 | components = el['path'].split('/') 132 | # new model 133 | if len(components) == 2: 134 | models[components[1]] = {'type': components[0]} 135 | if len(components) > 2 and components[2] == 'DESCRIPTION': 136 | logger.debug(u'Retrieving description for {}'.format(components[1])) 137 | r = requests.get(el['url']) 138 | if not r.ok: 139 | logger.error(u'Requests to \'{}\' failed with status {}'.format(el['url'], r.status_code)) 140 | raise KrakenRepoException('{}: {}'.format(r.status_code, r.json()['message'])) 141 | raw = base64.b64decode(requests.get(el['url']).json()['content']).decode('utf-8') 142 | callback() 143 | try: 144 | models[components[1]].update(json.loads(raw)) 145 | except Exception: 146 | del models[components[1]] 147 | elif len(components) > 2 and components[1] in models: 148 | models[components[1]]['model'] = el['url'] 149 | return models 150 | -------------------------------------------------------------------------------- /kraken/lib/ctc_decoder.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2017 Benjamin Kiessling 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 13 | # or implied. See the License for the specific language governing 14 | # permissions and limitations under the License. 15 | 16 | # -*- coding: utf-8 -*- 17 | """ 18 | Decoders for softmax outputs of CTC trained networks. 19 | """ 20 | 21 | import collections 22 | import numpy as np 23 | 24 | from typing import List, Tuple 25 | from scipy.special import logsumexp 26 | from scipy.ndimage import measurements 27 | 28 | from itertools import groupby 29 | 30 | __all__ = ['beam_decoder', 'greedy_decoder', 'blank_threshold_decoder'] 31 | 32 | 33 | def beam_decoder(outputs: np.ndarray, beam_size: int = 3) -> List[Tuple[int, int, int, float]]: 34 | """ 35 | Translates back the network output to a label sequence using 36 | same-prefix-merge beam search decoding as described in [0]. 37 | 38 | [0] Hannun, Awni Y., et al. "First-pass large vocabulary continuous speech 39 | recognition using bi-directional recurrent DNNs." arXiv preprint 40 | arXiv:1408.2873 (2014). 41 | 42 | Args: 43 | output (numpy.array): (C, W) shaped softmax output tensor 44 | 45 | Returns: 46 | A list with tuples (class, start, end, prob). max is the maximum value 47 | of the softmax layer in the region. 
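    Toy example (the softmax matrix below is made up; class 0 is the CTC
    blank)::

        import numpy as np
        from kraken.lib.ctc_decoder import beam_decoder

        outputs = np.array([[0.9, 0.1, 0.1, 0.8],    # blank
                            [0.05, 0.8, 0.1, 0.1],   # class 1
                            [0.05, 0.1, 0.8, 0.1]])  # class 2
        print(beam_decoder(outputs, beam_size=2))
        # e.g. [(1, 1, 1, 0.8), (2, 2, 2, 0.8)]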
48 | """ 49 | c, w = outputs.shape 50 | probs = np.log(outputs) 51 | beam = [(tuple(), (0.0, float('-inf')))] # type: List[Tuple[Tuple, Tuple[float, float]]] 52 | 53 | # loop over each time step 54 | for t in range(w): 55 | next_beam = collections.defaultdict(lambda: 2*(float('-inf'),)) # type: dict 56 | # p_b -> prob for prefix ending in blank 57 | # p_nb -> prob for prefix not ending in blank 58 | for prefix, (p_b, p_nb) in beam: 59 | # only update ending-in-blank-prefix probability for blank 60 | n_p_b, n_p_nb = next_beam[prefix] 61 | n_p_b = logsumexp((n_p_b, p_b + probs[0, t], p_nb + probs[0, t])) 62 | next_beam[prefix] = (n_p_b, n_p_nb) 63 | # loop over non-blank classes 64 | for s in range(1, c): 65 | # only update the not-ending-in-blank-prefix probability for prefix+s 66 | l_end = prefix[-1][0] if prefix else None 67 | n_prefix = prefix + ((s, t, t),) 68 | n_p_b, n_p_nb = next_beam[n_prefix] 69 | if s == l_end: 70 | # substitute the previous non-blank-ending-prefix 71 | # probability for repeated labels 72 | n_p_nb = logsumexp((n_p_nb, p_b + probs[s, t])) 73 | else: 74 | n_p_nb = logsumexp((n_p_nb, p_b + probs[s, t], p_nb + probs[s, t])) 75 | 76 | next_beam[n_prefix] = (n_p_b, n_p_nb) 77 | 78 | # If s is repeated at the end we also update the unchanged 79 | # prefix. This is the merging case. 80 | if s == l_end: 81 | n_p_b, n_p_nb = next_beam[prefix] 82 | n_p_nb = logsumexp((n_p_nb, p_nb + probs[s, t])) 83 | # rewrite both new and old prefix positions 84 | next_beam[prefix[:-1] + ((prefix[-1][0], prefix[-1][1], t),)] = (n_p_b, n_p_nb) 85 | next_beam[n_prefix[:-1] + ((n_prefix[-1][0], n_prefix[-1][1], t),)] = next_beam.pop(n_prefix) 86 | 87 | # Sort and trim the beam before moving on to the 88 | # next time-step. 89 | beam = sorted(next_beam.items(), 90 | key=lambda x: logsumexp(x[1]), 91 | reverse=True) 92 | beam = beam[:beam_size] 93 | return [(c, start, end, max(outputs[c, start:end+1])) for (c, start, end) in beam[0][0]] 94 | 95 | 96 | def greedy_decoder(outputs: np.ndarray) -> List[Tuple[int, int, int, float]]: 97 | """ 98 | Translates back the network output to a label sequence using greedy/best 99 | path decoding as described in [0]. 100 | 101 | [0] Graves, Alex, et al. "Connectionist temporal classification: labelling 102 | unsegmented sequence data with recurrent neural networks." Proceedings of 103 | the 23rd international conference on Machine learning. ACM, 2006. 104 | 105 | Args: 106 | output (numpy.array): (C, W) shaped softmax output tensor 107 | 108 | Returns: 109 | A list with tuples (class, start, end, max). max is the maximum value 110 | of the softmax layer in the region. 111 | """ 112 | labels = np.argmax(outputs, 0) 113 | seq_len = outputs.shape[1] 114 | mask = np.eye(outputs.shape[0], dtype='bool')[labels].T 115 | classes = [] 116 | for label, group in groupby(zip(np.arange(seq_len), labels, outputs[mask]), key=lambda x: x[1]): 117 | lgroup = list(group) 118 | if label != 0: 119 | classes.append((label, lgroup[0][0], lgroup[-1][0], max(x[2] for x in lgroup))) 120 | return classes 121 | 122 | 123 | def blank_threshold_decoder(outputs: np.ndarray, threshold: float = 0.5) -> List[Tuple[int, int, int, float]]: 124 | """ 125 | Translates back the network output to a label sequence as the original 126 | ocropy/clstm. 127 | 128 | Thresholds on class 0, then assigns the maximum (non-zero) class to each 129 | region. 
130 | 131 | Args: 132 | output (numpy.array): (C, W) shaped softmax output tensor 133 | threshold (float): Threshold for 0 class when determining possible 134 | label locations. 135 | 136 | Returns: 137 | A list with tuples (class, start, end, max). max is the maximum value 138 | of the softmax layer in the region. 139 | """ 140 | outputs = outputs.T 141 | labels, n = measurements.label(outputs[:, 0] < threshold) 142 | mask = np.tile(labels.reshape(-1, 1), (1, outputs.shape[1])) 143 | maxima = measurements.maximum_position(outputs, mask, np.arange(1, np.amax(mask)+1)) 144 | p = 0 145 | start = None 146 | x = [] 147 | for idx, val in enumerate(labels): 148 | if val != 0 and start is None: 149 | start = idx 150 | p += 1 151 | if val == 0 and start is not None: 152 | if maxima[p-1][1] == 0: 153 | start = None 154 | else: 155 | x.append((maxima[p-1][1], start, idx, outputs[maxima[p-1]])) 156 | start = None 157 | # append last non-zero region to list of no zero region occurs after it 158 | if start: 159 | x.append((maxima[p-1][1], start, len(outputs), outputs[maxima[p-1]])) 160 | return [y for y in x if x[0] != 0] 161 | -------------------------------------------------------------------------------- /tests/test_layers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | 4 | from nose.tools import raises 5 | 6 | import torch 7 | from kraken.lib import layers 8 | 9 | 10 | class TestLayers(unittest.TestCase): 11 | 12 | """ 13 | Testing custom layer implementations. 14 | """ 15 | def setUp(self): 16 | torch.set_grad_enabled(False) 17 | 18 | def test_maxpool(self): 19 | """ 20 | Test maximum pooling layer. 21 | """ 22 | mp = layers.MaxPool((3, 3), (2, 2)) 23 | o = mp(torch.randn(1, 2, 32, 64)) 24 | self.assertEqual(o.shape, (1, 2, 15, 31)) 25 | 26 | def test_1d_dropout(self): 27 | """ 28 | Test 1d dropout layer. 29 | """ 30 | do = layers.Dropout(0.2, 1) 31 | o = do(torch.randn(1, 2, 32, 64)) 32 | self.assertEqual(o.shape, (1, 2, 32, 64)) 33 | 34 | def test_2d_dropout(self): 35 | """ 36 | Test 2d dropout layer. 37 | """ 38 | do = layers.Dropout(0.2, 2) 39 | o = do(torch.randn(1, 2, 32, 64)) 40 | self.assertEqual(o.shape, (1, 2, 32, 64)) 41 | 42 | def test_forward_rnn_layer_x(self): 43 | """ 44 | Test unidirectional RNN layer in x-dimension. 45 | """ 46 | rnn = layers.TransposedSummarizingRNN(10, 2, 'f', False, False) 47 | o = rnn(torch.randn(1, 10, 32, 64)) 48 | self.assertEqual(o.shape, (1, 2, 32, 64)) 49 | 50 | def test_forward_rnn_layer_y(self): 51 | """ 52 | Test unidirectional RNN layer in y-dimension. 53 | """ 54 | rnn = layers.TransposedSummarizingRNN(10, 2, 'f', True, False) 55 | o = rnn(torch.randn(1, 10, 32, 64)) 56 | self.assertEqual(o.shape, (1, 2, 32, 64)) 57 | 58 | def test_forward_rnn_layer_x_summarize(self): 59 | """ 60 | Test unidirectional summarizing RNN layer in x-dimension. 61 | """ 62 | rnn = layers.TransposedSummarizingRNN(10, 2, 'f', False, True) 63 | o = rnn(torch.randn(1, 10, 32, 64)) 64 | self.assertEqual(o.shape, (1, 2, 32, 1)) 65 | 66 | def test_forward_rnn_layer_y_summarize(self): 67 | """ 68 | Test unidirectional summarizing RNN layer in y-dimension. 69 | """ 70 | rnn = layers.TransposedSummarizingRNN(10, 2, 'f', True, True) 71 | o = rnn(torch.randn(1, 10, 32, 64)) 72 | self.assertEqual(o.shape, (1, 2, 1, 64)) 73 | 74 | def test_bidi_rnn_layer_x(self): 75 | """ 76 | Test bidirectional RNN layer in x-dimension. 
77 | """ 78 | rnn = layers.TransposedSummarizingRNN(10, 2, 'b', False, False) 79 | o = rnn(torch.randn(1, 10, 32, 64)) 80 | self.assertEqual(o.shape, (1, 4, 32, 64)) 81 | 82 | def test_bidi_rnn_layer_y(self): 83 | """ 84 | Test bidirectional RNN layer in y-dimension. 85 | """ 86 | rnn = layers.TransposedSummarizingRNN(10, 2, 'b', True, False) 87 | o = rnn(torch.randn(1, 10, 32, 64)) 88 | self.assertEqual(o.shape, (1, 4, 32, 64)) 89 | 90 | def test_bidi_rnn_layer_x_summarize(self): 91 | """ 92 | Test bidirectional summarizing RNN layer in x-dimension. 93 | """ 94 | rnn = layers.TransposedSummarizingRNN(10, 2, 'b', False, True) 95 | o = rnn(torch.randn(1, 10, 32, 64)) 96 | self.assertEqual(o.shape, (1, 4, 32, 1)) 97 | 98 | def test_bidi_rnn_layer_y_summarize(self): 99 | """ 100 | Test bidirectional summarizing RNN layer in y-dimension. 101 | """ 102 | rnn = layers.TransposedSummarizingRNN(10, 2, 'b', True, True) 103 | o = rnn(torch.randn(1, 10, 32, 64)) 104 | self.assertEqual(o.shape, (1, 4, 1, 64)) 105 | 106 | def test_linsoftmax(self): 107 | """ 108 | Test basic function of linear layer. 109 | """ 110 | lin = layers.LinSoftmax(20, 10) 111 | o = lin(torch.randn(1, 20, 12, 24)) 112 | self.assertEqual(o.shape, (1, 10, 12, 24)) 113 | 114 | def test_linsoftmax_train(self): 115 | """ 116 | Test function of linear layer in training mode (log_softmax) 117 | """ 118 | lin = layers.LinSoftmax(20, 10).train() 119 | o = lin(torch.randn(1, 20, 12, 24)) 120 | self.assertLess(o.max(), 0) 121 | 122 | def test_linsoftmax_test(self): 123 | """ 124 | Test function of linear layer in eval mode (softmax) 125 | """ 126 | lin = layers.LinSoftmax(20, 10).eval() 127 | o = lin(torch.randn(1, 20, 12, 24)) 128 | self.assertGreaterEqual(o.min(), 0) 129 | 130 | def test_linsoftmax_aug(self): 131 | """ 132 | Test basic function of linear layer with 1-augmentation. 133 | """ 134 | lin = layers.LinSoftmax(20, 10, True) 135 | o = lin(torch.randn(1, 20, 12, 24)) 136 | self.assertEqual(o.shape, (1, 10, 12, 24)) 137 | 138 | def test_linsoftmax_aug_train(self): 139 | """ 140 | Test function of linear layer in training mode (log_softmax) with 1-augmentation 141 | """ 142 | lin = layers.LinSoftmax(20, 10, True).train() 143 | o = lin(torch.randn(1, 20, 12, 24)) 144 | self.assertLess(o.max(), 0) 145 | 146 | def test_linsoftmax_aug_test(self): 147 | """ 148 | Test function of linear layer in eval mode (softmax) with 1-augmentation 149 | """ 150 | lin = layers.LinSoftmax(20, 10, True).eval() 151 | o = lin(torch.randn(1, 20, 12, 24)) 152 | self.assertGreaterEqual(o.min(), 0) 153 | 154 | def test_actconv2d_lin(self): 155 | """ 156 | Test convolutional layer without activation. 157 | """ 158 | conv = layers.ActConv2D(5, 12, (3, 3), 'l') 159 | o = conv(torch.randn(1, 5, 24, 12)) 160 | self.assertEqual(o.shape, (1, 12, 24, 12)) 161 | 162 | def test_actconv2d_sigmoid(self): 163 | """ 164 | Test convolutional layer with sigmoid activation. 165 | """ 166 | conv = layers.ActConv2D(5, 12, (3, 3), 's') 167 | o = conv(torch.randn(1, 5, 24, 12)) 168 | self.assertTrue(0 <= o.min() <= 1) 169 | self.assertTrue(0 <= o.max() <= 1) 170 | 171 | def test_actconv2d_tanh(self): 172 | """ 173 | Test convolutional layer with tanh activation. 174 | """ 175 | conv = layers.ActConv2D(5, 12, (3, 3), 't') 176 | o = conv(torch.randn(1, 5, 24, 12)) 177 | self.assertTrue(-1 <= o.min() <= 1) 178 | self.assertTrue(-1 <= o.max() <= 1) 179 | 180 | def test_actconv2d_softmax(self): 181 | """ 182 | Test convolutional layer with softmax activation. 
183 | """ 184 | conv = layers.ActConv2D(5, 12, (3, 3), 'm') 185 | o = conv(torch.randn(1, 5, 24, 12)) 186 | self.assertTrue(0 <= o.min() <= 1) 187 | self.assertTrue(0 <= o.max() <= 1) 188 | 189 | def test_actconv2d_relu(self): 190 | """ 191 | Test convolutional layer with relu activation. 192 | """ 193 | conv = layers.ActConv2D(5, 12, (3, 3), 'r') 194 | o = conv(torch.randn(1, 5, 24, 12)) 195 | self.assertLessEqual(0, o.min()) 196 | self.assertLessEqual(0, o.max()) 197 | -------------------------------------------------------------------------------- /kraken/lib/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2015 Benjamin Kiessling 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 14 | # or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 16 | """ 17 | Training loop interception helpers 18 | """ 19 | import abc 20 | import torch 21 | import numpy as np 22 | 23 | from itertools import cycle 24 | from torch.utils import data 25 | from functools import partial 26 | from typing import Tuple, Union, Optional, Callable, List, Dict, Any 27 | from collections.abc import Iterable 28 | 29 | class TrainStopper(Iterable): 30 | 31 | def __init__(self): 32 | self.best_loss = 0.0 33 | self.best_epoch = 0 34 | 35 | @abc.abstractmethod 36 | def update(self, val_loss: float) -> None: 37 | """ 38 | Updates the internal state of the train stopper. 39 | """ 40 | pass 41 | 42 | 43 | def annealing_const(start: float, end: float, pct: float) -> float: 44 | return start 45 | 46 | def annealing_linear(start: float, end: float, pct: float) -> float: 47 | return start + pct * (end-start) 48 | 49 | def annealing_cos(start: float, end: float, pct: float) -> float: 50 | co = np.cos(np.pi * pct) + 1 51 | return end + (start-end)/2 * co 52 | 53 | 54 | class TrainScheduler(object): 55 | """ 56 | Implements learning rate scheduling. 57 | """ 58 | def __init__(self, optimizer: torch.optim.Optimizer) -> None: 59 | self.steps: List[Dict[str, Any]] = [] 60 | self.optimizer = optimizer 61 | self.cycle: Any = None 62 | 63 | def add_phase(self, 64 | iterations: int, 65 | lrate: Tuple[float, float] = (1e-4, 1e-4), 66 | momentum: Tuple[float, float] = (0.9, 0.9), 67 | wd: float = 0.0, 68 | annealing_fn: Callable[[float, float, float], float] = annealing_const) -> None: 69 | """ 70 | Adds a new phase to the scheduler. 71 | 72 | Args: 73 | sched (kraken.lib.train.Trainscheduler): TrainScheduler instance 74 | iterations (int): Number of iterations per cycle 75 | max_lr (float): Peak learning rate 76 | div (float): divisor to determine minimum learning rate (min_lr = max_lr / div) 77 | max_mon (float): Maximum momentum 78 | min_mon (float): Minimum momentum 79 | wd (float): Weight decay 80 | annealing_fn (Callable[[int, int, int], float]): LR change 81 | function. Can be one of `annealing_const` (keeping start value), 82 | `annealing_linear` (linear change), and `annealing_cos` (cosine 83 | change). 
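        Example (sketch; the optimizer, phase length, and hyperparameters
        are arbitrary)::

            import torch
            from kraken.lib.train import TrainScheduler, annealing_linear

            optim = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.1)
            sched = TrainScheduler(optim)
            sched.add_phase(1000, lrate=(1e-5, 1e-3), momentum=(0.95, 0.85),
                            annealing_fn=annealing_linear)
            for _ in range(10):
                sched.step()   # writes lr/momentum/weight decay into the param groups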
84 | """ 85 | self.steps.extend([{'lr': annealing_fn(*lrate, pct=x/iterations), 86 | 'momentum': annealing_fn(*momentum, pct=x/iterations), 87 | 'weight_decay': wd} for x in range(iterations)]) 88 | 89 | def step(self) -> None: 90 | """ 91 | Performs an optimization step. 92 | """ 93 | if not self.cycle: 94 | self.cycle = cycle(self.steps) 95 | kwargs = next(self.cycle) 96 | for param_group in self.optimizer.param_groups: 97 | param_group.update(kwargs) 98 | 99 | 100 | def add_1cycle(sched: TrainScheduler, iterations: int, 101 | max_lr: float = 1e-4, div: float = 25.0, 102 | max_mom: float = 0.95, min_mom: float = 0.85, wd: float = 0.0): 103 | """ 104 | Adds 1cycle policy [0] phases to a learning rate scheduler. 105 | 106 | [0] Smith, Leslie N. "A disciplined approach to neural network hyper-parameters: Part 1--learning rate, batch size, momentum, and weight decay." arXiv preprint arXiv:1803.09820 (2018). 107 | 108 | Args: 109 | sched (kraken.lib.train.Trainscheduler): TrainScheduler instance 110 | iterations (int): Number of iterations per cycle 111 | max_lr (float): Peak learning rate 112 | div (float): divisor to determine minimum learning rate (min_lr = max_lr / div) 113 | max_mon (float): Maximum momentum 114 | min_mon (float): Minimum momentum 115 | wd (float): Weight decay 116 | """ 117 | sched.add_phase(iterations//2, (max_lr/div, max_lr), (max_mom, min_mom), wd, annealing_linear) 118 | sched.add_phase(iterations//2, (max_lr, max_lr/div), (min_mom, max_mom), wd, annealing_cos) 119 | 120 | 121 | class EarlyStopping(TrainStopper): 122 | """ 123 | Early stopping to terminate training when validation loss doesn't improve 124 | over a certain time. 125 | """ 126 | def __init__(self, it: data.DataLoader = None, min_delta: float = 0.002, lag: int = 5) -> None: 127 | """ 128 | Args: 129 | it (torch.utils.data.DataLoader): training data loader 130 | min_delta (float): minimum change in validation loss to qualify as improvement. 131 | lag (int): Number of epochs to wait for improvement before 132 | terminating. 133 | """ 134 | super().__init__() 135 | self.min_delta = min_delta 136 | self.lag = lag 137 | self.it = it 138 | self.wait = 0 139 | self.epoch = -1 140 | 141 | def __iter__(self): 142 | return self 143 | 144 | def __next__(self): 145 | if self.wait >= self.lag: 146 | raise StopIteration 147 | self.epoch += 1 148 | return self.it 149 | 150 | def update(self, val_loss: float) -> None: 151 | """ 152 | Updates the internal validation loss state 153 | """ 154 | if (val_loss - self.best_loss) < self.min_delta: 155 | self.wait += 1 156 | else: 157 | self.wait = 0 158 | self.best_loss = val_loss 159 | self.best_epoch = self.epoch 160 | 161 | 162 | class EpochStopping(TrainStopper): 163 | """ 164 | Dumb stopping after a fixed number of epochs. 
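    Usage sketch (`it` would normally be a torch DataLoader; here it is
    left as None and the validation metric is a placeholder)::

        from kraken.lib.train import EpochStopping

        stopper = EpochStopping(it=None, epochs=10)
        for loader in stopper:          # yields the wrapped loader once per epoch
            val_metric = 0.9            # placeholder: compute a real metric here
            stopper.update(val_metric)  # higher values count as improvement
        print(stopper.best_epoch)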
165 | """ 166 | def __init__(self, it: data.DataLoader = None, epochs: int = 100) -> None: 167 | """ 168 | Args: 169 | it (torch.utils.data.DataLoader): training data loader 170 | epochs (int): Number of epochs to train for 171 | """ 172 | super().__init__() 173 | self.epochs = epochs 174 | self.epoch = -1 175 | self.it = it 176 | 177 | def __iter__(self): 178 | return self 179 | 180 | def __next__(self): 181 | if self.epoch < self.epochs - 1: 182 | self.epoch += 1 183 | return self.it 184 | else: 185 | raise StopIteration 186 | 187 | def update(self, val_loss: float) -> None: 188 | """ 189 | Only update internal best epoch 190 | """ 191 | if val_loss > self.best_loss: 192 | self.best_loss = val_loss 193 | self.best_epoch = self.epoch 194 | -------------------------------------------------------------------------------- /docs/vgsl.rst: -------------------------------------------------------------------------------- 1 | .. _vgsl: 2 | 3 | VGSL network specification 4 | ========================== 5 | 6 | kraken implements a dialect of the Variable-size Graph Specification Language 7 | (VGSL), enabling the specification of different network architectures for image 8 | processing purposes using a short definition string. 9 | 10 | Basics 11 | ------ 12 | 13 | A VGSL specification consists of an input block, one or more layers, and an 14 | output block. For example: 15 | 16 | .. code-block:: console 17 | 18 | [1,48,0,1 Cr3,3,32 Mp2,2 Cr3,3,64 Mp2,2 S1(1x12)1,3 Lbx100 Do O1c103] 19 | 20 | The first block defines the input in order of [batch, heigh, width, channels] 21 | with zero-valued dimensions being variable. Integer valued height or width 22 | input specifications will result in the input images being automatically scaled 23 | in either dimension. 24 | 25 | When channels are set to 1 grayscale or B/W inputs are expected, 3 expects RGB 26 | color images. Higher values in combination with a height of 1 result in the 27 | network being fed 1 pixel wide grayscale strips scaled to the size of the 28 | channel dimension. 29 | 30 | After the input, a number of layers are defined. Layers operate on the channel 31 | dimension; this is intuitive for convolutional layers but a recurrent layer 32 | doing sequence classification along the width axis on an image of a particular 33 | height requires the height dimension to be moved to the channel dimension, 34 | e.g.: 35 | 36 | .. code-block:: console 37 | 38 | [1,48,0,1 S1(1x48)1,3 Lbx100 O1c103] 39 | 40 | or using the alternative slightly faster formulation: 41 | 42 | .. code-block:: console 43 | 44 | [1,1,0,48 Lbx100 O1c103] 45 | 46 | Finally an output definition is appended. When training sequence classification 47 | networks with the provided tools the appropriate output definition is 48 | automatically appended to the network based on the alphabet of the training 49 | data. 50 | 51 | Examples 52 | -------- 53 | 54 | .. code-block:: console 55 | 56 | [1,1,0,48 Lbx100 Do 01c59] 57 | 58 | Creating new model [1,1,0,48 Lbx100 Do] with 59 outputs 59 | layer type params 60 | 0 rnn direction b transposed False summarize False out 100 legacy None 61 | 1 dropout probability 0.5 dims 1 62 | 2 linear augmented False out 59 63 | 64 | A simple recurrent recognition model with a single LSTM layer classifying lines 65 | normalized to 48 pixels in height. 66 | 67 | .. 
code-block:: console 68 | 69 | [1,48,0,1 Cr3,3,32 Do0.1,2 Mp2,2 Cr3,3,64 Do0.1,2 Mp2,2 S1(1x12)1,3 Lbx100 Do 01c59] 70 | 71 | Creating new model [1,48,0,1 Cr3,3,32 Do0.1,2 Mp2,2 Cr3,3,64 Do0.1,2 Mp2,2 S1(1x12)1,3 Lbx100 Do] with 59 outputs 72 | layer type params 73 | 0 conv kernel 3 x 3 filters 32 activation r 74 | 1 dropout probability 0.1 dims 2 75 | 2 maxpool kernel 2 x 2 stride 2 x 2 76 | 3 conv kernel 3 x 3 filters 64 activation r 77 | 4 dropout probability 0.1 dims 2 78 | 5 maxpool kernel 2 x 2 stride 2 x 2 79 | 6 reshape from 1 1 x 12 to 1/3 80 | 7 rnn direction b transposed False summarize False out 100 legacy None 81 | 8 dropout probability 0.5 dims 1 82 | 9 linear augmented False out 59 83 | 84 | A model with a small convolutional stack before a recurrent LSTM layer. The 85 | extended dropout layer syntax is used to reduce drop probability on the depth 86 | dimension as the default is too high for convolutional layers. The remainder of 87 | the height dimension (`12`) is reshaped into the depth dimensions before 88 | applying the final recurrent and linear layers. 89 | 90 | .. code-block:: console 91 | 92 | [1,0,0,3 Cr3,3,16 Mp3,3 Lfys64 Lbx128 Lbx256 Do 01c59] 93 | 94 | Creating new model [1,0,0,3 Cr3,3,16 Mp3,3 Lfys64 Lbx128 Lbx256 Do] with 59 outputs 95 | layer type params 96 | 0 conv kernel 3 x 3 filters 16 activation r 97 | 1 maxpool kernel 3 x 3 stride 3 x 3 98 | 2 rnn direction f transposed True summarize True out 64 legacy None 99 | 3 rnn direction b transposed False summarize False out 128 legacy None 100 | 4 rnn direction b transposed False summarize False out 256 legacy None 101 | 5 dropout probability 0.5 dims 1 102 | 6 linear augmented False out 59 103 | 104 | A model with arbitrary sized color image input, an initial summarizing 105 | recurrent layer to squash the height to 64, followed by 2 bi-directional 106 | recurrent layers and a linear projection. 107 | 108 | Convolutional Layers 109 | -------------------- 110 | 111 | .. code-block:: console 112 | 113 | C[{name}](s|t|r|l|m)[{name}],, 114 | s = sigmoid 115 | t = tanh 116 | r = relu 117 | l = linear 118 | m = softmax 119 | 120 | Adds a 2D convolution with kernel size `(y, x)` and `d` output channels, applying 121 | the selected nonlinearity. 122 | 123 | Recurrent Layers 124 | ---------------- 125 | 126 | .. code-block:: console 127 | 128 | L[{name}](f|r|b)(x|y)[s][{name}] LSTM cell with n outputs. 129 | G[{name}](f|r|b)(x|y)[s][{name}] GRU cell with n outputs. 130 | f runs the RNN forward only. 131 | r runs the RNN reversed only. 132 | b runs the RNN bidirectionally. 133 | s (optional) summarizes the output in the requested dimension, return the last step. 134 | 135 | Adds either an LSTM or GRU recurrent layer to the network using eiter the `x` 136 | (width) or `y` (height) dimension as the time axis. Input features are the 137 | channel dimension and the non-time-axis dimension (height/width) is treated as 138 | another batch dimension. For example, a `Lfx25` layer on an `1, 16, 906, 32` 139 | input will execute 16 independent forward passes on `906x32` tensors resulting 140 | in an output of shape `1, 16, 906, 25`. If this isn't desired either run a 141 | summarizing layer in the other direction, e.g. `Lfys20` for an input `1, 1, 142 | 906, 20`, or prepend a reshape layer `S1(1x16)1,3` combining the height and 143 | channel dimension for an `1, 1, 906, 512` input to the recurrent layer. 144 | 145 | Helper and Plumbing Layers 146 | -------------------------- 147 | 148 | Max Pool 149 | ^^^^^^^^ 150 | .. 
145 | Helper and Plumbing Layers 146 | -------------------------- 147 | 148 | Max Pool 149 | ^^^^^^^^ 150 | .. code-block:: console 151 | 152 | Mp[{name}]<y>,<x>[,<y_stride>,<x_stride>] 153 | 154 | Adds a maximum pooling layer with `(y, x)` kernel size and `(y_stride, x_stride)` stride. 155 | 156 | Reshape 157 | ^^^^^^^ 158 | 159 | .. code-block:: console 160 | 161 | S[{name}]<d>(<a>x<b>)<e>,<f> Splits one dimension, moves one part to another 162 | dimension. 163 | 164 | The `S` layer reshapes a source dimension `d` to `a,b` and distributes `a` into 165 | dimension `e`, respectively `b` into `f`. Either `e` or `f` has to be equal to 166 | `d`. So `S1(1x48)1,3` on a `1, 48, 1020, 8` input will first reshape into 167 | `1, 1, 48, 1020, 8`, leave the `1` part in the height dimension and distribute 168 | the `48` sized tensor into the channel dimension resulting in a `1, 1, 1020, 169 | 48*8=384` sized output. `S` layers are mostly used to remove undesirable non-1 170 | height before a recurrent layer. 171 | 172 | .. note:: 173 | 174 | This `S` layer is equivalent to the one implemented in the tensorflow 175 | implementation of VGSL, i.e. behaves differently from tesseract. 176 | 177 | Regularization Layers 178 | --------------------- 179 | 180 | .. code-block:: console 181 | 182 | Do[{name}][<prob>],[<dim>] Insert a 1D or 2D dropout layer 183 | 184 | Adds a 1D or 2D dropout layer with a given probability. Defaults to `0.5` drop 185 | probability and 1D dropout. Set `dim` to `2` after convolutional layers. 186 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " applehelp to make an Apple Help Book" 34 | @echo " devhelp to make HTML files and a Devhelp project" 35 | @echo " epub to make an epub" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 58 | 59 | dirhtml: 60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 61 | @echo 62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 63 | 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | pickle: 70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 71 | @echo 72 | @echo "Build finished; now you can process the pickle files." 73 | 74 | json: 75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 76 | @echo 77 | @echo "Build finished; now you can process the JSON files." 78 | 79 | htmlhelp: 80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 81 | @echo 82 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 83 | ".hhp project file in $(BUILDDIR)/htmlhelp." 84 | 85 | qthelp: 86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 87 | @echo 88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/kraken.qhcp" 91 | @echo "To view the help file:" 92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/kraken.qhc" 93 | 94 | applehelp: 95 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 96 | @echo 97 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 98 | @echo "N.B. You won't be able to view it unless you put it in" \ 99 | "~/Library/Documentation/Help or install it in your application" \ 100 | "bundle." 
101 | 102 | devhelp: 103 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 104 | @echo 105 | @echo "Build finished." 106 | @echo "To view the help file:" 107 | @echo "# mkdir -p $$HOME/.local/share/devhelp/kraken" 108 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/kraken" 109 | @echo "# devhelp" 110 | 111 | epub: 112 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 113 | @echo 114 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 115 | 116 | latex: 117 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 118 | @echo 119 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 120 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 121 | "(use \`make latexpdf' here to do that automatically)." 122 | 123 | latexpdf: 124 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 125 | @echo "Running LaTeX files through pdflatex..." 126 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 127 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 128 | 129 | latexpdfja: 130 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 131 | @echo "Running LaTeX files through platex and dvipdfmx..." 132 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 133 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 134 | 135 | text: 136 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 137 | @echo 138 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 139 | 140 | man: 141 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 142 | @echo 143 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 144 | 145 | texinfo: 146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 147 | @echo 148 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 149 | @echo "Run \`make' in that directory to run these through makeinfo" \ 150 | "(use \`make info' here to do that automatically)." 151 | 152 | info: 153 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 154 | @echo "Running Texinfo files through makeinfo..." 155 | make -C $(BUILDDIR)/texinfo info 156 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 157 | 158 | gettext: 159 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 160 | @echo 161 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 162 | 163 | changes: 164 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 165 | @echo 166 | @echo "The overview file is in $(BUILDDIR)/changes." 167 | 168 | linkcheck: 169 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 170 | @echo 171 | @echo "Link check complete; look for any errors in the above output " \ 172 | "or in $(BUILDDIR)/linkcheck/output.txt." 173 | 174 | doctest: 175 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 176 | @echo "Testing of doctests in the sources finished, look at the " \ 177 | "results in $(BUILDDIR)/doctest/output.txt." 178 | 179 | coverage: 180 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 181 | @echo "Testing of coverage in the sources finished, look at the " \ 182 | "results in $(BUILDDIR)/coverage/python.txt." 183 | 184 | xml: 185 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 186 | @echo 187 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 188 | 189 | pseudoxml: 190 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 191 | @echo 192 | @echo "Build finished. 
The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 193 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | echo. coverage to run coverage check of the documentation if enabled 41 | goto end 42 | ) 43 | 44 | if "%1" == "clean" ( 45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 46 | del /q /s %BUILDDIR%\* 47 | goto end 48 | ) 49 | 50 | 51 | REM Check if sphinx-build is available and fallback to Python version if any 52 | %SPHINXBUILD% 2> nul 53 | if errorlevel 9009 goto sphinx_python 54 | goto sphinx_ok 55 | 56 | :sphinx_python 57 | 58 | set SPHINXBUILD=python -m sphinx.__init__ 59 | %SPHINXBUILD% 2> nul 60 | if errorlevel 9009 ( 61 | echo. 62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 63 | echo.installed, then set the SPHINXBUILD environment variable to point 64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 65 | echo.may add the Sphinx directory to PATH. 66 | echo. 67 | echo.If you don't have Sphinx installed, grab it from 68 | echo.http://sphinx-doc.org/ 69 | exit /b 1 70 | ) 71 | 72 | :sphinx_ok 73 | 74 | 75 | if "%1" == "html" ( 76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 77 | if errorlevel 1 exit /b 1 78 | echo. 79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 80 | goto end 81 | ) 82 | 83 | if "%1" == "dirhtml" ( 84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 85 | if errorlevel 1 exit /b 1 86 | echo. 87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 88 | goto end 89 | ) 90 | 91 | if "%1" == "singlehtml" ( 92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 93 | if errorlevel 1 exit /b 1 94 | echo. 95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 
96 | goto end 97 | ) 98 | 99 | if "%1" == "pickle" ( 100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 101 | if errorlevel 1 exit /b 1 102 | echo. 103 | echo.Build finished; now you can process the pickle files. 104 | goto end 105 | ) 106 | 107 | if "%1" == "json" ( 108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 109 | if errorlevel 1 exit /b 1 110 | echo. 111 | echo.Build finished; now you can process the JSON files. 112 | goto end 113 | ) 114 | 115 | if "%1" == "htmlhelp" ( 116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 117 | if errorlevel 1 exit /b 1 118 | echo. 119 | echo.Build finished; now you can run HTML Help Workshop with the ^ 120 | .hhp project file in %BUILDDIR%/htmlhelp. 121 | goto end 122 | ) 123 | 124 | if "%1" == "qthelp" ( 125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 129 | .qhcp project file in %BUILDDIR%/qthelp, like this: 130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\kraken.qhcp 131 | echo.To view the help file: 132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\kraken.ghc 133 | goto end 134 | ) 135 | 136 | if "%1" == "devhelp" ( 137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. 141 | goto end 142 | ) 143 | 144 | if "%1" == "epub" ( 145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 149 | goto end 150 | ) 151 | 152 | if "%1" == "latex" ( 153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 157 | goto end 158 | ) 159 | 160 | if "%1" == "latexpdf" ( 161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 162 | cd %BUILDDIR%/latex 163 | make all-pdf 164 | cd %~dp0 165 | echo. 166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdfja" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf-ja 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "text" ( 181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 182 | if errorlevel 1 exit /b 1 183 | echo. 184 | echo.Build finished. The text files are in %BUILDDIR%/text. 185 | goto end 186 | ) 187 | 188 | if "%1" == "man" ( 189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 190 | if errorlevel 1 exit /b 1 191 | echo. 192 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 193 | goto end 194 | ) 195 | 196 | if "%1" == "texinfo" ( 197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 198 | if errorlevel 1 exit /b 1 199 | echo. 200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 201 | goto end 202 | ) 203 | 204 | if "%1" == "gettext" ( 205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 206 | if errorlevel 1 exit /b 1 207 | echo. 208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 209 | goto end 210 | ) 211 | 212 | if "%1" == "changes" ( 213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 214 | if errorlevel 1 exit /b 1 215 | echo. 216 | echo.The overview file is in %BUILDDIR%/changes. 
217 | goto end 218 | ) 219 | 220 | if "%1" == "linkcheck" ( 221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 222 | if errorlevel 1 exit /b 1 223 | echo. 224 | echo.Link check complete; look for any errors in the above output ^ 225 | or in %BUILDDIR%/linkcheck/output.txt. 226 | goto end 227 | ) 228 | 229 | if "%1" == "doctest" ( 230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 231 | if errorlevel 1 exit /b 1 232 | echo. 233 | echo.Testing of doctests in the sources finished, look at the ^ 234 | results in %BUILDDIR%/doctest/output.txt. 235 | goto end 236 | ) 237 | 238 | if "%1" == "coverage" ( 239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 240 | if errorlevel 1 exit /b 1 241 | echo. 242 | echo.Testing of coverage in the sources finished, look at the ^ 243 | results in %BUILDDIR%/coverage/python.txt. 244 | goto end 245 | ) 246 | 247 | if "%1" == "xml" ( 248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 249 | if errorlevel 1 exit /b 1 250 | echo. 251 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 252 | goto end 253 | ) 254 | 255 | if "%1" == "pseudoxml" ( 256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 257 | if errorlevel 1 exit /b 1 258 | echo. 259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 260 | goto end 261 | ) 262 | 263 | :end 264 | -------------------------------------------------------------------------------- /kraken/lib/codec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2017 Benjamin Kiessling 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 14 | # or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 16 | 17 | """ 18 | pytorch compatible codec with many-to-many mapping between labels and 19 | graphemes. 20 | """ 21 | import regex 22 | import numpy as np 23 | 24 | from typing import List, Tuple, Set, Union, Dict, Sequence 25 | from torch import IntTensor 26 | from kraken.lib.exceptions import KrakenEncodeException 27 | 28 | __all__ = ['PytorchCodec'] 29 | 30 | 31 | class PytorchCodec(object): 32 | """ 33 | Translates between labels and graphemes. 34 | """ 35 | def __init__(self, charset: Union[Dict[str, Sequence[int]], Sequence[str], str]) -> None: 36 | """ 37 | Builds a codec converting between graphemes/code points and integer 38 | label sequences. 39 | 40 | charset may either be a string, a list or a dict. In the first case 41 | each code point will be assigned a label, in the second case each 42 | string in the list will be assigned a label, and in the final case each 43 | key string will be mapped to the value sequence of integers. In the 44 | first two cases labels will be assigned automatically. 45 | 46 | As 0 is the blank label in a CTC output layer, output labels and input 47 | dictionaries are/should be 1-indexed. 48 | 49 | Args: 50 | charset (unicode, list, dict): Input character set. 
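        Example:
            A minimal sketch; the labels shown simply follow the sorted,
            1-indexed assignment described above and are illustrative only::

                codec = PytorchCodec('ab')      # 'a' -> [1], 'b' -> [2]
                labels = codec.encode('abba')   # IntTensor([1, 2, 2, 1])
                codec.decode([(l, 0, 0, 1.0) for l in labels.tolist()])
                # -> [('a', 0, 0, 1.0), ('b', 0, 0, 1.0), ('b', 0, 0, 1.0), ('a', 0, 0, 1.0)]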
51 | """ 52 | if isinstance(charset, dict): 53 | self.c2l = charset 54 | else: 55 | self.c2l = {k: [v] for v, k in enumerate(sorted(charset), start=1)} 56 | # map integer labels to code points because regex only works with strings 57 | self.l2c = {} # type: Dict[str, str] 58 | for k, v in self.c2l.items(): 59 | self.l2c[''.join(chr(c) for c in v)] = k 60 | 61 | # sort prefixes for c2l regex 62 | self.c2l_regex = regex.compile(r'|'.join(regex.escape(x) for x in sorted(self.c2l.keys(), key=len, reverse=True))) 63 | # sort prefixes for l2c regex 64 | self.l2c_regex = regex.compile(r'|'.join(regex.escape(x) for x in sorted(self.l2c.keys(), key=len, reverse=True))) 65 | 66 | def __len__(self) -> int: 67 | """ 68 | Total number of input labels the codec can decode. 69 | """ 70 | return len(self.l2c.keys()) 71 | 72 | def max_label(self) -> int: 73 | """ 74 | Returns the maximum label value. 75 | """ 76 | return max(l for labels in self.c2l.values() for l in labels) 77 | 78 | def encode(self, s: str) -> IntTensor: 79 | """ 80 | Encodes a string into a sequence of labels. 81 | 82 | Args: 83 | s (str): Input unicode string 84 | 85 | Returns: 86 | (torch.IntTensor) encoded label sequence 87 | 88 | Raises: 89 | KrakenEncodeException if encoding fails. 90 | """ 91 | splits = self._greedy_split(s, self.c2l_regex) 92 | labels = [] # type: List[int] 93 | for c in splits: 94 | labels.extend(self.c2l[c]) 95 | return IntTensor(labels) 96 | 97 | def decode(self, labels: Sequence[Tuple[int, int, int, float]]) -> List[Tuple[str, int, int, float]]: 98 | """ 99 | Decodes a labelling. 100 | 101 | Given a labelling with cuts and confidences returns a string with the 102 | cuts and confidences aggregated across label-code point 103 | correspondences. When decoding multilabels to code points the resulting 104 | cuts are min/max, confidences are averaged. 105 | 106 | Args: 107 | labels (list): Input containing tuples (label, start, end, 108 | confidence). 109 | 110 | Returns: 111 | list: A list of tuples (code point, start, end, confidence) 112 | """ 113 | # map into unicode space 114 | uni_labels = ''.join(chr(v) for v, _, _, _ in labels) 115 | start = [x for _, x, _, _ in labels] 116 | end = [x for _, _, x, _ in labels] 117 | con = [x for _, _, _, x in labels] 118 | splits = self._greedy_split(uni_labels, self.l2c_regex) 119 | decoded = [] 120 | idx = 0 121 | for i in splits: 122 | decoded.extend([(c, s, e, u) for c, s, e, u in zip(self.l2c[i], 123 | len(self.l2c[i]) * [start[idx]], 124 | len(self.l2c[i]) * [end[idx + len(i) - 1]], 125 | len(self.l2c[i]) * [np.mean(con[idx:idx + len(i)])])]) 126 | idx += len(i) 127 | return decoded 128 | 129 | def _greedy_split(self, input: str, re: regex.Regex) -> List[str]: 130 | """ 131 | Splits an input string greedily from a list of prefixes. Stops when no 132 | more matches are found. 133 | 134 | Args: 135 | input (str): input string 136 | re (regex.Regex): Prefix match object 137 | 138 | Returns: 139 | (list) of prefixes 140 | 141 | Raises: 142 | (KrakenEncodeException) if no prefix match is found for some part 143 | of the string. 
144 | """ 145 | r = [] # type: List[str] 146 | idx = 0 147 | while True: 148 | mo = re.match(input, idx) 149 | if mo is None or idx == len(input): 150 | if len(input) > idx: 151 | raise KrakenEncodeException('No prefix matches for input after {}'.format(idx)) 152 | return r 153 | r.append(mo.group()) 154 | idx = mo.end() 155 | 156 | def merge(self, codec: 'PytorchCodec') -> Tuple['PytorchCodec', Set]: 157 | """ 158 | Transforms this codec (c1) into another (c2) reusing as many labels as 159 | possible. 160 | 161 | The resulting codec is able to encode the same code point sequences 162 | while not necessarily having the same labels for them as c2. 163 | Retains matching character -> label mappings from both codecs, removes 164 | mappings not c2, and adds mappings not in c1. Compound labels in c2 for 165 | code point sequences not in c1 containing labels also in use in c1 are 166 | added as separate labels. 167 | 168 | Args: 169 | codec (kraken.lib.codec.PytorchCodec): 170 | 171 | Returns: 172 | A merged codec and a list of labels that were removed from the 173 | original codec. 174 | """ 175 | # find character sequences not encodable (exact match) by new codec. 176 | # get labels for these sequences as deletion candidates 177 | rm_candidates = {cseq: enc for cseq, enc in self.c2l.items() if cseq not in codec.c2l} 178 | c2l_cand = self.c2l.copy() 179 | for x in rm_candidates.keys(): 180 | c2l_cand.pop(x) 181 | # remove labels from candidate list that are in use for other decodings 182 | rm_labels = [label for v in rm_candidates.values() for label in v] 183 | for v in c2l_cand.values(): 184 | for l in rm_labels: 185 | if l in v: 186 | rm_labels.remove(l) 187 | # iteratively remove labels, decrementing subsequent labels to close 188 | # (new) holes in the codec. 189 | offset_rm_labels = [v-idx for idx, v in enumerate(sorted(set(rm_labels)))] 190 | for rlabel in offset_rm_labels: 191 | c2l_cand = {k: [l-1 if l > rlabel else l for l in v] for k, v in c2l_cand.items()} 192 | # add mappings not in original codec 193 | add_list = {cseq: enc for cseq, enc in codec.c2l.items() if cseq not in self.c2l} 194 | # renumber 195 | start_idx = max(label for v in c2l_cand.values() for label in v) + 1 196 | add_labels = {k: v for v, k in enumerate(sorted(set(label for v in add_list.values() for label in v)), start_idx)} 197 | for k, v in add_list.items(): 198 | c2l_cand[k] = [add_labels[label] for label in v] 199 | return PytorchCodec(c2l_cand), set(rm_labels) 200 | -------------------------------------------------------------------------------- /kraken/serialization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2015 Benjamin Kiessling 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 14 | # or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 
16 | from jinja2 import Environment, PackageLoader 17 | 18 | import regex 19 | import logging 20 | import unicodedata 21 | 22 | from collections import Counter 23 | 24 | from kraken.rpred import ocr_record 25 | from kraken.lib.util import make_printable 26 | 27 | from typing import List, Tuple, Iterable, Optional, Sequence 28 | 29 | logger = logging.getLogger(__name__) 30 | 31 | __all__ = ['serialize'] 32 | 33 | 34 | def _rescale(val: Sequence[float], low: float, high: float) -> List[float]: 35 | """ 36 | Rescales a list of confidence value between 0 and 1 to an interval [low, 37 | high]. 38 | 39 | Args: 40 | val (float): List of values in interval (0,1) 41 | low (float): Lower bound of rescaling interval 42 | high (float): Upper bound of rescaling interval 43 | 44 | Returns: 45 | Rescaled value (float). 46 | """ 47 | return [(high - low) * x + low for x in val] 48 | 49 | 50 | def max_bbox(boxes: Iterable[Tuple[int, int, int, int]]) -> Tuple[int, int, int, int]: 51 | """ 52 | Calculates the minimal bounding box containing all boxes contained in an 53 | iterator. 54 | 55 | Args: 56 | boxes (iterator): An iterator returning tuples of the format (x0, y0, 57 | x1, y1) 58 | Returns: 59 | A box covering all bounding boxes in the input argument 60 | """ 61 | # XXX: fix type hinting 62 | sbox = list(map(sorted, list(zip(*boxes)))) 63 | return (sbox[0][0], sbox[1][0], sbox[2][-1], sbox[3][-1]) # type: ignore 64 | 65 | 66 | def serialize(records: Sequence[ocr_record], 67 | image_name: str = None, 68 | image_size: Tuple[int, int] = (0, 0), 69 | writing_mode: str = 'horizontal-tb', 70 | scripts: Optional[Iterable[str]] = None, 71 | template: str = 'hocr') -> str: 72 | """ 73 | Serializes a list of ocr_records into an output document. 74 | 75 | Serializes a list of predictions and their corresponding positions by doing 76 | some hOCR-specific preprocessing and then renders them through one of 77 | several jinja2 templates. 78 | 79 | Note: Empty records are ignored for serialization purposes. 80 | 81 | Args: 82 | records (iterable): List of kraken.rpred.ocr_record 83 | image_name (str): Name of the source image 84 | image_size (tuple): Dimensions of the source image 85 | writing_mode (str): Sets the principal layout of lines and the 86 | direction in which blocks progress. Valid values 87 | are horizontal-tb, vertical-rl, and 88 | vertical-lr. 89 | scripts (list): List of scripts contained in the OCR records 90 | template (str): Selector for the serialization format. May be 91 | 'hocr' or 'alto'. 92 | 93 | Returns: 94 | (str) rendered template. 95 | """ 96 | logger.info('Serialize {} records from {} with template {}.'.format(len(records), image_name, template)) 97 | page = {'lines': [], 'size': image_size, 'name': image_name, 'writing_mode': writing_mode, 'scripts': scripts} # type: dict 98 | seg_idx = 0 99 | char_idx = 0 100 | for idx, record in enumerate(records): 101 | # skip empty records 102 | if not record.prediction: 103 | logger.debug('Empty record. 
Skipping') 104 | continue 105 | line = {'index': idx, 106 | 'bbox': max_bbox(record.cuts), 107 | 'cuts': record.cuts, 108 | 'confidences': record.confidences, 109 | 'recognition': [] 110 | } 111 | splits = regex.split(r'(\s+)', record.prediction) 112 | line_offset = 0 113 | logger.debug('Record contains {} segments'.format(len(splits))) 114 | for segment in splits: 115 | if len(segment) == 0: 116 | continue 117 | seg_bbox = max_bbox(record.cuts[line_offset:line_offset + len(segment)]) 118 | 119 | line['recognition'].extend([{'bbox': seg_bbox, 120 | 'confidences': record.confidences[line_offset:line_offset + len(segment)], 121 | 'cuts': record.cuts[line_offset:line_offset + len(segment)], 122 | 'text': segment, 123 | 'recognition': [{'bbox': cut, 'confidence': conf, 'text': char, 'index': cid} 124 | for conf, cut, char, cid in 125 | zip(record.confidences[line_offset:line_offset + len(segment)], 126 | record.cuts[line_offset:line_offset + len(segment)], 127 | segment, 128 | range(char_idx, char_idx + len(segment)))], 129 | 'index': seg_idx}]) 130 | char_idx += len(segment) 131 | seg_idx += 1 132 | line_offset += len(segment) 133 | page['lines'].append(line) 134 | logger.debug('Initializing jinja environment.') 135 | env = Environment(loader=PackageLoader('kraken', 'templates'), 136 | trim_blocks=True, 137 | lstrip_blocks=True, 138 | autoescape=True) 139 | env.tests['whitespace'] = str.isspace 140 | env.filters['rescale'] = _rescale 141 | logger.debug('Retrieving template.') 142 | tmpl = env.get_template(template) 143 | logger.debug('Rendering data.') 144 | return tmpl.render(page=page) 145 | 146 | 147 | def render_report(model: str, 148 | chars: int, 149 | errors: int, 150 | char_confusions: Counter, 151 | scripts: Counter, 152 | insertions: Counter, 153 | deletions: int, 154 | substitutions: Counter) -> str: 155 | """ 156 | Renders an accuracy report. 157 | 158 | Args: 159 | model (str): Model name. 160 | errors (int): Number of errors on test set. 161 | char_confusions (dict): Dictionary mapping a tuple (gt, pred) to a 162 | number of occurrences. 163 | scripts (dict): Dictionary counting character per script. 164 | insertions (dict): Dictionary counting insertion operations per Unicode 165 | script 166 | deletions (int): Number of deletions 167 | substitutions (dict): Dictionary counting substitution operations per 168 | Unicode script. 169 | 170 | Returns: 171 | A string containing the rendered report. 
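        Example:
            A sketch with made-up counts (not taken from a real evaluation
            run)::

                from collections import Counter

                report = render_report('toy.clstm', chars=100, errors=7,
                                       char_confusions=Counter({('a', 'o'): 3}),
                                       scripts=Counter({'Latin': 100}),
                                       insertions=Counter({'Latin': 2}),
                                       deletions=1,
                                       substitutions=Counter({'Latin': 4}))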
172 | """ 173 | logger.info('Serializing report for {}'.format(model)) 174 | 175 | report = {'model': model, 176 | 'chars': chars, 177 | 'errors': errors, 178 | 'accuracy': (chars-errors)/chars * 100, 179 | 'insertions': sum(insertions.values()), 180 | 'deletions': deletions, 181 | 'substitutions': sum(substitutions.values()), 182 | 'scripts': sorted([{'script': k, 183 | 'count': v, 184 | 'errors': insertions[k] + substitutions[k], 185 | 'accuracy': 100 * (v-(insertions[k] + substitutions[k]))/v} for k, v in scripts.items()], 186 | key=lambda x: x['accuracy'], 187 | reverse=True), 188 | 'counts': sorted([{'correct': make_printable(k[0]), 189 | 'generated': make_printable(k[1]), 190 | 'errors': v} for k, v in char_confusions.items() if k[0] != k[1]], 191 | key=lambda x: x['errors'], 192 | reverse=True)} 193 | logger.debug('Initializing jinja environment.') 194 | env = Environment(loader=PackageLoader('kraken', 'templates'), 195 | trim_blocks=True, 196 | lstrip_blocks=True, 197 | autoescape=True) 198 | logger.debug('Retrieving template.') 199 | tmpl = env.get_template('report') 200 | logger.debug('Rendering data.') 201 | return tmpl.render(report=report) 202 | 203 | -------------------------------------------------------------------------------- /kraken/lib/clstm_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 2 | # source: clstm.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='clstm.proto', 20 | package='clstm', 21 | syntax='proto2', 22 | serialized_pb=_b('\n\x0b\x63lstm.proto\x12\x05\x63lstm\"&\n\x08KeyValue\x12\x0b\n\x03key\x18\x01 \x02(\t\x12\r\n\x05value\x18\x02 \x02(\t\"1\n\x05\x41rray\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0b\n\x03\x64im\x18\x02 \x03(\x05\x12\r\n\x05value\x18\x03 \x03(\x02\"\xcf\x01\n\x0cNetworkProto\x12\x0c\n\x04kind\x18\x01 \x02(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06ninput\x18\n \x02(\x05\x12\x0f\n\x07noutput\x18\x0b \x02(\x05\x12\x0e\n\x06icodec\x18\x0c \x03(\x05\x12\r\n\x05\x63odec\x18\r \x03(\x05\x12\"\n\tattribute\x18\x14 \x03(\x0b\x32\x0f.clstm.KeyValue\x12\x1d\n\x07weights\x18\x1e \x03(\x0b\x32\x0c.clstm.Array\x12 \n\x03sub\x18( \x03(\x0b\x32\x13.clstm.NetworkProto') 23 | ) 24 | 25 | 26 | 27 | 28 | _KEYVALUE = _descriptor.Descriptor( 29 | name='KeyValue', 30 | full_name='clstm.KeyValue', 31 | filename=None, 32 | file=DESCRIPTOR, 33 | containing_type=None, 34 | fields=[ 35 | _descriptor.FieldDescriptor( 36 | name='key', full_name='clstm.KeyValue.key', index=0, 37 | number=1, type=9, cpp_type=9, label=2, 38 | has_default_value=False, default_value=_b("").decode('utf-8'), 39 | message_type=None, enum_type=None, containing_type=None, 40 | is_extension=False, extension_scope=None, 41 | options=None), 42 | _descriptor.FieldDescriptor( 43 | name='value', full_name='clstm.KeyValue.value', index=1, 44 | number=2, type=9, cpp_type=9, label=2, 45 | has_default_value=False, default_value=_b("").decode('utf-8'), 46 | message_type=None, enum_type=None, 
containing_type=None, 47 | is_extension=False, extension_scope=None, 48 | options=None), 49 | ], 50 | extensions=[ 51 | ], 52 | nested_types=[], 53 | enum_types=[ 54 | ], 55 | options=None, 56 | is_extendable=False, 57 | syntax='proto2', 58 | extension_ranges=[], 59 | oneofs=[ 60 | ], 61 | serialized_start=22, 62 | serialized_end=60, 63 | ) 64 | 65 | 66 | _ARRAY = _descriptor.Descriptor( 67 | name='Array', 68 | full_name='clstm.Array', 69 | filename=None, 70 | file=DESCRIPTOR, 71 | containing_type=None, 72 | fields=[ 73 | _descriptor.FieldDescriptor( 74 | name='name', full_name='clstm.Array.name', index=0, 75 | number=1, type=9, cpp_type=9, label=1, 76 | has_default_value=False, default_value=_b("").decode('utf-8'), 77 | message_type=None, enum_type=None, containing_type=None, 78 | is_extension=False, extension_scope=None, 79 | options=None), 80 | _descriptor.FieldDescriptor( 81 | name='dim', full_name='clstm.Array.dim', index=1, 82 | number=2, type=5, cpp_type=1, label=3, 83 | has_default_value=False, default_value=[], 84 | message_type=None, enum_type=None, containing_type=None, 85 | is_extension=False, extension_scope=None, 86 | options=None), 87 | _descriptor.FieldDescriptor( 88 | name='value', full_name='clstm.Array.value', index=2, 89 | number=3, type=2, cpp_type=6, label=3, 90 | has_default_value=False, default_value=[], 91 | message_type=None, enum_type=None, containing_type=None, 92 | is_extension=False, extension_scope=None, 93 | options=None), 94 | ], 95 | extensions=[ 96 | ], 97 | nested_types=[], 98 | enum_types=[ 99 | ], 100 | options=None, 101 | is_extendable=False, 102 | syntax='proto2', 103 | extension_ranges=[], 104 | oneofs=[ 105 | ], 106 | serialized_start=62, 107 | serialized_end=111, 108 | ) 109 | 110 | 111 | _NETWORKPROTO = _descriptor.Descriptor( 112 | name='NetworkProto', 113 | full_name='clstm.NetworkProto', 114 | filename=None, 115 | file=DESCRIPTOR, 116 | containing_type=None, 117 | fields=[ 118 | _descriptor.FieldDescriptor( 119 | name='kind', full_name='clstm.NetworkProto.kind', index=0, 120 | number=1, type=9, cpp_type=9, label=2, 121 | has_default_value=False, default_value=_b("").decode('utf-8'), 122 | message_type=None, enum_type=None, containing_type=None, 123 | is_extension=False, extension_scope=None, 124 | options=None), 125 | _descriptor.FieldDescriptor( 126 | name='name', full_name='clstm.NetworkProto.name', index=1, 127 | number=2, type=9, cpp_type=9, label=1, 128 | has_default_value=False, default_value=_b("").decode('utf-8'), 129 | message_type=None, enum_type=None, containing_type=None, 130 | is_extension=False, extension_scope=None, 131 | options=None), 132 | _descriptor.FieldDescriptor( 133 | name='ninput', full_name='clstm.NetworkProto.ninput', index=2, 134 | number=10, type=5, cpp_type=1, label=2, 135 | has_default_value=False, default_value=0, 136 | message_type=None, enum_type=None, containing_type=None, 137 | is_extension=False, extension_scope=None, 138 | options=None), 139 | _descriptor.FieldDescriptor( 140 | name='noutput', full_name='clstm.NetworkProto.noutput', index=3, 141 | number=11, type=5, cpp_type=1, label=2, 142 | has_default_value=False, default_value=0, 143 | message_type=None, enum_type=None, containing_type=None, 144 | is_extension=False, extension_scope=None, 145 | options=None), 146 | _descriptor.FieldDescriptor( 147 | name='icodec', full_name='clstm.NetworkProto.icodec', index=4, 148 | number=12, type=5, cpp_type=1, label=3, 149 | has_default_value=False, default_value=[], 150 | message_type=None, enum_type=None, 
containing_type=None, 151 | is_extension=False, extension_scope=None, 152 | options=None), 153 | _descriptor.FieldDescriptor( 154 | name='codec', full_name='clstm.NetworkProto.codec', index=5, 155 | number=13, type=5, cpp_type=1, label=3, 156 | has_default_value=False, default_value=[], 157 | message_type=None, enum_type=None, containing_type=None, 158 | is_extension=False, extension_scope=None, 159 | options=None), 160 | _descriptor.FieldDescriptor( 161 | name='attribute', full_name='clstm.NetworkProto.attribute', index=6, 162 | number=20, type=11, cpp_type=10, label=3, 163 | has_default_value=False, default_value=[], 164 | message_type=None, enum_type=None, containing_type=None, 165 | is_extension=False, extension_scope=None, 166 | options=None), 167 | _descriptor.FieldDescriptor( 168 | name='weights', full_name='clstm.NetworkProto.weights', index=7, 169 | number=30, type=11, cpp_type=10, label=3, 170 | has_default_value=False, default_value=[], 171 | message_type=None, enum_type=None, containing_type=None, 172 | is_extension=False, extension_scope=None, 173 | options=None), 174 | _descriptor.FieldDescriptor( 175 | name='sub', full_name='clstm.NetworkProto.sub', index=8, 176 | number=40, type=11, cpp_type=10, label=3, 177 | has_default_value=False, default_value=[], 178 | message_type=None, enum_type=None, containing_type=None, 179 | is_extension=False, extension_scope=None, 180 | options=None), 181 | ], 182 | extensions=[ 183 | ], 184 | nested_types=[], 185 | enum_types=[ 186 | ], 187 | options=None, 188 | is_extendable=False, 189 | syntax='proto2', 190 | extension_ranges=[], 191 | oneofs=[ 192 | ], 193 | serialized_start=114, 194 | serialized_end=321, 195 | ) 196 | 197 | _NETWORKPROTO.fields_by_name['attribute'].message_type = _KEYVALUE 198 | _NETWORKPROTO.fields_by_name['weights'].message_type = _ARRAY 199 | _NETWORKPROTO.fields_by_name['sub'].message_type = _NETWORKPROTO 200 | DESCRIPTOR.message_types_by_name['KeyValue'] = _KEYVALUE 201 | DESCRIPTOR.message_types_by_name['Array'] = _ARRAY 202 | DESCRIPTOR.message_types_by_name['NetworkProto'] = _NETWORKPROTO 203 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 204 | 205 | KeyValue = _reflection.GeneratedProtocolMessageType('KeyValue', (_message.Message,), dict( 206 | DESCRIPTOR = _KEYVALUE, 207 | __module__ = 'clstm_pb2' 208 | # @@protoc_insertion_point(class_scope:clstm.KeyValue) 209 | )) 210 | _sym_db.RegisterMessage(KeyValue) 211 | 212 | Array = _reflection.GeneratedProtocolMessageType('Array', (_message.Message,), dict( 213 | DESCRIPTOR = _ARRAY, 214 | __module__ = 'clstm_pb2' 215 | # @@protoc_insertion_point(class_scope:clstm.Array) 216 | )) 217 | _sym_db.RegisterMessage(Array) 218 | 219 | NetworkProto = _reflection.GeneratedProtocolMessageType('NetworkProto', (_message.Message,), dict( 220 | DESCRIPTOR = _NETWORKPROTO, 221 | __module__ = 'clstm_pb2' 222 | # @@protoc_insertion_point(class_scope:clstm.NetworkProto) 223 | )) 224 | _sym_db.RegisterMessage(NetworkProto) 225 | 226 | 227 | # @@protoc_insertion_point(module_scope) 228 | -------------------------------------------------------------------------------- /docs/advanced.rst: -------------------------------------------------------------------------------- 1 | .. 
_advanced: 2 | 3 | Advanced Usage 4 | ============== 5 | 6 | Optical character recognition is the serial execution of multiple steps, in the 7 | case of kraken: binarization (converting color and grayscale images into bitonal 8 | ones), layout analysis/page segmentation (extracting topological text lines 9 | from an image), recognition (feeding text line images into a classifier), 10 | and finally serialization of results into an appropriate format such as hOCR or 11 | ALTO. 12 | 13 | Input Specification 14 | ------------------- 15 | 16 | All kraken subcommands operating on input-output pairs, i.e. producing one 17 | output document for one input document, follow the basic syntax: 18 | 19 | .. code-block:: console 20 | 21 | $ kraken -i input_1 output_1 -i input_2 output_2 ... subcommand_1 subcommand_2 ... subcommand_n 22 | 23 | In particular subcommands may be chained. 24 | 25 | Binarization 26 | ------------ 27 | 28 | The binarization subcommand accepts almost the same parameters as 29 | ``ocropus-nlbin``. Only options not related to binarization, e.g. skew 30 | detection, are missing. In addition, error checking (image sizes, inversion 31 | detection, grayscale enforcement) is always disabled and kraken will happily 32 | binarize any image that is thrown at it. 33 | 34 | Available parameters are: 35 | 36 | =========== ==== 37 | option type 38 | =========== ==== 39 | --threshold FLOAT 40 | --zoom FLOAT 41 | --escale FLOAT 42 | --border FLOAT 43 | --perc INTEGER RANGE 44 | --range INTEGER 45 | --low INTEGER RANGE 46 | --high INTEGER RANGE 47 | =========== ==== 48 | 49 | Page Segmentation and Script Detection 50 | -------------------------------------- 51 | 52 | The `segment` subcommand provides access to two operations: page segmentation into lines and 53 | script detection of those lines. 54 | 55 | Page segmentation is mostly parameterless, although a switch to change the 56 | color of column separators has been retained. The segmentation is written as a 57 | `JSON `_ file containing bounding boxes in reading order and 58 | the general text direction (horizontal, i.e. LTR or RTL text in top-to-bottom 59 | reading order or vertical-ltr/rtl for vertical lines read from left-to-right or 60 | right-to-left). 61 | 62 | The script detection splits extracted lines from the segmenter into strips 63 | sharing a particular script that can then be recognized by supplying 64 | appropriate models for each detected script to the `ocr` subcommand. 65 | 66 | Combined output from both consists of lists in the `boxes` field corresponding 67 | to a topographical line and containing one or more bounding boxes of a 68 | particular script. Identifiers are `ISO 15924 69 | `_ 4 character codes. 70 | 71 | .. code-block:: console 72 | 73 | $ kraken -i 14.tif lines.json segment 74 | $ cat lines.json 75 | { 76 | "boxes" : [ 77 | [ 78 | ["Grek", [561, 216, 1626, 309]] 79 | ], 80 | [ 81 | ["Latn", [2172, 197, 2424, 244]] 82 | ], 83 | [ 84 | ["Grek", [1678, 221, 2236, 320]], 85 | ["Arab", [2241, 221, 2302, 320]] 86 | ], 87 | [ 88 | ["Grek", [412, 318, 2215, 416]], 89 | ["Latn", [2208, 318, 2424, 416]] 90 | ], 91 | ... 92 | ], 93 | "text_direction" : "horizontal-tb" 94 | } 95 | 96 | Script detection is automatically enabled; if it is explicitly disabled the 97 | `boxes` field will contain only a list of line bounding boxes: 98 | 99 | .. code-block:: console 100 | 101 | [546, 216, 1626, 309], 102 | [2169, 197, 2423, 244], 103 | [1676, 221, 2293, 320], 104 | ...
105 | [503, 2641, 848, 2681] 106 | 107 | Available page segmentation parameters are: 108 | 109 | =============================================== ====== 110 | option action 111 | =============================================== ====== 112 | -d, --text-direction Sets principal text direction. Valid values are `horizontal-lr`, `horizontal-rl`, `vertical-lr`, and `vertical-rl`. 113 | --scale FLOAT Estimate of the average line height on the page 114 | -m, --maxcolseps Maximum number of columns in the input document. Set to `0` for uni-column layouts. 115 | -b, --black-colseps / -w, --white-colseps Switch to black column separators. 116 | -r, --remove-hlines / -l, --hlines Disables prefiltering of small horizontal lines. Improves segmenter output on some Arabic texts. 117 | =============================================== ====== 118 | 119 | The parameters specific to the script identification are: 120 | 121 | =============================================== ====== 122 | option action 123 | =============================================== ====== 124 | -s/-n Enables/disables script detection 125 | -a, --allowed-script Whitelists specific scripts for detection output. Other detected script runs are merged with their adjacent scripts, after a heuristic pre-merging step. 126 | =============================================== ====== 127 | 128 | Model Repository 129 | ---------------- 130 | 131 | There is a semi-curated `repository 132 | `_ of freely licensed recognition 133 | models that can be accessed from the command line using a few subcommands. For 134 | evaluating a series of models it is also possible to just clone the repository 135 | using the normal git client. 136 | 137 | The ``list`` subcommand retrieves a list of all models available and prints 138 | them including some additional information (identifier, type, and a short 139 | description): 140 | 141 | .. code-block:: console 142 | 143 | $ kraken list 144 | Retrieving model list ✓ 145 | default (pyrnn) - A converted version of en-default.pyrnn.gz 146 | toy (clstm) - A toy model trained on 400 lines of the UW3 data set. 147 | ... 148 | 149 | To access more detailed information the ``show`` subcommand may be used: 150 | 151 | .. code-block:: console 152 | 153 | $ kraken show toy 154 | name: toy.clstm 155 | 156 | A toy model trained on 400 lines of the UW3 data set. 157 | 158 | author: Benjamin Kiessling (mittagessen@l.unchti.me) 159 | http://kraken.re 160 | 161 | If a suitable model has been decided upon it can be retrieved using the ``get`` 162 | subcommand: 163 | 164 | .. code-block:: console 165 | 166 | $ kraken get toy 167 | Retrieving model ✓ 168 | 169 | Models will be placed in $XDG_BASE_DIR and can be accessed using their name as 170 | shown by the ``show`` command, e.g.: 171 | 172 | .. code-block:: console 173 | 174 | $ kraken -i ... ... ocr -m toy 175 | 176 | Additions and updates to existing models are always welcome! Just open a pull 177 | request or write an email. 178 | 179 | Recognition 180 | ----------- 181 | 182 | Recognition requires a grey-scale or binarized image, a page segmentation for 183 | that image, and a model file. In particular there is no requirement to use the 184 | page segmentation algorithm contained in the ``segment`` subcommand or the 185 | binarization provided by kraken. 186 | 187 | Multi-script recognition is possible by supplying a script-annotated 188 | segmentation and a mapping between scripts and models: 189 | 190 | .. code-block:: console 191 | 192 | $ kraken -i ... ... 
ocr -m Grek:porson.clstm -m Latn:antiqua.clstm 193 | 194 | All polytonic Greek text portions will be recognized using the `porson.clstm` 195 | model while Latin text will be fed into the `antiqua.clstm` model. It is 196 | possible to define a fallback model that other text will be fed to: 197 | 198 | .. code-block:: console 199 | 200 | $ kraken -i ... ... ocr -m ... -m ... -m default:porson.clstm 201 | 202 | It is also possible to disable recognition on a particular script by mapping to 203 | the special model keyword `ignore`. Ignored lines will still be serialized but 204 | will not contain any recognition results. 205 | 206 | The ``ocr`` subcommand is able to serialize the recognition results either as 207 | plain text (default), as `hOCR `_, into `ALTO 208 | `_, or abbyyXML containing additional 209 | metadata such as bounding boxes and confidences: 210 | 211 | .. code-block:: console 212 | 213 | $ kraken -i ... ... ocr -t # text output 214 | $ kraken -i ... ... ocr -h # hOCR output 215 | $ kraken -i ... ... ocr -a # ALTO output 216 | $ kraken -i ... ... ocr -y # abbyyXML output 217 | 218 | hOCR output is slightly different from hOCR files produced by ocropus. Each 219 | ``ocr_line`` span contains not only the bounding box of the line but also 220 | character boxes (``x_bboxes`` attribute) indicating the coordinates of each 221 | character. In each line alternating sequences of alphanumeric and 222 | non-alphanumeric (in the unicode sense) characters are put into ``ocrx_word`` 223 | spans. Both have bounding boxes as attributes and the recognition confidence 224 | for each character in the ``x_conf`` attribute. 225 | 226 | Paragraph detection has been removed as it was deemed to be unduly dependent on 227 | certain typographic features which may not be valid for your input. 228 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # kraken documentation build configuration file, created by 4 | # sphinx-quickstart on Fri May 22 16:51:45 2015. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | from __future__ import absolute_import 16 | 17 | import sys 18 | import os 19 | import shlex 20 | 21 | from subprocess import Popen, PIPE 22 | # If extensions (or modules to document with autodoc) are in another directory, 23 | # add these directories to sys.path here. If the directory is relative to the 24 | # documentation root, use os.path.abspath to make it absolute, like shown here. 25 | #sys.path.insert(0, os.path.abspath('../kraken')) 26 | 27 | # -- General configuration ------------------------------------------------ 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | #needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = [ 36 | 'sphinx.ext.autodoc', 37 | 'sphinx.ext.napoleon', 38 | ] 39 | 40 | # Add any paths that contain templates here, relative to this directory. 41 | templates_path = ['_templates'] 42 | 43 | # The suffix(es) of source filenames. 
44 | # You can specify multiple suffix as a list of string: 45 | # source_suffix = ['.rst', '.md'] 46 | source_suffix = '.rst' 47 | 48 | # The encoding of source files. 49 | #source_encoding = 'utf-8-sig' 50 | 51 | # The master toctree document. 52 | master_doc = 'index' 53 | 54 | # General information about the project. 55 | project = u'kraken' 56 | copyright = u'2015, mittagessen' 57 | author = u'mittagessen' 58 | 59 | # The version info for the project you're documenting, acts as replacement for 60 | # |version| and |release|, also used in various other places throughout the 61 | # built documents. 62 | # 63 | # The short X.Y version. 64 | pipe = Popen('git describe --tags --always master', stdout=PIPE, shell=True) 65 | version = pipe.stdout.read().decode('utf-8') 66 | release = version 67 | 68 | # The language for content autogenerated by Sphinx. Refer to documentation 69 | # for a list of supported languages. 70 | # 71 | # This is also used if you do content translation via gettext catalogs. 72 | # Usually you set "language" from the command line for these cases. 73 | language = None 74 | 75 | # There are two options for replacing |today|: either, you set today to some 76 | # non-false value, then it is used: 77 | #today = '' 78 | # Else, today_fmt is used as the format for a strftime call. 79 | #today_fmt = '%B %d, %Y' 80 | 81 | # List of patterns, relative to source directory, that match files and 82 | # directories to ignore when looking for source files. 83 | exclude_patterns = ['_build'] 84 | 85 | # The reST default role (used for this markup: `text`) to use for all 86 | # documents. 87 | #default_role = None 88 | 89 | # If true, '()' will be appended to :func: etc. cross-reference text. 90 | #add_function_parentheses = True 91 | 92 | # If true, the current module name will be prepended to all description 93 | # unit titles (such as .. function::). 94 | #add_module_names = True 95 | 96 | # If true, sectionauthor and moduleauthor directives will be shown in the 97 | # output. They are ignored by default. 98 | #show_authors = False 99 | 100 | # The name of the Pygments (syntax highlighting) style to use. 101 | pygments_style = 'sphinx' 102 | 103 | # A list of ignored prefixes for module index sorting. 104 | #modindex_common_prefix = [] 105 | 106 | # If true, keep warnings as "system message" paragraphs in the built documents. 107 | #keep_warnings = False 108 | 109 | # If true, `todo` and `todoList` produce output, else they produce nothing. 110 | todo_include_todos = False 111 | 112 | 113 | # -- Options for HTML output ---------------------------------------------- 114 | 115 | # The theme to use for HTML and HTML Help pages. See the documentation for 116 | # a list of builtin themes. 117 | html_theme = 'alabaster' 118 | 119 | # Theme options are theme-specific and customize the look and feel of a theme 120 | # further. For a list of options available for each theme, see the 121 | # documentation. 122 | html_theme_options = { 123 | 'github_user': 'mittagessen', 124 | 'github_repo': 'kraken', 125 | 'travis_button': 'true', 126 | } 127 | 128 | # Add any paths that contain custom themes here, relative to this directory. 129 | #html_theme_path = [] 130 | 131 | # The name for this set of Sphinx documents. If None, it defaults to 132 | # " v documentation". 133 | #html_title = None 134 | 135 | # A shorter title for the navigation bar. Default is the same as html_title. 
136 | #html_short_title = None 137 | 138 | # The name of an image file (relative to this directory) to place at the top 139 | # of the sidebar. 140 | html_logo = '_static/kraken.png' 141 | 142 | # The name of an image file (within the static path) to use as favicon of the 143 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 144 | # pixels large. 145 | #html_favicon = None 146 | 147 | # Add any paths that contain custom static files (such as style sheets) here, 148 | # relative to this directory. They are copied after the builtin static files, 149 | # so a file named "default.css" will overwrite the builtin "default.css". 150 | html_static_path = ['_static'] 151 | 152 | # Add any extra paths that contain custom files (such as robots.txt or 153 | # .htaccess) here, relative to this directory. These files are copied 154 | # directly to the root of the documentation. 155 | #html_extra_path = [] 156 | 157 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 158 | # using the given strftime format. 159 | #html_last_updated_fmt = '%b %d, %Y' 160 | 161 | # If true, SmartyPants will be used to convert quotes and dashes to 162 | # typographically correct entities. 163 | #html_use_smartypants = True 164 | 165 | # Custom sidebar templates, maps document names to template names. 166 | html_sidebars = { 167 | 'index': ['sidebarintro.html', 'navigation.html', 'searchbox.html', 'versions.html'], 168 | '**': ['localtoc.html', 'relations.html', 'searchbox.html'] 169 | } 170 | 171 | # Additional templates that should be rendered to pages, maps page names to 172 | # template names. 173 | #html_additional_pages = {} 174 | 175 | # If false, no module index is generated. 176 | #html_domain_indices = True 177 | 178 | # If false, no index is generated. 179 | #html_use_index = True 180 | 181 | # If true, the index is split into individual pages for each letter. 182 | #html_split_index = False 183 | 184 | # If true, links to the reST sources are added to the pages. 185 | #html_show_sourcelink = True 186 | 187 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 188 | #html_show_sphinx = True 189 | 190 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 191 | #html_show_copyright = True 192 | 193 | # If true, an OpenSearch description file will be output, and all pages will 194 | # contain a tag referring to it. The value of this option must be the 195 | # base URL from which the finished HTML is served. 196 | #html_use_opensearch = '' 197 | 198 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 199 | #html_file_suffix = None 200 | 201 | # Language to be used for generating the HTML full-text search index. 202 | # Sphinx supports the following languages: 203 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 204 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' 205 | #html_search_language = 'en' 206 | 207 | # A dictionary with options for the search language support, empty by default. 208 | # Now only 'ja' uses this config value 209 | #html_search_options = {'type': 'default'} 210 | 211 | # The name of a javascript file (relative to the configuration directory) that 212 | # implements a search results scorer. If empty, the default will be used. 213 | #html_search_scorer = 'scorer.js' 214 | 215 | # Output file base name for HTML help builder. 
216 | htmlhelp_basename = 'krakendoc' 217 | 218 | # -- Options for LaTeX output --------------------------------------------- 219 | 220 | latex_elements = { 221 | # The paper size ('letterpaper' or 'a4paper'). 222 | #'papersize': 'letterpaper', 223 | 224 | # The font size ('10pt', '11pt' or '12pt'). 225 | #'pointsize': '10pt', 226 | 227 | # Additional stuff for the LaTeX preamble. 228 | #'preamble': '', 229 | 230 | # Latex figure (float) alignment 231 | #'figure_align': 'htbp', 232 | } 233 | 234 | # Grouping the document tree into LaTeX files. List of tuples 235 | # (source start file, target name, title, 236 | # author, documentclass [howto, manual, or own class]). 237 | latex_documents = [ 238 | (master_doc, 'kraken.tex', u'kraken Documentation', 239 | u'mittagessen', 'manual'), 240 | ] 241 | 242 | # The name of an image file (relative to this directory) to place at the top of 243 | # the title page. 244 | #latex_logo = None 245 | 246 | # For "manual" documents, if this is true, then toplevel headings are parts, 247 | # not chapters. 248 | #latex_use_parts = False 249 | 250 | # If true, show page references after internal links. 251 | #latex_show_pagerefs = False 252 | 253 | # If true, show URL addresses after external links. 254 | #latex_show_urls = False 255 | 256 | # Documents to append as an appendix to all manuals. 257 | #latex_appendices = [] 258 | 259 | # If false, no module index is generated. 260 | #latex_domain_indices = True 261 | 262 | 263 | # -- Options for manual page output --------------------------------------- 264 | 265 | # One entry per manual page. List of tuples 266 | # (source start file, name, description, authors, manual section). 267 | man_pages = [ 268 | (master_doc, 'kraken', u'kraken Documentation', 269 | [author], 1) 270 | ] 271 | 272 | # If true, show URL addresses after external links. 273 | #man_show_urls = False 274 | 275 | 276 | # -- Options for Texinfo output ------------------------------------------- 277 | 278 | # Grouping the document tree into Texinfo files. List of tuples 279 | # (source start file, target name, title, author, 280 | # dir menu entry, description, category) 281 | texinfo_documents = [ 282 | (master_doc, 'kraken', u'kraken Documentation', 283 | author, 'kraken', 'One line description of project.', 284 | 'Miscellaneous'), 285 | ] 286 | 287 | # Documents to append as an appendix to all manuals. 288 | #texinfo_appendices = [] 289 | 290 | # If false, no module index is generated. 291 | #texinfo_domain_indices = True 292 | 293 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 294 | #texinfo_show_urls = 'footnote' 295 | 296 | # If true, do not generate a @detailmenu in the "Top" node's menu. 297 | #texinfo_no_detailmenu = False 298 | 299 | scv_whitelist_branches = ('master',) 300 | import re 301 | scv_whitelist_tags = (re.compile(r'^\d+\.\d+\.0$'),) 302 | 303 | scv_greatest_tag = True 304 | 305 | scv_show_banner = True 306 | scv_banner_greatest_tag = True 307 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /kraken/lib/pyrnn_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 2 | # source: proto/pyrnn.proto 3 | 4 | from google.protobuf import descriptor as _descriptor 5 | from google.protobuf import message as _message 6 | from google.protobuf import reflection as _reflection 7 | from google.protobuf import symbol_database as _symbol_database 8 | from google.protobuf import descriptor_pb2 9 | # @@protoc_insertion_point(imports) 10 | 11 | _sym_db = _symbol_database.Default() 12 | 13 | 14 | 15 | 16 | DESCRIPTOR = _descriptor.FileDescriptor( 17 | name='proto/pyrnn.proto', 18 | package='kraken', 19 | syntax='proto2', 20 | serialized_pb=b'\n\x11proto/pyrnn.proto\x12\x06kraken\"\'\n\x05\x61rray\x12\x0b\n\x03\x64im\x18\x01 \x03(\r\x12\x11\n\x05value\x18\x02 \x03(\x02\x42\x02\x10\x01\"\xca\x01\n\x04lstm\x12\x1a\n\x03wgi\x18\x01 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wgf\x18\x02 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wgo\x18\x03 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wci\x18\x04 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wip\x18\x05 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wfp\x18\x06 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wop\x18\x07 \x02(\x0b\x32\r.kraken.array\"$\n\x07softmax\x12\x19\n\x02w2\x18\x01 \x02(\x0b\x32\r.kraken.array\"\xb1\x01\n\x05pyrnn\x12\x0c\n\x04kind\x18\x01 \x02(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06ninput\x18\n \x02(\r\x12\x0f\n\x07noutput\x18\x0b \x02(\r\x12\r\n\x05\x63odec\x18\x0c \x03(\t\x12\x1c\n\x06\x66wdnet\x18\r \x02(\x0b\x32\x0c.kraken.lstm\x12\x1c\n\x06revnet\x18\x0e \x02(\x0b\x32\x0c.kraken.lstm\x12 \n\x07softmax\x18\x0f \x02(\x0b\x32\x0f.kraken.softmax' 21 | ) 22 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 23 | 24 | 25 | 26 | 27 | _ARRAY = _descriptor.Descriptor( 28 | name='array', 29 | full_name='kraken.array', 30 | filename=None, 31 | file=DESCRIPTOR, 32 | containing_type=None, 33 | fields=[ 34 | _descriptor.FieldDescriptor( 35 | name='dim', full_name='kraken.array.dim', index=0, 36 | number=1, type=13, cpp_type=3, label=3, 37 | has_default_value=False, default_value=[], 38 | message_type=None, enum_type=None, containing_type=None, 39 | is_extension=False, extension_scope=None, 40 | options=None), 41 | _descriptor.FieldDescriptor( 42 | name='value', full_name='kraken.array.value', index=1, 43 | number=2, type=2, cpp_type=6, label=3, 44 | has_default_value=False, default_value=[], 45 | message_type=None, enum_type=None, containing_type=None, 46 | is_extension=False, extension_scope=None, 47 | options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), b'\020\001')), 48 | ], 49 | extensions=[ 50 | ], 51 | nested_types=[], 52 | enum_types=[ 53 | ], 54 | options=None, 55 | is_extendable=False, 56 | syntax='proto2', 57 | extension_ranges=[], 58 | oneofs=[ 59 | ], 60 | serialized_start=29, 61 | serialized_end=68, 62 | ) 63 | 64 | 65 | _LSTM = _descriptor.Descriptor( 66 | name='lstm', 67 | full_name='kraken.lstm', 68 | 
filename=None, 69 | file=DESCRIPTOR, 70 | containing_type=None, 71 | fields=[ 72 | _descriptor.FieldDescriptor( 73 | name='wgi', full_name='kraken.lstm.wgi', index=0, 74 | number=1, type=11, cpp_type=10, label=2, 75 | has_default_value=False, default_value=None, 76 | message_type=None, enum_type=None, containing_type=None, 77 | is_extension=False, extension_scope=None, 78 | options=None), 79 | _descriptor.FieldDescriptor( 80 | name='wgf', full_name='kraken.lstm.wgf', index=1, 81 | number=2, type=11, cpp_type=10, label=2, 82 | has_default_value=False, default_value=None, 83 | message_type=None, enum_type=None, containing_type=None, 84 | is_extension=False, extension_scope=None, 85 | options=None), 86 | _descriptor.FieldDescriptor( 87 | name='wgo', full_name='kraken.lstm.wgo', index=2, 88 | number=3, type=11, cpp_type=10, label=2, 89 | has_default_value=False, default_value=None, 90 | message_type=None, enum_type=None, containing_type=None, 91 | is_extension=False, extension_scope=None, 92 | options=None), 93 | _descriptor.FieldDescriptor( 94 | name='wci', full_name='kraken.lstm.wci', index=3, 95 | number=4, type=11, cpp_type=10, label=2, 96 | has_default_value=False, default_value=None, 97 | message_type=None, enum_type=None, containing_type=None, 98 | is_extension=False, extension_scope=None, 99 | options=None), 100 | _descriptor.FieldDescriptor( 101 | name='wip', full_name='kraken.lstm.wip', index=4, 102 | number=5, type=11, cpp_type=10, label=2, 103 | has_default_value=False, default_value=None, 104 | message_type=None, enum_type=None, containing_type=None, 105 | is_extension=False, extension_scope=None, 106 | options=None), 107 | _descriptor.FieldDescriptor( 108 | name='wfp', full_name='kraken.lstm.wfp', index=5, 109 | number=6, type=11, cpp_type=10, label=2, 110 | has_default_value=False, default_value=None, 111 | message_type=None, enum_type=None, containing_type=None, 112 | is_extension=False, extension_scope=None, 113 | options=None), 114 | _descriptor.FieldDescriptor( 115 | name='wop', full_name='kraken.lstm.wop', index=6, 116 | number=7, type=11, cpp_type=10, label=2, 117 | has_default_value=False, default_value=None, 118 | message_type=None, enum_type=None, containing_type=None, 119 | is_extension=False, extension_scope=None, 120 | options=None), 121 | ], 122 | extensions=[ 123 | ], 124 | nested_types=[], 125 | enum_types=[ 126 | ], 127 | options=None, 128 | is_extendable=False, 129 | syntax='proto2', 130 | extension_ranges=[], 131 | oneofs=[ 132 | ], 133 | serialized_start=71, 134 | serialized_end=273, 135 | ) 136 | 137 | 138 | _SOFTMAX = _descriptor.Descriptor( 139 | name='softmax', 140 | full_name='kraken.softmax', 141 | filename=None, 142 | file=DESCRIPTOR, 143 | containing_type=None, 144 | fields=[ 145 | _descriptor.FieldDescriptor( 146 | name='w2', full_name='kraken.softmax.w2', index=0, 147 | number=1, type=11, cpp_type=10, label=2, 148 | has_default_value=False, default_value=None, 149 | message_type=None, enum_type=None, containing_type=None, 150 | is_extension=False, extension_scope=None, 151 | options=None), 152 | ], 153 | extensions=[ 154 | ], 155 | nested_types=[], 156 | enum_types=[ 157 | ], 158 | options=None, 159 | is_extendable=False, 160 | syntax='proto2', 161 | extension_ranges=[], 162 | oneofs=[ 163 | ], 164 | serialized_start=275, 165 | serialized_end=311, 166 | ) 167 | 168 | 169 | _PYRNN = _descriptor.Descriptor( 170 | name='pyrnn', 171 | full_name='kraken.pyrnn', 172 | filename=None, 173 | file=DESCRIPTOR, 174 | containing_type=None, 175 | fields=[ 176 | 
_descriptor.FieldDescriptor( 177 | name='kind', full_name='kraken.pyrnn.kind', index=0, 178 | number=1, type=9, cpp_type=9, label=2, 179 | has_default_value=False, default_value=b"".decode('utf-8'), 180 | message_type=None, enum_type=None, containing_type=None, 181 | is_extension=False, extension_scope=None, 182 | options=None), 183 | _descriptor.FieldDescriptor( 184 | name='name', full_name='kraken.pyrnn.name', index=1, 185 | number=2, type=9, cpp_type=9, label=1, 186 | has_default_value=False, default_value=b"".decode('utf-8'), 187 | message_type=None, enum_type=None, containing_type=None, 188 | is_extension=False, extension_scope=None, 189 | options=None), 190 | _descriptor.FieldDescriptor( 191 | name='ninput', full_name='kraken.pyrnn.ninput', index=2, 192 | number=10, type=13, cpp_type=3, label=2, 193 | has_default_value=False, default_value=0, 194 | message_type=None, enum_type=None, containing_type=None, 195 | is_extension=False, extension_scope=None, 196 | options=None), 197 | _descriptor.FieldDescriptor( 198 | name='noutput', full_name='kraken.pyrnn.noutput', index=3, 199 | number=11, type=13, cpp_type=3, label=2, 200 | has_default_value=False, default_value=0, 201 | message_type=None, enum_type=None, containing_type=None, 202 | is_extension=False, extension_scope=None, 203 | options=None), 204 | _descriptor.FieldDescriptor( 205 | name='codec', full_name='kraken.pyrnn.codec', index=4, 206 | number=12, type=9, cpp_type=9, label=3, 207 | has_default_value=False, default_value=[], 208 | message_type=None, enum_type=None, containing_type=None, 209 | is_extension=False, extension_scope=None, 210 | options=None), 211 | _descriptor.FieldDescriptor( 212 | name='fwdnet', full_name='kraken.pyrnn.fwdnet', index=5, 213 | number=13, type=11, cpp_type=10, label=2, 214 | has_default_value=False, default_value=None, 215 | message_type=None, enum_type=None, containing_type=None, 216 | is_extension=False, extension_scope=None, 217 | options=None), 218 | _descriptor.FieldDescriptor( 219 | name='revnet', full_name='kraken.pyrnn.revnet', index=6, 220 | number=14, type=11, cpp_type=10, label=2, 221 | has_default_value=False, default_value=None, 222 | message_type=None, enum_type=None, containing_type=None, 223 | is_extension=False, extension_scope=None, 224 | options=None), 225 | _descriptor.FieldDescriptor( 226 | name='softmax', full_name='kraken.pyrnn.softmax', index=7, 227 | number=15, type=11, cpp_type=10, label=2, 228 | has_default_value=False, default_value=None, 229 | message_type=None, enum_type=None, containing_type=None, 230 | is_extension=False, extension_scope=None, 231 | options=None), 232 | ], 233 | extensions=[ 234 | ], 235 | nested_types=[], 236 | enum_types=[ 237 | ], 238 | options=None, 239 | is_extendable=False, 240 | syntax='proto2', 241 | extension_ranges=[], 242 | oneofs=[ 243 | ], 244 | serialized_start=314, 245 | serialized_end=491, 246 | ) 247 | 248 | _LSTM.fields_by_name['wgi'].message_type = _ARRAY 249 | _LSTM.fields_by_name['wgf'].message_type = _ARRAY 250 | _LSTM.fields_by_name['wgo'].message_type = _ARRAY 251 | _LSTM.fields_by_name['wci'].message_type = _ARRAY 252 | _LSTM.fields_by_name['wip'].message_type = _ARRAY 253 | _LSTM.fields_by_name['wfp'].message_type = _ARRAY 254 | _LSTM.fields_by_name['wop'].message_type = _ARRAY 255 | _SOFTMAX.fields_by_name['w2'].message_type = _ARRAY 256 | _PYRNN.fields_by_name['fwdnet'].message_type = _LSTM 257 | _PYRNN.fields_by_name['revnet'].message_type = _LSTM 258 | _PYRNN.fields_by_name['softmax'].message_type = _SOFTMAX 259 | 
DESCRIPTOR.message_types_by_name['array'] = _ARRAY 260 | DESCRIPTOR.message_types_by_name['lstm'] = _LSTM 261 | DESCRIPTOR.message_types_by_name['softmax'] = _SOFTMAX 262 | DESCRIPTOR.message_types_by_name['pyrnn'] = _PYRNN 263 | 264 | array = _reflection.GeneratedProtocolMessageType('array', (_message.Message,), dict( 265 | DESCRIPTOR = _ARRAY, 266 | __module__ = 'proto.pyrnn_pb2' 267 | # @@protoc_insertion_point(class_scope:kraken.array) 268 | )) 269 | _sym_db.RegisterMessage(array) 270 | 271 | lstm = _reflection.GeneratedProtocolMessageType('lstm', (_message.Message,), dict( 272 | DESCRIPTOR = _LSTM, 273 | __module__ = 'proto.pyrnn_pb2' 274 | # @@protoc_insertion_point(class_scope:kraken.lstm) 275 | )) 276 | _sym_db.RegisterMessage(lstm) 277 | 278 | softmax = _reflection.GeneratedProtocolMessageType('softmax', (_message.Message,), dict( 279 | DESCRIPTOR = _SOFTMAX, 280 | __module__ = 'proto.pyrnn_pb2' 281 | # @@protoc_insertion_point(class_scope:kraken.softmax) 282 | )) 283 | _sym_db.RegisterMessage(softmax) 284 | 285 | pyrnn = _reflection.GeneratedProtocolMessageType('pyrnn', (_message.Message,), dict( 286 | DESCRIPTOR = _PYRNN, 287 | __module__ = 'proto.pyrnn_pb2' 288 | # @@protoc_insertion_point(class_scope:kraken.pyrnn) 289 | )) 290 | _sym_db.RegisterMessage(pyrnn) 291 | 292 | 293 | _ARRAY.fields_by_name['value'].has_options = True 294 | _ARRAY.fields_by_name['value']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), b'\020\001') 295 | # @@protoc_insertion_point(module_scope) 296 | -------------------------------------------------------------------------------- /tests/test_codec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | import os 4 | 5 | from future.utils import PY2 6 | from nose.tools import raises 7 | 8 | from torch import IntTensor 9 | 10 | from kraken.lib import codec 11 | from kraken.lib.exceptions import KrakenEncodeException 12 | 13 | class TestCodec(unittest.TestCase): 14 | 15 | """ 16 | Testing codec mapping routines 17 | """ 18 | 19 | def setUp(self): 20 | # codec mapping one code point to one label 21 | self.o2o_codec = codec.PytorchCodec('ab') 22 | # codec mapping many code points to one label 23 | self.m2o_codec = codec.PytorchCodec(['aaa' , 'aa', 'a', 'b']) 24 | # codec mapping one code point to many labels 25 | self.o2m_codec = codec.PytorchCodec({'a': [10, 11, 12], 'b': [12, 45, 80]}) 26 | # codec mapping many code points to many labels 27 | self.m2m_codec = codec.PytorchCodec({'aaa': [10, 11, 12], 'aa': [10, 10], 'a': [10], 'bb': [15], 'b': [12]}) 28 | 29 | self.invalid_c_sequence = 'aaababbcaaa' 30 | self.valid_c_sequence = 'aaababbaaabbbb' 31 | 32 | self.invalid_l_sequence = [(45, 78, 778, 0.3793492615638364), 33 | (10, 203, 859, 0.9485075253700872), 34 | (11, 70, 601, 0.7885297329523855), 35 | (12, 251, 831, 0.7216817042926938), 36 | (900, 72, 950, 0.27609823017048707)] 37 | 38 | def test_o2o_encode(self): 39 | """ 40 | Test correct encoding of one-to-one code point sequence 41 | """ 42 | self.assertTrue(self.o2o_codec.encode(self.valid_c_sequence).eq( 43 | IntTensor([1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2])).all()) 44 | 45 | def test_m2o_encode(self): 46 | """ 47 | Test correct encoding of many-to-one code point sequence 48 | """ 49 | self.assertTrue(self.m2o_codec.encode(self.valid_c_sequence).eq( 50 | IntTensor([3, 4, 1, 4, 4, 3, 4, 4, 4, 4])).all()) 51 | 52 | def test_o2m_encode(self): 53 | """ 54 | Test correct encoding of one-to-many code point 
sequence 55 | """ 56 | self.assertTrue(self.o2m_codec.encode(self.valid_c_sequence).eq( 57 | IntTensor([10, 11, 12, 10, 11, 12, 10, 11, 12, 58 | 12, 45, 80, 10, 11, 12, 12, 45, 80, 12, 45, 59 | 80, 10, 11, 12, 10, 11, 12, 10, 11, 12, 12, 60 | 45, 80, 12, 45, 80, 12, 45, 80, 12, 45, 61 | 80])).all()) 62 | 63 | def test_m2m_encode(self): 64 | """ 65 | Test correct encoding of many-to-many code point sequence 66 | """ 67 | self.assertTrue(self.m2m_codec.encode(self.valid_c_sequence).eq( 68 | IntTensor([10, 11, 12, 12, 10, 15, 10, 11, 12, 69 | 15, 15])).all()) 70 | 71 | def test_o2o_decode(self): 72 | """ 73 | Test correct decoding of one-to-one label sequence 74 | """ 75 | self.assertEqual(''.join(x[0] for x in self.o2o_codec.decode([(1, 288, 652, 0.8537325587315542), 76 | (1, 120, 861, 0.4968470297302481), 77 | (1, 372, 629, 0.008650773294205938), 78 | (2, 406, 831, 0.15637985875540783), 79 | (1, 3, 824, 0.26475146828232776), 80 | (2, 228, 959, 0.3062689368044844), 81 | (2, 472, 679, 0.8677848554329698), 82 | (1, 482, 771, 0.6055591197109657), 83 | (1, 452, 606, 0.40744265053745055), 84 | (1, 166, 879, 0.7509269177978337), 85 | (2, 92, 729, 0.34554103785480306), 86 | (2, 227, 959, 0.3006394689033981), 87 | (2, 341, 699, 0.07798704843315862), 88 | (2, 142, 513, 0.9933850573241767)])), 89 | 'aaababbaaabbbb') 90 | 91 | def test_m2o_decode(self): 92 | """ 93 | Test correct decoding of many-to-one label sequence 94 | """ 95 | self.assertEqual(''.join(x[0] for x in self.m2o_codec.decode([(3, 28, 967, 0.07761440833942468), 96 | (4, 282, 565, 0.4946281412618093), 97 | (1, 411, 853, 0.7767301050586806), 98 | (4, 409, 501, 0.47915609540996495), 99 | (4, 299, 637, 0.7755889399450564), 100 | (3, 340, 834, 0.726656062406549), 101 | (4, 296, 846, 0.2274859668684881), 102 | (4, 238, 695, 0.32982930128257815), 103 | (4, 187, 970, 0.43354272748701805), 104 | (4, 376, 863, 0.24483897879550764)])), 105 | 'aaababbaaabbbb') 106 | 107 | def test_o2m_decode(self): 108 | """ 109 | Test correct decoding of one-to-many label sequence 110 | """ 111 | self.assertEqual(''.join(x[0] for x in self.o2m_codec.decode([(10, 35, 959, 0.43819571289990644), 112 | (11, 361, 904, 0.1801115018592916), 113 | (12, 15, 616, 0.5987506334315549), 114 | (10, 226, 577, 0.6178248939780698), 115 | (11, 227, 814, 0.31531097360327787), 116 | (12, 390, 826, 0.7706594984014595), 117 | (10, 251, 579, 0.9442530315305507), 118 | (11, 269, 870, 0.4475979925584944), 119 | (12, 456, 609, 0.9396137478409995), 120 | (12, 60, 757, 0.06416607235266458), 121 | (45, 318, 918, 0.8129458423341515), 122 | (80, 15, 914, 0.49773432435726517), 123 | (10, 211, 648, 0.7919220961861382), 124 | (11, 326, 804, 0.7852387442556333), 125 | (12, 93, 978, 0.9376801123379804), 126 | (12, 23, 698, 0.915543635886972), 127 | (45, 71, 599, 0.8137750423628737), 128 | (80, 167, 980, 0.6501035181890226), 129 | (12, 259, 823, 0.3122860659712233), 130 | (45, 312, 948, 0.20582589628806058), 131 | (80, 430, 694, 0.3528792552966924), 132 | (10, 470, 866, 0.0685524032330419), 133 | (11, 459, 826, 0.39354887700146846), 134 | (12, 392, 926, 0.4102018609185847), 135 | (10, 271, 592, 0.1877915301623876), 136 | (11, 206, 995, 0.21614062190981576), 137 | (12, 466, 648, 0.3106914763314057), 138 | (10, 368, 848, 0.28715379701274113), 139 | (11, 252, 962, 0.5535299604896257), 140 | (12, 387, 709, 0.844810014550603), 141 | (12, 156, 916, 0.9803695305965802), 142 | (45, 150, 555, 0.5969071330809561), 143 | (80, 381, 922, 0.5608300913697513), 144 | (12, 35, 762, 0.5227506455088722), 145 | (45, 
364, 931, 0.7205481732247938), 146 | (80, 341, 580, 0.536934566913969), 147 | (12, 79, 919, 0.5136066153481802), 148 | (45, 377, 773, 0.6507467790760987), 149 | (80, 497, 931, 0.7635100185309783), 150 | (12, 76, 580, 0.9542477438586341), 151 | (45, 37, 904, 0.4299813924853797), 152 | (80, 425, 638, 0.6825047210425983)])), 153 | 'aaababbaaabbbb') 154 | 155 | def test_m2m_decode(self): 156 | """ 157 | Test correct decoding of many-to-many label sequence 158 | """ 159 | self.assertEqual(''.join(x[0] for x in self.m2m_codec.decode([(10, 313, 788, 0.9379917930525369), 160 | (11, 117, 793, 0.9974374577004185), 161 | (12, 50, 707, 0.020074164253385374), 162 | (12, 382, 669, 0.525910770170754), 163 | (10, 458, 833, 0.4292373233167248), 164 | (15, 45, 831, 0.5759709886686226), 165 | (10, 465, 729, 0.8492104897235935), 166 | (11, 78, 800, 0.24733538459309445), 167 | (12, 375, 872, 0.26908722769105353), 168 | (15, 296, 889, 0.44251812620463726), 169 | (15, 237, 930, 0.5456105208117391)])), 170 | 'aaababbaaabbbb') 171 | 172 | @raises(KrakenEncodeException) 173 | def test_o2o_decode_invalid(self): 174 | """ 175 | Test correct handling of undecodable sequences (one-to-one decoder) 176 | """ 177 | self.o2o_codec.decode(self.invalid_l_sequence) 178 | 179 | @raises(KrakenEncodeException) 180 | def test_m2o_decode_invalid(self): 181 | """ 182 | Test correct handling of undecodable sequences (many-to-one decoder) 183 | """ 184 | self.m2o_codec.decode(self.invalid_l_sequence) 185 | 186 | @raises(KrakenEncodeException) 187 | def test_o2m_decode_invalid(self): 188 | """ 189 | Test correct handling of undecodable sequences (one-to-many decoder) 190 | """ 191 | self.o2m_codec.decode(self.invalid_l_sequence) 192 | 193 | @raises(KrakenEncodeException) 194 | def test_m2m_decode_invalid(self): 195 | """ 196 | Test correct handling of undecodable sequences (many-to-many decoder) 197 | """ 198 | self.m2m_codec.decode(self.invalid_l_sequence) 199 | 200 | @raises(KrakenEncodeException) 201 | def test_o2o_encode_invalid(self): 202 | """ 203 | Test correct handling of unencodable sequences (one-to-one encoder) 204 | """ 205 | self.o2o_codec.encode(self.invalid_c_sequence) 206 | 207 | @raises(KrakenEncodeException) 208 | def test_m2o_encode_invalid(self): 209 | """ 210 | Test correct handling of unencodable sequences (many-to-one encoder) 211 | """ 212 | self.m2o_codec.encode(self.invalid_c_sequence) 213 | 214 | @raises(KrakenEncodeException) 215 | def test_o2m_encode_invalid(self): 216 | """ 217 | Test correct handling of unencodable sequences (one-to-many encoder) 218 | """ 219 | self.o2m_codec.encode(self.invalid_c_sequence) 220 | 221 | @raises(KrakenEncodeException) 222 | def test_m2m_encode_invalid(self): 223 | """ 224 | Test correct handling of unencodable sequences (many-to-many encoder) 225 | """ 226 | self.m2m_codec.encode(self.invalid_c_sequence) 227 | --------------------------------------------------------------------------------
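The codec tests above pin down the behaviour of kraken.lib.codec.PytorchCodec: encode() turns a code point string into an IntTensor of labels, decode() turns (label, start, end, confidence) tuples back into (code point, start, end, confidence) tuples, and many-to-one codecs greedily match the longest code point sequence (e.g. 'aaab' encodes to [3, 4] with the m2o fixture). A minimal usage sketch follows; the mapping and the placeholder positions/confidences are modelled on the fixtures in tests/test_codec.py rather than taken from the repository.

    from kraken.lib import codec

    # one code point per label, as in the o2o fixture: 'a' -> 1, 'b' -> 2
    o2o = codec.PytorchCodec('ab')

    # encode() maps a code point string to an IntTensor of labels
    labels = o2o.encode('aabb')        # IntTensor([1, 1, 2, 2])

    # decode() maps (label, start, end, confidence) tuples back to
    # (code point, start, end, confidence) tuples; the positions and
    # confidences below are placeholder values
    decoded = o2o.decode([(1, 0, 10, 0.9), (2, 10, 20, 0.8)])
    assert ''.join(x[0] for x in decoded) == 'ab'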