├── kraken ├── lib │ ├── __init__.py │ ├── sl.py │ ├── exceptions.py │ ├── log.py │ ├── util.py │ ├── lineest.py │ ├── lstm.py │ ├── morph.py │ ├── models.py │ ├── ctc_decoder.py │ ├── train.py │ ├── codec.py │ ├── clstm_pb2.py │ └── pyrnn_pb2.py ├── script.clstm ├── __init__.py ├── templates │ ├── report │ ├── hocr │ ├── abbyyxml │ ├── style.css │ ├── alto │ └── layout.html ├── contrib │ ├── recognition_boxes.py │ └── generate_scripts.py ├── iso15924.json ├── binarization.py ├── transcribe.py ├── repo.py └── serialization.py ├── docs ├── _static │ └── kraken.png ├── gpu.rst ├── _templates │ └── sidebarintro.html ├── api.rst ├── models.rst ├── index.rst ├── vgsl.rst ├── Makefile ├── make.bat ├── advanced.rst └── conf.py ├── tests ├── resources │ ├── bw.png │ ├── input.jpg │ ├── input.tif │ ├── toy.clstm │ ├── model.pronn │ ├── model.pyrnn.gz │ └── segmentation.json ├── test_train.py ├── test_rpred.py ├── test_transcribe.py ├── test_pageseg.py ├── test_vgsl.py ├── test_models.py ├── test_binarization.py ├── test_serialization.py ├── test_layers.py └── test_codec.py ├── requirements.txt ├── setup.py ├── environment_cuda.yml ├── environment.yml ├── setup.cfg ├── .gitignore ├── .travis.yml ├── README.rst └── LICENSE /kraken/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /kraken/script.clstm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/kraken/master/kraken/script.clstm -------------------------------------------------------------------------------- /docs/_static/kraken.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/kraken/master/docs/_static/kraken.png -------------------------------------------------------------------------------- /tests/resources/bw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/bw.png -------------------------------------------------------------------------------- /tests/resources/input.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/input.jpg -------------------------------------------------------------------------------- /tests/resources/input.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/input.tif -------------------------------------------------------------------------------- /tests/resources/toy.clstm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/toy.clstm -------------------------------------------------------------------------------- /tests/resources/model.pronn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/model.pronn -------------------------------------------------------------------------------- /tests/resources/model.pyrnn.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/model.pyrnn.gz 
-------------------------------------------------------------------------------- /kraken/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | entry point for kraken functionality 3 | """ 4 | 5 | from __future__ import absolute_import, division, print_function 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | future 3 | requests 4 | click>=7.0 5 | numpy 6 | Pillow 7 | regex 8 | scipy 9 | protobuf>=3.0.0 10 | jinja2 11 | python-bidi 12 | torchvision 13 | torch>=0.4.1 14 | coremltools 15 | -------------------------------------------------------------------------------- /docs/gpu.rst: -------------------------------------------------------------------------------- 1 | .. _gpu: 2 | 3 | GPU Acceleration 4 | ================ 5 | 6 | The latest version of kraken uses a new pytorch backend which enables GPU 7 | acceleration both for training and recognition. Apart from a compatible Nvidia 8 | GPU, CUDA and cuDNN have to be installed so pytorch can run computation on it. 9 | 10 | 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | from setuptools import setup 6 | 7 | setup( 8 | include_package_data=True, 9 | test_suite="nose.collector", 10 | tests_require=['nose', 'hocr-spec'], 11 | setup_requires=['pbr'], 12 | pbr=True, 13 | ) 14 | -------------------------------------------------------------------------------- /docs/_templates/sidebarintro.html: -------------------------------------------------------------------------------- 1 |

Useful Links

2 | 8 | -------------------------------------------------------------------------------- /environment_cuda.yml: -------------------------------------------------------------------------------- 1 | name: kraken 2 | channels: 3 | - pytorch 4 | - fastai 5 | - defaults 6 | dependencies: 7 | - python>=3.6 8 | - lxml 9 | - future 10 | - regex 11 | - requests 12 | - click>=7.0 13 | - numpy 14 | - pillow 15 | - scipy 16 | - protobuf>=3.0.0 17 | - jinja2 18 | - torchvision-nightly 19 | - pytorch-nightly 20 | - pip: 21 | - coremltools 22 | - python-bidi 23 | - git+https://github.com/mittagessen/kraken.git@master 24 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: kraken 2 | channels: 3 | - pytorch 4 | - fastai 5 | - defaults 6 | dependencies: 7 | - python>=3.6 8 | - lxml 9 | - future 10 | - regex 11 | - requests 12 | - click>=7.0 13 | - numpy 14 | - pillow 15 | - scipy 16 | - protobuf>=3.0.0 17 | - jinja2 18 | - torchvision-nightly-cpu 19 | - pytorch-nightly-cpu 20 | - pip: 21 | - coremltools 22 | - python-bidi 23 | - git+https://github.com/mittagessen/kraken.git@master 24 | -------------------------------------------------------------------------------- /kraken/templates/report: -------------------------------------------------------------------------------- 1 | === report {{ report.name }} === 2 | 3 | {{ report.chars }} Characters 4 | {{ report.errors }} Errors 5 | {{ '%0.2f'| format(report.accuracy) }}% Accuracy 6 | 7 | {{ report.insertions }} Insertions 8 | {{ report.deletions }} Deletions 9 | {{ report.substitutions }} Substitutions 10 | 11 | Count Missed %Right 12 | {% for script in report.scripts %} 13 | {{ script.count }} {{ script.errors }} {{'%0.2f'| format(script.accuracy) }}% {{ script.script }} 14 | {% endfor %} 15 | 16 | Errors Correct-Generated 17 | {% for count in report.counts %} 18 | {{ count.errors }} {{ '{ ' }}{{ count.correct }}{{ ' }' }} - {{ '{ ' }}{{ count.generated }}{{ ' }' }} 19 | {% endfor %} 20 | -------------------------------------------------------------------------------- /kraken/lib/sl.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def dim0(s): 5 | """Dimension of the slice list for dimension 0.""" 6 | return s[0].stop-s[0].start 7 | 8 | 9 | def dim1(s): 10 | """Dimension of the slice list for dimension 1.""" 11 | return s[1].stop-s[1].start 12 | 13 | 14 | def area(a): 15 | """Return the area of the slice list (ignores anything past a[:2].""" 16 | return np.prod([max(x.stop-x.start, 0) for x in a[:2]]) 17 | 18 | 19 | def width(s): 20 | return s[1].stop-s[1].start 21 | 22 | 23 | def height(s): 24 | return s[0].stop-s[0].start 25 | 26 | 27 | def aspect(a): 28 | return height(a)*1.0/width(a) 29 | 30 | 31 | def xcenter(s): 32 | return np.mean([s[1].stop, s[1].start]) 33 | 34 | 35 | def ycenter(s): 36 | return np.mean([s[0].stop, s[0].start]) 37 | 38 | 39 | def center(s): 40 | return (ycenter(s), xcenter(s)) 41 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = kraken 3 | author = Benjamin Kiessling 4 | author-email = mittagessen@l.unchti.me 5 | summary = OCR/HTR engine for all the languages 6 | home-page = http://kraken.re 7 | description-file = README.rst 8 | license = Apache 9 | classifier = 10 | Development 
Status :: 5 - Stable 11 | Environment :: Console 12 | Intended Audience :: Science/Research 13 | License :: OSI Approved :: Apache Software License 14 | Operating System :: POSIX 15 | Programming Language :: Python :: 3.6 16 | Programming Language :: Python :: 3.7 17 | 18 | keywords = 19 | ocr 20 | ocropus 21 | 22 | [bdist_wheel] 23 | universal = 1 24 | 25 | [files] 26 | packages = kraken 27 | 28 | [entry_points] 29 | console_scripts = 30 | kraken = kraken.kraken:cli 31 | ketos = kraken.ketos:cli 32 | 33 | [flake8] 34 | max-line-length = 160 35 | exclude = tests/* 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | [._]*.s[a-w][a-z] 2 | [._]s[a-w][a-z] 3 | *.un~ 4 | Session.vim 5 | .netrwhist 6 | *~ 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *,cover 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | -------------------------------------------------------------------------------- /kraken/templates/hocr: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | {% if page.scripts %} 8 | 9 | {% endif %} 10 | 11 | 12 |
13 | {% for line in page.lines %} 14 | 15 | {% for segment in line.recognition %} 16 | {{ segment.text }} 17 | {% endfor %} 18 | 19 |
20 | {% endfor %} 21 | 22 |
23 | 24 | 25 | -------------------------------------------------------------------------------- /tests/test_train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | 4 | from nose.tools import raises 5 | 6 | from kraken.lib import train 7 | from itertools import cycle 8 | 9 | class TestTrain(unittest.TestCase): 10 | """ 11 | Testing model trainer interrupter classes 12 | """ 13 | def test_early_stopping(self): 14 | """ 15 | Tests early stopping interrupter. 16 | """ 17 | it = train.EarlyStopping(cycle('a'), min_delta = 1, lag = 5) 18 | for epoch, _ in enumerate(it): 19 | it.update(epoch if epoch < 10 else 10) 20 | self.assertEqual(15, epoch) 21 | self.assertEqual(it.best_epoch, 10) 22 | self.assertEqual(it.best_loss, 10) 23 | 24 | def test_epoch_stopping(self): 25 | """ 26 | Tests stopping after n epochs. 27 | """ 28 | it = train.EpochStopping(cycle('a'), epochs = 57) 29 | for epoch, _ in enumerate(it): 30 | it.update(epoch) 31 | self.assertEqual(56, epoch) 32 | self.assertEqual(it.best_epoch, 56) 33 | self.assertEqual(it.best_loss, 56) 34 | -------------------------------------------------------------------------------- /kraken/contrib/recognition_boxes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import sys 5 | 6 | from PIL import Image, ImageDraw 7 | 8 | from kraken.pageseg import segment 9 | from kraken.binarization import nlbin 10 | from kraken.rpred import rpred 11 | from itertools import cycle 12 | from kraken.lib import models 13 | 14 | cmap = cycle([(230, 25, 75, 127), 15 | (60, 180, 75, 127), 16 | (255, 225, 25, 127), 17 | (0, 130, 200, 127), 18 | (245, 130, 48, 127), 19 | (145, 30, 180, 127), 20 | (70, 240, 240, 127)]) 21 | 22 | net = models.load_any(sys.argv[1]) 23 | 24 | for fname in sys.argv[2:]: 25 | im = Image.open(fname) 26 | print(fname) 27 | im = nlbin(im) 28 | res = segment(im, maxcolseps=0) 29 | pred = rpred(net, im, res) 30 | im = im.convert('RGBA') 31 | tmp = Image.new('RGBA', im.size, (0, 0, 0, 0)) 32 | draw = ImageDraw.Draw(tmp) 33 | for line in pred: 34 | for box in line.cuts: 35 | draw.rectangle(box, fill=next(cmap)) 36 | im = Image.alpha_composite(im, tmp) 37 | im.save('high_{}'.format(os.path.basename(fname))) 38 | -------------------------------------------------------------------------------- /tests/test_rpred.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | import os 6 | import unittest 7 | 8 | from PIL import Image 9 | from nose.tools import raises 10 | 11 | from kraken.lib.models import load_any 12 | from kraken.rpred import rpred 13 | from kraken.lib.exceptions import KrakenInputException 14 | 15 | thisfile = os.path.abspath(os.path.dirname(__file__)) 16 | resources = os.path.abspath(os.path.join(thisfile, 'resources')) 17 | 18 | class TestRecognition(unittest.TestCase): 19 | 20 | """ 21 | Tests of the recognition facility and associated routines. 22 | """ 23 | def setUp(self): 24 | self.im = Image.open(os.path.join(resources, 'bw.png')) 25 | 26 | def tearDown(self): 27 | self.im.close() 28 | 29 | @raises(KrakenInputException) 30 | def test_rpred_outbounds(self): 31 | """ 32 | Tests correct handling of invalid line coordinates. 
33 | """ 34 | nn = load_any(os.path.join(resources, 'toy.clstm')) 35 | pred = rpred(nn, self.im, {'boxes': [[-1, -1, 10000, 10000]], 'text_direction': 'horizontal'}, True) 36 | next(pred) 37 | -------------------------------------------------------------------------------- /tests/test_transcribe.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | import os 6 | import json 7 | import unittest 8 | 9 | from PIL import Image 10 | from lxml import etree 11 | from io import BytesIO 12 | from kraken.transcribe import TranscriptionInterface 13 | 14 | thisfile = os.path.abspath(os.path.dirname(__file__)) 15 | resources = os.path.abspath(os.path.join(thisfile, 'resources')) 16 | 17 | class TestTranscriptionInterface(unittest.TestCase): 18 | 19 | """ 20 | Test of the transcription interface generation 21 | """ 22 | 23 | def test_transcription_generation(self): 24 | """ 25 | Tests creation of transcription interfaces with segmentation. 26 | """ 27 | tr = TranscriptionInterface() 28 | with open(os.path.join(resources, 'segmentation.json')) as fp: 29 | seg = json.load(fp) 30 | with Image.open(os.path.join(resources, 'input.jpg')) as im: 31 | tr.add_page(im, seg) 32 | fp = BytesIO() 33 | tr.write(fp) 34 | # this will not throw an exception ever so we need a better validator 35 | etree.HTML(fp.getvalue()) 36 | -------------------------------------------------------------------------------- /tests/resources/segmentation.json: -------------------------------------------------------------------------------- 1 | {"boxes": [[0, 29, 518, 56], [25, 54, 122, 82], [9, 74, 95, 119], [103, 75, 146, 131], [7, 138, 136, 231], [10, 228, 122, 348], [13, 230, 65, 285], [74, 304, 121, 354], [12, 353, 143, 405], [15, 450, 109, 521], [17, 511, 147, 574], [108, 544, 151, 597], [30, 591, 143, 694], [21, 696, 149, 838], [13, 832, 155, 900], [3, 880, 93, 970], [20, 989, 60, 1036], [13, 1096, 67, 1152], [87, 1502, 126, 1558], [7, 1866, 132, 1949], [21, 1978, 93, 2051], [26, 2048, 120, 2091], [518, 297, 580, 337], [654, 293, 1088, 332], [514, 353, 1294, 398], [519, 407, 1294, 447], [515, 453, 1292, 499], [518, 505, 1290, 546], [517, 553, 1292, 594], [514, 603, 1292, 647], [518, 652, 1293, 693], [519, 700, 1296, 742], [518, 750, 1296, 797], [518, 799, 1292, 841], [514, 848, 1296, 897], [515, 895, 885, 944], [517, 943, 1294, 990], [514, 995, 1351, 1043], [513, 1043, 1294, 1094], [513, 1094, 1293, 1141], [512, 1143, 1294, 1192], [512, 1192, 1293, 1240], [513, 1241, 1294, 1284], [517, 1290, 1292, 1331], [515, 1340, 1291, 1383], [514, 1388, 1295, 1438], [517, 1436, 1292, 1487], [516, 1483, 1291, 1539], [1078, 1546, 1283, 1584], [530, 1581, 1291, 1636], [514, 1639, 1291, 1689], [512, 1680, 859, 1716], [1389, 24, 1453, 45]], "text_direction": "horizontal-lr", "script_detection": false} -------------------------------------------------------------------------------- /kraken/contrib/generate_scripts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Script fetching the latest unicode Scripts.txt and dumping it as json. 
4 | """ 5 | from urllib import request 6 | import json 7 | import regex 8 | 9 | uri = 'http://www.unicode.org/Public/UNIDATA/Scripts.txt' 10 | 11 | re = regex.compile('^(?P[0-9A-F]{4,6})(..(?P[0-9A-F]{4,6}))?\s+; (?P[A-Za-z]+)') 12 | 13 | with open('scripts.json', 'w') as fp, request.urlopen(uri) as req: 14 | d = [] 15 | for line in req: 16 | line = line.decode('utf-8') 17 | if line.startswith('#') or line.strip() == '': 18 | continue 19 | m = re.match(line) 20 | if m: 21 | print(line) 22 | start = int(m.group('start'), base=16) 23 | end = start 24 | if m.group('end'): 25 | end = int(m.group('end'), base=16) 26 | name = m.group('name') 27 | if len(d) > 0 and d[-1][2] == name and (start - 1 == d[-1][1] or start -1 == d[-1][0]): 28 | print('merging {} and ({}, {}, {})'.format(d[-1], start, end, name)) 29 | d[-1] = (d[-1][0], end, name) 30 | else: 31 | d.append((start, end if end != start else None, name)) 32 | json.dump(d, fp) 33 | -------------------------------------------------------------------------------- /kraken/templates/abbyyxml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | {% for line in page.lines %} 7 | 8 | {% for segment in line.recognition %} 9 | {% for char in segment.recognition %} 10 | {% if loop.first %} 11 | {{ char.text }} 12 | {% else %} 13 | {{ char.text }} 14 | {% endif %} 15 | {% endfor %} 16 | {% endfor %} 17 | 18 | 19 | {% endfor %} 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /kraken/lib/exceptions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | kraken.lib.exceptions 4 | ~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | All custom exceptions raised by kraken's modules and packages. Packages should 7 | always define their exceptions here. 8 | """ 9 | 10 | 11 | class KrakenEncodeException(Exception): 12 | 13 | def __init__(self, message=None): 14 | Exception.__init__(self, message) 15 | 16 | 17 | class KrakenRecordException(Exception): 18 | 19 | def __init__(self, message=None): 20 | Exception.__init__(self, message) 21 | 22 | 23 | class KrakenInvalidModelException(Exception): 24 | 25 | def __init__(self, message=None): 26 | Exception.__init__(self, message) 27 | 28 | 29 | class KrakenInputException(Exception): 30 | 31 | def __init__(self, message=None): 32 | Exception.__init__(self, message) 33 | 34 | 35 | class KrakenRepoException(Exception): 36 | 37 | def __init__(self, message=None): 38 | Exception.__init__(self, message) 39 | 40 | 41 | class KrakenCairoSurfaceException(Exception): 42 | """ 43 | Raised when the Cairo surface couldn't be created. 
44 | 45 | Attributes: 46 | message (str): Error message 47 | width (int): Width of the surface 48 | height (int): Height of the surface 49 | """ 50 | def __init__(self, message: str, width: int, height: int) -> None: 51 | self.message = message 52 | self.width = width 53 | self.height = height 54 | 55 | def __repr__(self) -> str: 56 | return repr(self.message) 57 | -------------------------------------------------------------------------------- /tests/test_pageseg.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | import unittest 6 | import os 7 | 8 | from PIL import Image 9 | from nose.tools import raises 10 | 11 | from kraken.pageseg import segment 12 | from kraken.lib.exceptions import KrakenInputException 13 | 14 | thisfile = os.path.abspath(os.path.dirname(__file__)) 15 | resources = os.path.abspath(os.path.join(thisfile, 'resources')) 16 | 17 | 18 | class TestPageSeg(unittest.TestCase): 19 | 20 | """ 21 | Tests of the page segmentation functionality 22 | """ 23 | @raises(KrakenInputException) 24 | def test_segment_color(self): 25 | """ 26 | Test correct handling of color input. 27 | """ 28 | with Image.open(os.path.join(resources, 'input.jpg')) as im: 29 | segment(im) 30 | 31 | def test_segment_bw(self): 32 | """ 33 | Tests segmentation of bi-level input. 34 | """ 35 | with Image.open(os.path.join(resources, 'bw.png')) as im: 36 | lines = segment(im) 37 | # test if line count is roughly correct 38 | self.assertAlmostEqual(len(lines['boxes']), 30, msg='Segmentation differs ' 39 | 'wildly from true line count', delta=5) 40 | # check if lines do not extend beyond image 41 | for box in lines['boxes']: 42 | self.assertLess(0, box[0], msg='Line x0 < 0') 43 | self.assertLess(0, box[1], msg='Line y0 < 0') 44 | self.assertGreater(im.size[0], box[2], msg='Line x1 > {}'.format(im.size[0])) 45 | self.assertGreater(im.size[1], box[3], msg='Line y1 > {}'.format(im.size[1])) 46 | -------------------------------------------------------------------------------- /kraken/templates/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | background: #f3f3f3; 3 | {% if font.family %} 4 | font-family: {{ font.family }}; 5 | {% endif %} 6 | {% if font.style %} 7 | font-style: {{ font.style }}; 8 | {% endif %} 9 | {% if font.weight %} 10 | font-style: {{ font.weight }}; 11 | {% endif %} 12 | } 13 | 14 | [contenteditable=true]:empty:before { 15 | content: attr(data-placeholder); 16 | display: block; /* For Firefox */ 17 | } 18 | 19 | li[contenteditable=true]:hover, li[contenteditable=true].hovered, span[contenteditable=true]:hover, span[contenteditable=true].hovered { 20 | border: 1px solid #ff0000; 21 | } 22 | 23 | .rect:hover, a.hovered { 24 | box-shadow: inset 0 0 0 1px #ff0000; 25 | } 26 | 27 | li[contenteditable=true]{ 28 | border: 1px dashed #000; 29 | width: 100%; 30 | padding: 2px; 31 | margin: 0 0 5px 0; 32 | } 33 | 34 | ul { 35 | list-style-type:none; 36 | } 37 | 38 | nav { 39 | background: #444; 40 | position: fixed; 41 | top: 0; 42 | left: 0; 43 | height: 100%; 44 | width: 10%; 45 | font-family: "Helvetica Neue", Arial, sans-serif; 46 | } 47 | 48 | nav ul { 49 | list-style: none; 50 | margin-right: 1em; 51 | } 52 | 53 | nav li { 54 | display : inline-block; 55 | } 56 | 57 | nav a { 58 | color: white; 59 | text-decoration: none; 60 | } 61 | 62 | nav a:hover { 63 | text-decoration: underline; 64 | } 65 | 66 | 
.container { 67 | position: relative; 68 | margin-left: 15%; 69 | display: table; 70 | height: 100%; 71 | width: 85%; 72 | } 73 | 74 | .img_container { 75 | position: relative; 76 | } 77 | 78 | .column { 79 | display: table-cell; 80 | vertical-align: top; 81 | width: 50%; 82 | height: 100%; 83 | padding: 1rem; 84 | } 85 | 86 | #download_button { 87 | position: fixed; 88 | padding: 0; 89 | text-align: center; 90 | width: 10%; 91 | bottom: 50px; 92 | } 93 | 94 | .corrected { 95 | background-color: #73AD21; 96 | } 97 | 98 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | matrix: 3 | include: 4 | - python: 3.6 5 | - python: 3.7 6 | dist: xenial 7 | sudo: required 8 | notifications: 9 | email: false 10 | sudo: false 11 | install: 12 | - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; 13 | - bash miniconda.sh -b -p $HOME/miniconda 14 | - export PATH="$HOME/miniconda/bin:$PATH" 15 | - conda config --set always_yes yes --set changeps1 no --set show_channel_urls yes 16 | - conda update conda 17 | - conda create -n test-environment python=$TRAVIS_PYTHON_VERSION 18 | - source activate test-environment 19 | - conda install pbr nose pip 20 | - sed '/coremltools\|python-bidi\|regex\|torch/d' requirements.txt | xargs conda install 21 | - conda install torchvision-cpu -c pytorch 22 | - conda uninstall pytorch-cpu -c pytorch 23 | - conda install pytorch-nightly-cpu -c pytorch 24 | - pip install -r requirements.txt 25 | - conda list 26 | - pip freeze 27 | - python setup.py install 28 | script: 29 | - python setup.py nosetests 30 | deploy: 31 | provider: pypi 32 | username: ogl-iris 33 | distributions: sdist bdist_wheel 34 | skip_cleanup: true 35 | skip_upload_docs: true 36 | on: 37 | tags: true 38 | password: 39 | secure: i/TwRgfux3ebFtTgg8Od/7KGHr1AZgHJ/9r4Yop7HoZhKsgSW8Q3e65K/LJ9aQFxmggeneAdOZFboStl9li48FpfFTqJy9TioSyaDoxDv5oPmUDFKHzbjExlupa7BzeL/OaNYSzkD8S2CIcnaiQspFASCWy0pHvveTU0MvdeaFbZ+lEdwH7Kb4DotzRA2p0wOwuq84P6Vunqi9UEvVP4e/f2j1Hin+zGs08nnxfC8A1XXkKZlnnRtbaGqKkzcSyeYFDcHfFENU1E3KEbeR6xqpWgZla/WIxnQTjUaZy9/RVLja8JLoPI86WofYScKcvYRUBPX74RBgjQhpNusuZ1umGxG+1C5TzF705YqWdYCM96qqUA/hBlDSngk+ZjraPJAtSPlJCx6VaiuIu8VPgP2jcazKaMduq5C6NT0XJtNUS22cdoox3Fzhhf/f6mLPMeBxQJewYo3Qbj86Ll5M8O5SmGdwAnmGDEwL0+cqb5oULXQcK1fJMnqR68KqSoFq89zNdTEEHTjMCLJO9Yfjmpd6iY33nOXhCEWNFRKEQVbeyFcudQemDxSSGTq2LNrgzMjJj4O3chjqbU9y5KiQF5lpH28/S/ele7VrbpX9bbn3/QmSQnJhByiypOQ2vEricn3aEoToE8Ws//OCmqItoOYTzRNHs/EUST0Zah2W/LTX8= 40 | -------------------------------------------------------------------------------- /tests/test_vgsl.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | 4 | from nose.tools import raises 5 | 6 | import os 7 | import torch 8 | import tempfile 9 | from kraken.lib import vgsl 10 | 11 | 12 | class TestVGSL(unittest.TestCase): 13 | """ 14 | Testing VGSL module 15 | """ 16 | def test_helper_train(self): 17 | """ 18 | Tests train/eval mode helper methods 19 | """ 20 | rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]') 21 | rnn.train() 22 | self.assertTrue(torch.is_grad_enabled()) 23 | self.assertTrue(rnn.nn.training) 24 | rnn.eval() 25 | self.assertFalse(torch.is_grad_enabled()) 26 | self.assertFalse(rnn.nn.training) 27 | 28 | def test_helper_threads(self): 29 | """ 30 | Test openmp threads helper method. 
31 | """ 32 | rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]') 33 | rnn.set_num_threads(4) 34 | self.assertEqual(torch.get_num_threads(), 4) 35 | 36 | def test_save_model(self): 37 | """ 38 | Test model serialization. 39 | """ 40 | rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]') 41 | with tempfile.TemporaryDirectory() as dir: 42 | rnn.save_model(dir + '/foo.mlmodel') 43 | self.assertTrue(os.path.exists(dir + '/foo.mlmodel')) 44 | 45 | def test_resize(self): 46 | """ 47 | Tests resizing of output layers. 48 | """ 49 | rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]') 50 | rnn.resize_output(80) 51 | self.assertEqual(rnn.nn[-1].lin.out_features, 80) 52 | 53 | def test_del_resize(self): 54 | """ 55 | Tests resizing of output layers with entry deletion. 56 | """ 57 | rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]') 58 | rnn.resize_output(80, [2, 4, 5, 6, 7, 12, 25]) 59 | self.assertEqual(rnn.nn[-1].lin.out_features, 80) 60 | -------------------------------------------------------------------------------- /tests/test_models.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | import os 4 | import tempfile 5 | import pickle 6 | 7 | from nose.tools import raises 8 | 9 | import kraken.lib.lstm 10 | 11 | from kraken.lib import models 12 | from kraken.lib.exceptions import KrakenInvalidModelException 13 | 14 | thisfile = os.path.abspath(os.path.dirname(__file__)) 15 | resources = os.path.abspath(os.path.join(thisfile, 'resources')) 16 | 17 | class TestModels(unittest.TestCase): 18 | """ 19 | Testing model loading routines 20 | """ 21 | 22 | def setUp(self): 23 | self.temp = tempfile.NamedTemporaryFile(delete=False) 24 | 25 | def tearDown(self): 26 | self.temp.close() 27 | os.unlink(self.temp.name) 28 | 29 | @raises(KrakenInvalidModelException) 30 | def test_load_invalid(self): 31 | """ 32 | Tests correct handling of invalid files. 33 | """ 34 | models.load_any(self.temp.name) 35 | 36 | def test_load_clstm(self): 37 | """ 38 | Tests loading of valid clstm files. 39 | """ 40 | rnn = models.load_any(os.path.join(resources, 'toy.clstm').encode('utf-8')) 41 | self.assertIsInstance(rnn, models.TorchSeqRecognizer) 42 | 43 | @raises(KrakenInvalidModelException) 44 | def test_load_pyrnn_no_seqrecognizer(self): 45 | """ 46 | Test correct handling of non-SeqRecognizer pickles. 47 | """ 48 | pickle.dump(u'Iámnõtãrécðçnízer', self.temp) 49 | self.temp.close() 50 | models.load_any(self.temp.name) 51 | 52 | @raises(KrakenInvalidModelException) 53 | def test_load_any_pyrnn_py3(self): 54 | """ 55 | Test load_any doesn't load pickled models on python 3 56 | """ 57 | rnn = models.load_any(os.path.join(resources, 'model.pyrnn.gz')) 58 | 59 | def test_load_any_proto(self): 60 | """ 61 | Test load_any loads protobuf models. 
62 | """ 63 | rnn = models.load_any(os.path.join(resources, 'model.pronn')) 64 | self.assertIsInstance(rnn, kraken.lib.models.TorchSeqRecognizer) 65 | -------------------------------------------------------------------------------- /tests/test_binarization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | import unittest 6 | import os 7 | 8 | from PIL import Image 9 | from kraken.binarization import nlbin 10 | 11 | thisfile = os.path.abspath(os.path.dirname(__file__)) 12 | resources = os.path.abspath(os.path.join(thisfile, 'resources')) 13 | 14 | class TestBinarization(unittest.TestCase): 15 | 16 | """ 17 | Tests of the nlbin function for binarization of images 18 | """ 19 | def test_not_binarize_bw(self): 20 | """ 21 | Test that mode '1' images aren't binarized again. 22 | """ 23 | with Image.new('1', (1000,1000)) as im: 24 | self.assertEqual(im, nlbin(im)) 25 | 26 | def test_binarize_no_bw(self): 27 | """ 28 | Tests binarization of image formats without a 1bpp mode (JPG). 29 | """ 30 | with Image.open(os.path.join(resources, 'input.jpg')) as im: 31 | res = nlbin(im) 32 | # calculate histogram and check if only pixels of value 0/255 exist 33 | self.assertEqual(254, res.histogram().count(0), msg='Output not ' 34 | 'binarized') 35 | 36 | def test_binarize_tif(self): 37 | """ 38 | Tests binarization of RGB TIFF images. 39 | """ 40 | with Image.open(os.path.join(resources, 'input.tif')) as im: 41 | res = nlbin(im) 42 | # calculate histogram and check if only pixels of value 0/255 exist 43 | self.assertEqual(254, res.histogram().count(0), msg='Output not ' 44 | 'binarized') 45 | 46 | def test_binarize_grayscale(self): 47 | """ 48 | Test binarization of mode 'L' images. 49 | """ 50 | with Image.open(os.path.join(resources, 'input.tif')) as im: 51 | res = nlbin(im.convert('L')) 52 | # calculate histogram and check if only pixels of value 0/255 exist 53 | self.assertEqual(254, res.histogram().count(0), msg='Output not ' 54 | 'binarized') 55 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | kraken API 2 | ========== 3 | 4 | .. module:: kraken 5 | 6 | Kraken provides routines which are usable by third party tools. In general 7 | you can expect function in the ``kraken`` package to remain stable. We will try 8 | to keep these backward compatible, but as kraken is still in an early 9 | development stage and the API is still quite rudimentary nothing can be 10 | garantueed. 11 | 12 | kraken.binarization module 13 | -------------------------- 14 | 15 | .. automodule:: kraken.binarization 16 | :members: 17 | :show-inheritance: 18 | 19 | kraken.serialization module 20 | --------------------------- 21 | 22 | .. automodule:: kraken.serialization 23 | :members: 24 | :show-inheritance: 25 | 26 | kraken.pageseg module 27 | --------------------- 28 | 29 | .. automodule:: kraken.pageseg 30 | :members: 31 | :show-inheritance: 32 | 33 | kraken.rpred module 34 | ------------------- 35 | 36 | .. automodule:: kraken.rpred 37 | :members: 38 | :show-inheritance: 39 | 40 | kraken.transcribe module 41 | ------------------------ 42 | 43 | .. automodule:: kraken.transcribe 44 | :members: 45 | :show-inheritance: 46 | 47 | kraken.linegen module 48 | --------------------- 49 | 50 | .. 
automodule:: kraken.linegen 51 | :members: 52 | :show-inheritance: 53 | 54 | kraken.lib.models module 55 | ------------------------ 56 | 57 | .. automodule:: kraken.lib.models 58 | :members: 59 | :show-inheritance: 60 | 61 | kraken.lib.vgsl module 62 | ---------------------- 63 | 64 | .. automodule:: kraken.lib.vgsl 65 | :members: 66 | :show-inheritance: 67 | 68 | kraken.lib.codec 69 | ---------------- 70 | 71 | .. automodule:: kraken.lib.codec 72 | :members: 73 | :show-inheritance: 74 | 75 | kraken.lib.train module 76 | ----------------------- 77 | 78 | .. automodule:: kraken.lib.train 79 | :members: 80 | :show-inheritance: 81 | 82 | kraken.lib.dataset module 83 | ------------------------- 84 | 85 | .. automodule:: kraken.lib.dataset 86 | :members: 87 | :show-inheritance: 88 | 89 | kraken.lib.ctc_decoder 90 | ---------------------- 91 | 92 | .. automodule:: kraken.lib.ctc_decoder 93 | :members: 94 | :show-inheritance: 95 | -------------------------------------------------------------------------------- /docs/models.rst: -------------------------------------------------------------------------------- 1 | .. _models: 2 | 3 | Models 4 | ====== 5 | 6 | There are currently three kinds of models containing the recurrent neural 7 | networks doing all the character recognition supported by kraken: ``pronn`` 8 | files serializing old pickled ``pyrnn`` models as protobuf, clstm's native 9 | serialization, and versatile `Core ML 10 | `_ models. 11 | 12 | .. _pyrnn: 13 | 14 | pyrnn 15 | ----- 16 | 17 | These are serialized instances of python ``lstm.SeqRecognizer`` objects. Using 18 | such a model just entails loading the pickle and calling the appropriate 19 | functions to perform recognition much like a shared library in other 20 | programming languages. 21 | 22 | Support for these models has been dropped with kraken 1.0 as python 2.7 is 23 | phased out. 24 | 25 | pronn 26 | ----- 27 | 28 | Legacy python models can be converted to a protobuf based serialization. These 29 | are loadable by kraken 1.0 and will be automatically converted to Core ML. 30 | 31 | Protobuf models have several advantages over pickled ones. They are noticeably 32 | smaller (80Mb vs 1.8Mb for the default model), don't allow arbitrary code 33 | execution, and are upward compatible with python 3. Because they are so much 34 | more lightweight they are also loaded much faster. 35 | 36 | clstm 37 | ----- 38 | 39 | `clstm `_, a small and fast implementation of 40 | LSTM networks that was used in previous kraken versions. The model files can be 41 | loaded with pytorch-based kraken and will be converted to Core ML. 42 | 43 | CoreML 44 | ------ 45 | 46 | Core ML allows arbitrary network architectures in a compact serialization with 47 | metadata. This is the default format in pytorch-based kraken. 48 | 49 | Conversion 50 | ---------- 51 | 52 | Per default pronn/clstm models are automatically converted to the new Core ML 53 | format when explicitely defined using the ``-m`` option to the ``ocr`` utility 54 | on the command line. They are stored in the user kraken directory (default is 55 | ~/.kraken) and will be automatically substituted in future runs. 56 | 57 | If conversion is not desired, e.g. because there is a bug in the conversion 58 | routine, it can be disabled using the ``--disable-autoconversion`` switch. 
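As a sketch (the model and image names are purely illustrative), a recognition run with a legacy clstm model that triggers the one-time conversion could look like:

.. code-block:: console

   $ kraken -i bw.tif image.txt ocr -m toy.clstm

On later runs the converted Core ML model stored in the user kraken directory is substituted automatically; the ``--disable-autoconversion`` switch mentioned above skips this step.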
59 | -------------------------------------------------------------------------------- /kraken/lib/log.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2018 Benjamin Kiessling 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 14 | # or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 16 | """ 17 | kraken.lib.log 18 | ~~~~~~~~~~~~~~~~~ 19 | 20 | Handlers and formatters for logging. 21 | """ 22 | import time 23 | import click 24 | import logging 25 | 26 | 27 | class LogHandler(logging.Handler): 28 | def emit(self, record): 29 | msg = self.format(record) 30 | level = record.levelname.lower() 31 | err = level in ('warning', 'error', 'exception', 'critical') 32 | click.echo(msg, err=err) 33 | 34 | 35 | class LogFormatter(logging.Formatter): 36 | colors = { 37 | 'error': dict(fg='red'), 38 | 'exception': dict(fg='red'), 39 | 'critical': dict(fg='red'), 40 | 'warning': dict(fg='yellow'), 41 | } 42 | 43 | st_time = time.time() 44 | 45 | def format(self, record): 46 | if not record.exc_info: 47 | level = record.levelname.lower() 48 | msg = record.msg 49 | if level in self.colors: 50 | style = self.colors[level] 51 | else: 52 | style = {} 53 | msg = click.style(u'[{:2.4f}] {} '.format(time.time() - self.st_time, str(msg)), **style) 54 | return msg 55 | return logging.Formatter.format(self, record) 56 | 57 | 58 | def progressbar(*args, **kwargs): 59 | """ 60 | Slight extension to click's progressbar disabling output on when log level 61 | is set below 30. 
62 | """ 63 | import logging 64 | logger = logging.getLogger(__name__) 65 | bar = click.progressbar(*args, **kwargs) 66 | if logger.getEffectiveLevel() < 30: 67 | bar.is_hidden = True # type: ignore 68 | return bar 69 | 70 | 71 | def set_logger(logger=None, level=logging.ERROR): 72 | handler = LogHandler() 73 | handler.setFormatter(LogFormatter()) 74 | logger.addHandler(handler) 75 | logger.setLevel(level) 76 | -------------------------------------------------------------------------------- /tests/test_serialization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | import unittest 6 | import json 7 | import os 8 | 9 | from lxml import etree 10 | from io import StringIO 11 | from hocr_spec import HocrValidator 12 | 13 | from kraken import rpred 14 | from kraken import serialization 15 | 16 | thisfile = os.path.abspath(os.path.dirname(__file__)) 17 | resources = os.path.abspath(os.path.join(thisfile, 'resources')) 18 | 19 | class TestSerializations(unittest.TestCase): 20 | """ 21 | Tests for output serialization 22 | """ 23 | def setUp(self): 24 | with open(os.path.join(resources, 'records.json'), 'r') as fp: 25 | self.records = [rpred.ocr_record(**x) for x in json.load(fp)] 26 | self.validator = HocrValidator('standard') 27 | 28 | def test_vertical_hocr_serialization(self): 29 | """ 30 | Test vertical line hOCR serialization 31 | """ 32 | fp = StringIO() 33 | 34 | fp.write(serialization.serialize(self.records, image_name='foo.png', writing_mode='vertical-lr', template='hocr')) 35 | fp.seek(0) 36 | 37 | report = self.validator.validate(fp, parse_strict=True) 38 | self.assertTrue(report.is_valid()) 39 | 40 | def test_hocr_serialization(self): 41 | """ 42 | Test hOCR serialization 43 | """ 44 | fp = StringIO() 45 | 46 | fp.write(serialization.serialize(self.records, image_name='foo.png', template='hocr')) 47 | fp.seek(0) 48 | 49 | report = self.validator.validate(fp, parse_strict=True) 50 | self.assertTrue(report.is_valid()) 51 | 52 | def test_alto_serialization_validation(self): 53 | """ 54 | Validates output against ALTO schema 55 | """ 56 | fp = StringIO() 57 | 58 | fp.write(serialization.serialize(self.records, image_name='foo.png', template='alto')) 59 | doc = etree.fromstring(fp.getvalue().encode('utf-8')) 60 | with open(os.path.join(resources, 'alto-4-0.xsd')) as schema_fp: 61 | alto_schema = etree.XMLSchema(etree.parse(schema_fp)) 62 | alto_schema.assertValid(doc) 63 | 64 | def test_abbyyxml_serialization_validation(self): 65 | """ 66 | Validates output against abbyyXML schema 67 | """ 68 | fp = StringIO() 69 | 70 | fp.write(serialization.serialize(self.records, image_name='foo.png', template='abbyyxml')) 71 | doc = etree.fromstring(fp.getvalue().encode('utf-8')) 72 | with open(os.path.join(resources, 'FineReader10-schema-v1.xml')) as schema_fp: 73 | abbyy_schema = etree.XMLSchema(etree.parse(schema_fp)) 74 | abbyy_schema.assertValid(doc) 75 | -------------------------------------------------------------------------------- /kraken/iso15924.json: -------------------------------------------------------------------------------- 1 | {"520": "Tang", "20": "Xsux", "30": "Xpeo", "550": "Blis", "40": "Ugar", "50": "Egyp", "570": "Brai", "60": "Egyh", "437": "Loma", "70": "Egyd", "80": "Hluw", "90": "Maya", "95": "Sgnw", "610": "Inds", "100": "Mero", "101": "Merc", "105": "Sarb", "106": "Narb", "620": "Roro", "115": "Phnx", "116": "Lydi", "120": "Tfng", 
"123": "Samr", "124": "Armi", "125": "Hebr", "126": "Palm", "127": "Hatr", "130": "Prti", "131": "Phli", "132": "Phlp", "133": "Phlv", "134": "Avst", "135": "Syrc", "136": "Syrn", "137": "Syrj", "138": "Syre", "139": "Mani", "140": "Mand", "145": "Mong", "159": "Nbat", "160": "Arab", "161": "Aran", "165": "Nkoo", "166": "Adlm", "170": "Thaa", "175": "Orkh", "176": "Hung", "200": "Grek", "201": "Cari", "202": "Lyci", "204": "Copt", "206": "Goth", "210": "Ital", "211": "Runr", "212": "Ogam", "215": "Latn", "216": "Latg", "217": "Latf", "218": "Moon", "219": "Osge", "220": "Cyrl", "221": "Cyrs", "225": "Glag", "226": "Elba", "227": "Perm", "230": "Armn", "239": "Aghb", "240": "Geor", "241": "Geok", "755": "Dupl", "250": "Dsrt", "259": "Bass", "260": "Osma", "261": "Olck", "262": "Wara", "263": "Pauc", "264": "Mroo", "265": "Medf", "280": "Visp", "281": "Shaw", "282": "Plrd", "284": "Jamo", "285": "Bopo", "286": "Hang", "287": "Kore", "288": "Kits", "290": "Teng", "291": "Cirt", "292": "Sara", "293": "Piqd", "300": "Brah", "302": "Sidd", "305": "Khar", "310": "Guru", "312": "Gong", "313": "Gonm", "314": "Mahj", "315": "Deva", "316": "Sylo", "317": "Kthi", "318": "Sind", "319": "Shrd", "320": "Gujr", "321": "Takr", "322": "Khoj", "323": "Mult", "324": "Modi", "325": "Beng", "326": "Tirh", "327": "Orya", "328": "Dogr", "329": "Soyo", "330": "Tibt", "331": "Phag", "332": "Marc", "333": "Newa", "334": "Bhks", "335": "Lepc", "336": "Limb", "337": "Mtei", "338": "Ahom", "339": "Zanb", "340": "Telu", "343": "Gran", "344": "Saur", "345": "Knda", "346": "Taml", "347": "Mlym", "348": "Sinh", "349": "Cakm", "350": "Mymr", "351": "Lana", "352": "Thai", "353": "Tale", "354": "Talu", "355": "Khmr", "356": "Laoo", "357": "Kali", "358": "Cham", "359": "Tavt", "360": "Bali", "361": "Java", "362": "Sund", "363": "Rjng", "364": "Leke", "365": "Batk", "366": "Maka", "367": "Bugi", "370": "Tglg", "371": "Hano", "372": "Buhd", "373": "Tagb", "900": "Qaaa", "398": "Sora", "399": "Lisu", "400": "Lina", "401": "Linb", "403": "Cprt", "410": "Hira", "411": "Kana", "412": "Hrkt", "413": "Jpan", "420": "Nkgb", "430": "Ethi", "435": "Bamu", "436": "Kpel", "949": "Qabx", "438": "Mend", "439": "Afak", "440": "Cans", "445": "Cher", "450": "Hmng", "460": "Yiii", "470": "Vaii", "480": "Wole", "993": "Zsye", "994": "Zinh", "995": "Zmth", "996": "Zsym", "997": "Zxxx", "998": "Zyyy", "999": "Zzzz", "499": "Nshu", "500": "Hani", "501": "Hans", "502": "Hant", "503": "Hanb", "505": "Kitl", "510": "Jurc"} 2 | -------------------------------------------------------------------------------- /kraken/templates/alto: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | pixel 7 | 8 | {{ page.name }} 9 | 10 | 11 | 12 | 13 | kraken 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | {% for line in page.lines %} 24 | 29 | {% for segment in line.recognition %} 30 | {# ALTO forbids encoding whitespace before any String/Shape tags #} 31 | {% if segment.text is whitespace and loop.index > 1 %} 32 | 37 | {% else %} 38 | 45 | {% for char in segment.recognition %} 46 | 53 | 54 | {% endfor %} 55 | 56 | {% endif %} 57 | {% endfor %} 58 | 59 | {% endfor %} 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /kraken/lib/util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ocropus's magic PIL-numpy array conversion routines. They express slightly 3 | different behavior from PIL.Image.toarray(). 
4 | """ 5 | import unicodedata 6 | import numpy as np 7 | 8 | from PIL import Image 9 | 10 | __all__ = ['pil2array', 'array2pil'] 11 | 12 | 13 | def pil2array(im: Image, alpha: int = 0) -> np.array: 14 | if im.mode == '1': 15 | return np.array(im.convert('L')) 16 | return np.array(im) 17 | 18 | 19 | def array2pil(a: np.array) -> Image: 20 | if a.dtype == np.dtype("B"): 21 | if a.ndim == 2: 22 | return Image.frombytes("L", (a.shape[1], a.shape[0]), 23 | a.tostring()) 24 | elif a.ndim == 3: 25 | return Image.frombytes("RGB", (a.shape[1], a.shape[0]), 26 | a.tostring()) 27 | else: 28 | raise Exception("bad image rank") 29 | elif a.dtype == np.dtype('float32'): 30 | return Image.frombytes("F", (a.shape[1], a.shape[0]), a.tostring()) 31 | else: 32 | raise Exception("unknown image type") 33 | 34 | 35 | def is_bitonal(im: Image) -> bool: 36 | """ 37 | Tests a PIL.Image for bitonality. 38 | 39 | Args: 40 | im (PIL.Image): Image to test 41 | 42 | Returns: 43 | True if the image contains only two different color values. False 44 | otherwise. 45 | """ 46 | return im.getcolors(2) is not None 47 | 48 | 49 | def get_im_str(im: Image) -> str: 50 | return im.filename if hasattr(im, 'filename') else str(im) 51 | 52 | 53 | def is_printable(char: str) -> bool: 54 | """ 55 | Determines if a chode point is printable/visible when printed. 56 | 57 | Args: 58 | char (str): Input code point. 59 | 60 | Returns: 61 | True if printable, False otherwise. 62 | """ 63 | letters = ('LC', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu') 64 | numbers = ('Nd', 'Nl', 'No') 65 | punctuation = ('Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps') 66 | symbol = ('Sc', 'Sk', 'Sm', 'So') 67 | printable = letters + numbers + punctuation + symbol 68 | 69 | return unicodedata.category(char) in printable 70 | 71 | 72 | def make_printable(char: str) -> str: 73 | """ 74 | Takes a Unicode code point and return a printable representation of it. 75 | 76 | Args: 77 | char (str): Input code point 78 | 79 | Returns: 80 | Either the original code point, the name of the code point if it is a 81 | combining mark, whitespace etc., or the hex code if it is a control 82 | symbol. 83 | """ 84 | if not char or is_printable(char): 85 | return char 86 | elif unicodedata.category(char) in ('Cc', 'Cs', 'Co'): 87 | return '0x{:x}'.format(ord(char)) 88 | else: 89 | return unicodedata.name(char) 90 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Description 2 | =========== 3 | 4 | .. image:: https://travis-ci.org/mittagessen/kraken.svg?branch=master 5 | :target: https://travis-ci.org/mittagessen/kraken 6 | 7 | kraken is a fork of ocropus intended to rectify a number of issues while 8 | preserving (mostly) functional equivalence. Its main features are: 9 | 10 | - Script detection and multiscript recognition support 11 | - `Right-to-Left `_, `BiDi 12 | `_, and Top-to-Bottom 13 | script support 14 | - `ALTO `_, abbyXML, and hOCR output 15 | - Word bounding boxes and character cuts 16 | - `Public repository `_ of model files 17 | - Dynamic recognition model architectures and GPU acceleration 18 | - Clean public API 19 | 20 | Installation 21 | ============ 22 | 23 | When using a recent version of pip all dependencies will be installed from 24 | binary wheel packages, so installing build-essential or your distributions 25 | equivalent is often unnecessary. 
26 | 27 | Install the latest master version through `conda `_: 28 | 29 | :: 30 | 31 | $ wget https://raw.githubusercontent.com/mittagessen/kraken/master/environment.yml 32 | $ conda env create -f environment.yml 33 | 34 | or: 35 | 36 | :: 37 | 38 | $ wget https://raw.githubusercontent.com/mittagessen/kraken/master/environment_cuda.yml 39 | $ conda env create -f environment_cuda.yml 40 | 41 | for CUDA acceleration with the appropriate hardware. 42 | 43 | It is also possible to install the stable version with the old clstm backend from pypi: 44 | 45 | :: 46 | 47 | $ pip install kraken 48 | 49 | Finally you'll have to scrounge up a model to do the actual recognition of 50 | characters. To download the default model for printed English text and place it 51 | in the kraken directory for the current user: 52 | 53 | :: 54 | 55 | $ kraken get default 56 | 57 | A list of libre models available in the central repository can be retrieved by 58 | running: 59 | 60 | :: 61 | 62 | $ kraken list 63 | 64 | Quickstart 65 | ========== 66 | 67 | Recognizing text on an image using the default parameters including the 68 | prerequisite steps of binarization and page segmentation: 69 | 70 | :: 71 | 72 | $ kraken -i image.tif image.txt binarize segment ocr 73 | 74 | To binarize a single image using the nlbin algorithm: 75 | 76 | :: 77 | 78 | $ kraken -i image.tif bw.png binarize 79 | 80 | To segment a binarized image into reading-order sorted lines: 81 | 82 | :: 83 | 84 | $ kraken -i bw.png lines.json segment 85 | 86 | To OCR a binarized image using the default RNN and the previously generated 87 | page segmentation: 88 | 89 | :: 90 | 91 | $ kraken -i bw.png image.txt ocr --lines lines.json 92 | 93 | All subcommands and options are documented. Use the ``help`` option to get more 94 | information. 95 | 96 | Documentation 97 | ============= 98 | 99 | Have a look at the `docs `_ 100 | 101 | Funding 102 | ======= 103 | 104 | kraken is developed at `Université PSL `_. 
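In addition to the command line drivers shown above, the same pipeline is exposed as a Python API. A minimal sketch, closely following ``kraken/contrib/recognition_boxes.py``, is given below; the model file name is a placeholder and the ``prediction`` attribute on the returned records is an assumption based on current kraken versions:

::

    from PIL import Image

    from kraken.binarization import nlbin
    from kraken.pageseg import segment
    from kraken.rpred import rpred
    from kraken.lib import models

    # load a recognition model in any supported format (pronn, clstm, CoreML)
    net = models.load_any('en-default.mlmodel')

    im = Image.open('image.tif')
    bw = nlbin(im)             # binarization
    seg = segment(bw)          # reading-order sorted line bounding boxes
    for record in rpred(net, bw, seg):    # lazy per-line recognition
        print(record.prediction)          # recognized text of the line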
105 | -------------------------------------------------------------------------------- /kraken/lib/lineest.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import PIL 3 | import numpy as np 4 | 5 | from kraken.lib.util import pil2array, array2pil 6 | from scipy.ndimage import interpolation, filters 7 | 8 | __all__ = ['CenterNormalizer', 'dewarp'] 9 | 10 | 11 | def scale_to_h(img, target_height, order=1, dtype=np.dtype('f'), cval=0): 12 | h, w = img.shape 13 | scale = target_height*1.0/h 14 | target_width = int(scale*w) 15 | with warnings.catch_warnings(): 16 | warnings.simplefilter('ignore', UserWarning) 17 | output = interpolation.affine_transform(1.0*img, np.ones(2)/scale, 18 | order=order, 19 | output_shape=(target_height, 20 | target_width), 21 | mode='constant', cval=cval) 22 | output = np.array(output, dtype=dtype) 23 | return output 24 | 25 | 26 | class CenterNormalizer(object): 27 | def __init__(self, target_height=48, params=(4, 1.0, 0.3)): 28 | self.target_height = target_height 29 | self.range, self.smoothness, self.extra = params 30 | 31 | def setHeight(self, target_height): 32 | self.target_height = target_height 33 | 34 | def measure(self, line): 35 | h, w = line.shape 36 | # XXX: this filter is awfully slow 37 | smoothed = filters.gaussian_filter(line, (h*0.5, h*self.smoothness), 38 | mode='constant') 39 | smoothed += 0.001*filters.uniform_filter(smoothed, (h*0.5, w), 40 | mode='constant') 41 | self.shape = (h, w) 42 | a = np.argmax(smoothed, axis=0) 43 | a = filters.gaussian_filter(a, h*self.extra) 44 | self.center = np.array(a, 'i') 45 | deltas = np.abs(np.arange(h)[:, np.newaxis]-self.center[np.newaxis, :]) 46 | self.mad = np.mean(deltas[line != 0]) 47 | self.r = int(1+self.range*self.mad) 48 | 49 | def dewarp(self, img, cval=0, dtype=np.dtype('f')): 50 | if img.shape != self.shape: 51 | raise Exception('Measured and dewarp image shapes different') 52 | h, w = img.shape 53 | padded = np.vstack([cval*np.ones((h, w)), img, cval*np.ones((h, w))]) 54 | center = self.center+h 55 | dewarped = [padded[center[i]-self.r:center[i]+self.r, i] for i in 56 | range(w)] 57 | dewarped = np.array(dewarped, dtype=dtype).T 58 | return dewarped 59 | 60 | def normalize(self, img, order=1, dtype=np.dtype('f'), cval=0): 61 | dewarped = self.dewarp(img, cval=cval, dtype=dtype) 62 | h, w = dewarped.shape 63 | scaled = scale_to_h(dewarped, self.target_height, order=order, 64 | dtype=dtype, cval=cval) 65 | return scaled 66 | 67 | 68 | def dewarp(normalizer: CenterNormalizer, im: PIL.Image) -> PIL.Image: 69 | """ 70 | Dewarps an image of a line using a kraken.lib.lineest.CenterNormalizer 71 | instance. 72 | 73 | Args: 74 | normalizer (kraken.lib.lineest.CenterNormalizer): A line normalizer 75 | instance 76 | im (PIL.Image): Image to dewarp 77 | 78 | Returns: 79 | PIL.Image containing the dewarped image. 80 | """ 81 | line = pil2array(im) 82 | temp = np.amax(line)-line 83 | temp = temp*1.0/np.amax(temp) 84 | normalizer.measure(temp) 85 | line = normalizer.normalize(line, cval=np.amax(line)) 86 | return array2pil(line) 87 | -------------------------------------------------------------------------------- /kraken/templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 10 | 11 | 12 | 15 | 73 | 81 | 82 | {% for page in pages %} 83 |
84 |
85 |
86 | photo 87 | {% for line in page.lines %} 88 | 89 | {% endfor %} 90 |
91 |
92 |
93 |
    94 | {% for line in page.lines %} 95 |
96 | {% if line.text %} 97 | {{ line.text }} 98 | {% endif %} 99 |
100 | {% endfor %} 101 |
102 |
103 |
104 | {% endfor %} 105 | 106 | 107 | -------------------------------------------------------------------------------- /kraken/lib/lstm.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | import numpy as np 3 | 4 | from typing import Dict 5 | from scipy.ndimage import measurements 6 | from scipy.special import expit 7 | 8 | initial_range = 0.1 9 | 10 | 11 | class Codec(object): 12 | """Translate between integer codes and characters.""" 13 | def init(self, charset): 14 | charset = sorted(list(set(charset))) 15 | self.code2char = {} # type: Dict[int, str] 16 | self.char2code = {} # type: Dict[str, int] 17 | for code,char in enumerate(charset): 18 | self.code2char[code] = char 19 | self.char2code[char] = code 20 | return self 21 | def size(self): 22 | """The total number of codes (use this for the number of output 23 | classes when training a classifier.""" 24 | return len(list(self.code2char.keys())) 25 | def encode(self, s): 26 | "Encode the string `s` into a code sequence." 27 | tab = self.char2code 28 | dflt = self.char2code["~"] 29 | return [self.char2code.get(c,dflt) for c in s] 30 | def decode(self, l): 31 | "Decode a code sequence into a string." 32 | s = [self.code2char.get(c,"~") for c in l] 33 | return s 34 | 35 | class Network: 36 | def predict(self,xs): 37 | """Prediction is the same as forward propagation.""" 38 | return self.forward(xs) 39 | 40 | class Softmax(Network): 41 | """A logistic regression network.""" 42 | def __init__(self,Nh,No,initial_range=0.1,rand=None): 43 | pass 44 | def ninputs(self): 45 | pass 46 | def noutputs(self): 47 | pass 48 | def forward(self,ys): 49 | pass 50 | def backward(self,deltas): 51 | pass 52 | 53 | 54 | class LSTM(Network): 55 | """A standard LSTM network. This is a direct implementation of all the forward 56 | and backward propagation formulas, mainly for speed. 
(There is another, more 57 | abstract implementation as well, but that's significantly slower in Python 58 | due to function call overhead.)""" 59 | def __init__(self,ni,ns,initial=0.1,maxlen=5000): 60 | pass 61 | 62 | def init_weights(self,initial): 63 | pass 64 | 65 | def allocate(self,n): 66 | pass 67 | 68 | def reset(self,n): 69 | pass 70 | 71 | def forward(self,xs): 72 | pass 73 | 74 | ################################################################ 75 | # combination classifiers 76 | ################################################################ 77 | 78 | class Stacked(Network): 79 | """Stack two networks on top of each other.""" 80 | def __init__(self,nets): 81 | self.nets = nets 82 | def forward(self,xs): 83 | pass 84 | 85 | class Reversed(Network): 86 | """Run a network on the time-reversed input.""" 87 | def __init__(self,net): 88 | self.net = net 89 | def forward(self,xs): 90 | pass 91 | 92 | class Parallel(Network): 93 | """Run multiple networks in parallel on the same input.""" 94 | def __init__(self,*nets): 95 | self.nets = nets 96 | def forward(self,xs): 97 | pass 98 | 99 | def BIDILSTM(Ni,Ns,No): 100 | """A bidirectional LSTM, constructed from regular and reversed LSTMs.""" 101 | lstm1 = LSTM(Ni,Ns) 102 | lstm2 = Reversed(LSTM(Ni,Ns)) 103 | bidi = Parallel(lstm1,lstm2) 104 | logreg = Softmax(2*Ns,No) 105 | stacked = Stacked([bidi,logreg]) 106 | return stacked 107 | 108 | 109 | class SeqRecognizer(Network): 110 | """Perform sequence recognition using BIDILSTM and alignment.""" 111 | def __init__(self,ninput,nstates,noutput=-1,codec=None,normalize=None): 112 | self.Ni = ninput 113 | if codec: noutput = codec.size() 114 | self.No = noutput 115 | self.lstm = BIDILSTM(ninput,nstates,noutput) 116 | self.codec = codec 117 | def translate_back(self, output): 118 | pass 119 | def translate_back_locations(self, output): 120 | pass 121 | def predictSequence(self,xs): 122 | "Predict an integer sequence of codes." 123 | pass 124 | def l2s(self,l): 125 | "Convert a code sequence into a unicode string after recognition." 126 | l = self.codec.decode(l) 127 | return u"".join(l) 128 | def predictString(self,xs): 129 | "Predict output as a string. This uses codec and normalizer." 130 | pass 131 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | kraken 2 | ====== 3 | 4 | .. toctree:: 5 | :hidden: 6 | :maxdepth: 2 7 | 8 | advanced 9 | Training 10 | API 11 | Models 12 | 13 | kraken is a turn-key OCR system forked from `ocropus 14 | `_. It is intended to rectify a number of 15 | issues while preserving (mostly) functional equivalence. 16 | 17 | Features 18 | ======== 19 | 20 | kraken's main features are: 21 | 22 | - Script detection and multi-script recognition support 23 | - `Right-to-Left `_, `BiDi 24 | `_, and Top-to-Bottom 25 | script support 26 | - `ALTO `_, abbyXML, and hOCR output 27 | - Word bounding boxes and character cuts 28 | - `Public repository `_ of model files 29 | - :ref:`Lightweight model files ` 30 | - :ref:`Variable recognition network architectures ` 31 | 32 | All functionality not pertaining to OCR and prerequisite steps has been 33 | removed, i.e. no more error rate measuring, etc. 34 | 35 | Pull requests and code contributions are always welcome. 36 | 37 | Installation 38 | ============ 39 | 40 | kraken requires some external libraries to run. On Debian/Ubuntu they may be 41 | installed using: 42 | 43 | .. 
code-block:: console 44 | 45 | # apt install libpangocairo-1.0 libxml2 libblas3 liblapack3 python3-dev python3-pip 46 | 47 | pip 48 | --- 49 | 50 | .. code-block:: console 51 | 52 | $ pip3 install kraken 53 | 54 | or by running pip in the git repository: 55 | 56 | .. code-block:: console 57 | 58 | $ pip3 install . 59 | 60 | conda 61 | ----- 62 | 63 | If you are running `Anaconda `_/miniconda, use: 64 | 65 | .. code-block:: console 66 | 67 | $ conda install -c mittagessen kraken 68 | 69 | Models 70 | ------ 71 | 72 | Finally you'll have to scrounge up a recognition model to do the actual 73 | recognition of characters. To download the default English text recognition 74 | model and place it in the user's kraken directory: 75 | 76 | .. code-block:: console 77 | 78 | $ kraken get default 79 | 80 | A list of libre models available in the central repository can be retrieved by 81 | running: 82 | 83 | .. code-block:: console 84 | 85 | $ kraken list 86 | 87 | Model metadata can be extracted using: 88 | 89 | .. code-block:: console 90 | 91 | $ kraken show arabic-alam-al-kutub 92 | name: arabic-alam-al-kutub.clstm 93 | 94 | An experimental model for Classical Arabic texts. 95 | 96 | Network trained on 889 lines of [0] as a test case for a general Classical 97 | Arabic model. Ground truth was prepared by Sarah Savant 98 | and Maxim Romanov . 99 | 100 | Vocalization was omitted in the ground truth. Training was stopped at ~35000 101 | iterations with an accuracy of 97%. 102 | 103 | [0] Ibn al-Faqīh (d. 365 AH). Kitāb al-buldān. Edited by Yūsuf al-Hādī, 1st 104 | edition. Bayrūt: ʿĀlam al-kutub, 1416 AH/1996 CE. 105 | alphabet: !()-.0123456789:[] «»،؟ءابةتثجحخدذرزسشصضطظعغفقكلمنهوىي ARABIC 106 | MADDAH ABOVE, ARABIC HAMZA ABOVE, ARABIC HAMZA BELOW 107 | 108 | Quickstart 109 | ========== 110 | 111 | Recognizing text on an image using the default parameters including the 112 | prerequisite steps of binarization and page segmentation: 113 | 114 | .. code-block:: console 115 | 116 | $ kraken -i image.tif image.txt binarize segment ocr 117 | Loading RNN ✓ 118 | Processing ⣻ 119 | 120 | To binarize a single image using the nlbin algorithm: 121 | 122 | .. code-block:: console 123 | 124 | $ kraken -i image.tif bw.tif binarize 125 | 126 | To segment a binarized image into reading-order sorted lines: 127 | 128 | .. code-block:: console 129 | 130 | $ kraken -i bw.tif lines.json segment 131 | 132 | To OCR a binarized image using the default RNN and the previously generated 133 | page segmentation: 134 | 135 | .. code-block:: console 136 | 137 | $ kraken -i bw.tif image.txt ocr --lines lines.json 138 | 139 | All commands and their parameters are documented, just add the standard 140 | ``--help`` flag for further information. 141 | 142 | Training Tutorial 143 | ================= 144 | 145 | There is a training tutorial at :doc:`training`. 146 | 147 | .. _license: 148 | 149 | License 150 | ======= 151 | 152 | ``Kraken`` is provided under the terms and conditions of the `Apache 2.0 153 | License `_ retained 154 | from the original ``ocropus`` distribution. 155 | -------------------------------------------------------------------------------- /kraken/binarization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2015 Benjamin Kiessling 4 | # 2014 Thomas M. Breuel 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 15 | # or implied. See the License for the specific language governing 16 | # permissions and limitations under the License. 17 | """ 18 | kraken.binarization 19 | ~~~~~~~~~~~~~~~~~~~ 20 | 21 | An adaptive binarization algorithm. 22 | """ 23 | import warnings 24 | import logging 25 | import numpy as np 26 | 27 | from PIL import Image 28 | from kraken.lib.util import pil2array, array2pil, is_bitonal, get_im_str 29 | from scipy.ndimage import filters, interpolation, morphology 30 | 31 | from kraken.lib.exceptions import KrakenInputException 32 | 33 | __all__ = ['nlbin'] 34 | 35 | logger = logging.getLogger(__name__) 36 | 37 | 38 | def nlbin(im: Image, 39 | threshold: float = 0.5, 40 | zoom: float = 0.5, 41 | escale: float = 1.0, 42 | border: float = 0.1, 43 | perc: int = 80, 44 | range: int = 20, 45 | low: int = 5, 46 | high: int = 90) -> Image: 47 | """ 48 | Performs binarization using non-linear processing. 49 | 50 | Args: 51 | im (PIL.Image): 52 | threshold (float): 53 | zoom (float): Zoom for background page estimation 54 | escale (float): Scale for estimating a mask over the text region 55 | border (float): Ignore this much of the border 56 | perc (int): Percentage for filters 57 | range (int): Range for filters 58 | low (int): Percentile for black estimation 59 | high (int): Percentile for white estimation 60 | 61 | Returns: 62 | PIL.Image containing the binarized image 63 | 64 | Raises: 65 | KrakenInputException when trying to binarize an empty image. 
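    A minimal usage sketch (the input file name is illustrative; any
    PIL-loadable page image works)::

        from PIL import Image
        from kraken.binarization import nlbin

        im = Image.open('page.tif')
        bw = nlbin(im, threshold=0.5)   # returns a bitonal PIL.Image
        bw.save('page_bw.png')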
66 | """ 67 | im_str = get_im_str(im) 68 | logger.info(u'Binarizing {}'.format(im_str)) 69 | if is_bitonal(im): 70 | logger.info(u'Skipping binarization because {} is bitonal.'.format(im_str)) 71 | return im 72 | # convert to grayscale first 73 | logger.debug(u'Converting {} to grayscale'.format(im_str)) 74 | im = im.convert('L') 75 | raw = pil2array(im) 76 | logger.debug(u'Scaling and normalizing') 77 | # rescale image to between -1 or 0 and 1 78 | raw = raw/np.float(np.iinfo(raw.dtype).max) 79 | # perform image normalization 80 | if np.amax(raw) == np.amin(raw): 81 | logger.warning(u'Trying to binarize empty image {}'.format(im_str)) 82 | raise KrakenInputException('Image is empty') 83 | image = raw-np.amin(raw) 84 | image /= np.amax(image) 85 | 86 | logger.debug(u'Interpolation and percentile filtering') 87 | with warnings.catch_warnings(): 88 | warnings.simplefilter('ignore', UserWarning) 89 | m = interpolation.zoom(image, zoom) 90 | m = filters.percentile_filter(m, perc, size=(range, 2)) 91 | m = filters.percentile_filter(m, perc, size=(2, range)) 92 | m = interpolation.zoom(m, 1.0/zoom) 93 | w, h = np.minimum(np.array(image.shape), np.array(m.shape)) 94 | flat = np.clip(image[:w, :h]-m[:w, :h]+1, 0, 1) 95 | 96 | # estimate low and high thresholds 97 | d0, d1 = flat.shape 98 | o0, o1 = int(border*d0), int(border*d1) 99 | est = flat[o0:d0-o0, o1:d1-o1] 100 | logger.debug(u'Threshold estimates {}'.format(est)) 101 | # by default, we use only regions that contain 102 | # significant variance; this makes the percentile 103 | # based low and high estimates more reliable 104 | logger.debug(u'Refine estimates') 105 | v = est-filters.gaussian_filter(est, escale*20.0) 106 | v = filters.gaussian_filter(v**2, escale*20.0)**0.5 107 | v = (v > 0.3*np.amax(v)) 108 | v = morphology.binary_dilation(v, structure=np.ones((int(escale * 50), 1))) 109 | v = morphology.binary_dilation(v, structure=np.ones((1, int(escale * 50)))) 110 | est = est[v] 111 | lo = np.percentile(est.ravel(), low) 112 | hi = np.percentile(est.ravel(), high) 113 | 114 | flat -= lo 115 | flat /= (hi-lo) 116 | flat = np.clip(flat, 0, 1) 117 | logger.debug(u'Thresholding at {}'.format(threshold)) 118 | bin = np.array(255*(flat > threshold), 'B') 119 | return array2pil(bin) 120 | -------------------------------------------------------------------------------- /kraken/lib/morph.py: -------------------------------------------------------------------------------- 1 | """ 2 | Various add-ons to the SciPy morphology package 3 | """ 4 | import numpy as np 5 | from scipy.ndimage import morphology, measurements, filters 6 | 7 | 8 | def label(image: np.array, **kw) -> np.array: 9 | """ 10 | Redefine the scipy.ndimage.measurements.label function to work with a wider 11 | range of data types. The default function is inconsistent about the data 12 | types it accepts on different platforms. 13 | """ 14 | try: 15 | return measurements.label(image, **kw) 16 | except Exception: 17 | pass 18 | types = ["int32", "uint32", "int64", "uint64", "int16", "uint16"] 19 | for t in types: 20 | try: 21 | return measurements.label(np.array(image, dtype=t), **kw) 22 | except Exception: 23 | pass 24 | # let it raise the same exception as before 25 | return measurements.label(image, **kw) 26 | 27 | 28 | def find_objects(image: np.array, **kw) -> np.array: 29 | """ 30 | Redefine the scipy.ndimage.measurements.find_objects function to work with 31 | a wider range of data types. 
The default function is inconsistent about 32 | the data types it accepts on different platforms. 33 | """ 34 | try: 35 | return measurements.find_objects(image, **kw) 36 | except Exception: 37 | pass 38 | types = ["int32", "uint32", "int64", "uint64", "int16", "uint16"] 39 | for t in types: 40 | try: 41 | return measurements.find_objects(np.array(image, dtype=t), **kw) 42 | except Exception: 43 | pass 44 | # let it raise the same exception as before 45 | return measurements.find_objects(image, **kw) 46 | 47 | 48 | def r_dilation(image, size, origin=0): 49 | """Dilation with rectangular structuring element using maximum_filter""" 50 | return filters.maximum_filter(image, size, origin=origin) 51 | 52 | 53 | def r_erosion(image, size, origin=0): 54 | """Erosion with rectangular structuring element using maximum_filter""" 55 | return filters.minimum_filter(image, size, origin=origin) 56 | 57 | 58 | def rb_dilation(image, size, origin=0): 59 | """Binary dilation using linear filters.""" 60 | output = np.zeros(image.shape, 'f') 61 | filters.uniform_filter(image, size, output=output, origin=origin, 62 | mode='constant', cval=0) 63 | return np.array(output > 0, 'i') 64 | 65 | 66 | def rb_erosion(image, size, origin=0): 67 | """Binary erosion using linear filters.""" 68 | output = np.zeros(image.shape, 'f') 69 | filters.uniform_filter(image, size, output=output, origin=origin, 70 | mode='constant', cval=1) 71 | return np.array(output == 1, 'i') 72 | 73 | 74 | def rb_opening(image, size, origin=0): 75 | """Binary opening using linear filters.""" 76 | image = rb_erosion(image, size, origin=origin) 77 | return rb_dilation(image, size, origin=origin) 78 | 79 | 80 | def spread_labels(labels, maxdist=9999999): 81 | """Spread the given labels to the background""" 82 | distances, features = morphology.distance_transform_edt(labels == 0, 83 | return_distances=1, 84 | return_indices=1) 85 | indexes = features[0] * labels.shape[1] + features[1] 86 | spread = labels.ravel()[indexes.ravel()].reshape(*labels.shape) 87 | spread *= (distances < maxdist) 88 | return spread 89 | 90 | 91 | def correspondences(labels1, labels2): 92 | """Given two labeled images, compute an array giving the correspondences 93 | between labels in the two images.""" 94 | q = 100000 95 | combo = labels1 * q + labels2 96 | result = np.unique(combo) 97 | result = np.array([result // q, result % q]) 98 | return result 99 | 100 | 101 | def propagate_labels(image, labels, conflict=0): 102 | """Given an image and a set of labels, apply the labels 103 | to all the regions in the image that overlap a label. 
104 | Assign the value `conflict` to any labels that have a conflict.""" 105 | rlabels, _ = label(image) 106 | cors = correspondences(rlabels, labels) 107 | outputs = np.zeros(np.amax(rlabels) + 1, 'i') 108 | oops = -(1 << 30) 109 | for o, i in cors.T: 110 | if outputs[o] != 0: 111 | outputs[o] = oops 112 | else: 113 | outputs[o] = i 114 | outputs[outputs == oops] = conflict 115 | outputs[0] = 0 116 | return outputs[rlabels] 117 | 118 | 119 | def select_regions(binary, f, min=0, nbest=100000): 120 | """Given a scoring function f over slice tuples (as returned by 121 | find_objects), keeps at most nbest regions whose scores is higher 122 | than min.""" 123 | labels, n = label(binary) 124 | objects = find_objects(labels) 125 | scores = [f(o) for o in objects] 126 | best = np.argsort(scores) 127 | keep = np.zeros(len(objects) + 1, 'i') 128 | if nbest > 0: 129 | for i in best[-nbest:]: 130 | if scores[i] <= min: 131 | continue 132 | keep[i+1] = 1 133 | return keep[labels] 134 | -------------------------------------------------------------------------------- /kraken/transcribe.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2015 Benjamin Kiessling 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 14 | # or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 16 | """ 17 | Utility functions for ground truth transcription. 18 | """ 19 | from kraken.lib.exceptions import KrakenInputException 20 | from kraken.lib.util import get_im_str 21 | 22 | from typing import List 23 | 24 | from jinja2 import Environment, PackageLoader 25 | from io import BytesIO 26 | 27 | import uuid 28 | import base64 29 | import logging 30 | 31 | logger = logging.getLogger() 32 | 33 | 34 | class TranscriptionInterface(object): 35 | 36 | def __init__(self, font=None, font_style=None): 37 | logging.info(u'Initializing transcription object.') 38 | logger.debug(u'Initializing jinja environment.') 39 | env = Environment(loader=PackageLoader('kraken', 'templates'), autoescape=True) 40 | logger.debug(u'Loading transcription template.') 41 | self.tmpl = env.get_template('layout.html') 42 | self.pages = [] # type: List[dict] 43 | self.font = {'font': font, 'style': font_style} 44 | self.text_direction = 'horizontal-tb' 45 | self.page_idx = 1 46 | self.line_idx = 1 47 | self.seg_idx = 1 48 | 49 | def add_page(self, im, segmentation=None, records=None): 50 | """ 51 | Adds an image to the transcription interface, optionally filling in 52 | information from a list of ocr_record objects. 53 | 54 | Args: 55 | im (PIL.Image): Input image 56 | segmentation (dict): Output of the segment method. 57 | records (list): A list of ocr_record objects. 
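    A rough end-to-end sketch (the page segmentation step via
    kraken.pageseg and the file names are assumptions for illustration)::

        from PIL import Image
        from kraken import pageseg
        from kraken.transcribe import TranscriptionInterface

        im = Image.open('bw.png')              # binarized page image
        seg = pageseg.segment(im)              # dict with 'text_direction' and 'boxes'
        ti = TranscriptionInterface()
        ti.add_page(im, segmentation=seg)
        with open('transcription.html', 'wb') as fp:
            ti.write(fp)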
58 | """ 59 | im_str = get_im_str(im) 60 | logger.info(u'Adding page {} with {} lines'.format(im_str, len(segmentation) if segmentation else len(records))) 61 | page = {} 62 | fd = BytesIO() 63 | im.save(fd, format='png', optimize=True) 64 | page['index'] = self.page_idx 65 | self.page_idx += 1 66 | logger.debug(u'Base64 encoding image') 67 | page['img'] = 'data:image/png;base64,' + base64.b64encode(fd.getvalue()).decode('ascii') 68 | page['lines'] = [] 69 | if records: 70 | logger.debug(u'Adding records.') 71 | self.text_direction = segmentation['text_direction'] 72 | for record, bbox in zip(records, segmentation['boxes']): 73 | page['lines'].append({'index': self.line_idx, 'text': record.prediction, 74 | 'left': 100*int(bbox[0]) / im.size[0], 75 | 'top': 100*int(bbox[1]) / im.size[1], 76 | 'width': 100*(bbox[2] - bbox[0])/im.size[0], 77 | 'height': 100*(int(bbox[3]) - int(bbox[1]))/im.size[1], 78 | 'bbox': '{}, {}, {}, {}'.format(int(bbox[0]), 79 | int(bbox[1]), 80 | int(bbox[2]), 81 | int(bbox[3]))}) 82 | 83 | self.line_idx += 1 84 | elif segmentation: 85 | logger.debug(u'Adding segmentations.') 86 | self.text_direction = segmentation['text_direction'] 87 | for bbox in segmentation['boxes']: 88 | page['lines'].append({'index': self.line_idx, 89 | 'left': 100*int(bbox[0]) / im.size[0], 90 | 'top': 100*int(bbox[1]) / im.size[1], 91 | 'width': 100*(bbox[2] - bbox[0])/im.size[0], 92 | 'height': 100*(int(bbox[3]) - int(bbox[1]))/im.size[1], 93 | 'bbox': '{}, {}, {}, {}'.format(int(bbox[0]), 94 | int(bbox[1]), 95 | int(bbox[2]), 96 | int(bbox[3]))}) 97 | self.line_idx += 1 98 | else: 99 | raise KrakenInputException('Neither segmentations nor records given') 100 | self.pages.append(page) 101 | 102 | def write(self, fd): 103 | """ 104 | Writes the HTML file to a file descriptor. 105 | 106 | Args: 107 | fd (File): File descriptor (mode='rb') to write to. 108 | """ 109 | logger.info(u'Rendering and writing transcription.') 110 | fd.write(self.tmpl.render(uuid=str(uuid.uuid4()), pages=self.pages, 111 | font=self.font, 112 | text_direction=self.text_direction).encode('utf-8')) 113 | -------------------------------------------------------------------------------- /kraken/lib/models.py: -------------------------------------------------------------------------------- 1 | """ 2 | kraken.lib.models 3 | ~~~~~~~~~~~~~~~~~ 4 | 5 | Wrapper around TorchVGSLModel including a variety of forward pass helpers for 6 | sequence classification. 7 | """ 8 | from os.path import expandvars, expanduser, abspath 9 | 10 | import torch 11 | import numpy as np 12 | import kraken.lib.lineest 13 | import kraken.lib.ctc_decoder 14 | 15 | from typing import List, Tuple 16 | 17 | from kraken.lib.vgsl import TorchVGSLModel 18 | from kraken.lib.exceptions import KrakenInvalidModelException, KrakenInputException 19 | 20 | __all__ = ['TorchSeqRecognizer', 'load_any'] 21 | 22 | import logging 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | class TorchSeqRecognizer(object): 28 | """ 29 | A class wrapping a TorchVGSLModel with a more comfortable recognition interface. 30 | """ 31 | def __init__(self, nn, decoder=kraken.lib.ctc_decoder.greedy_decoder, train: bool = False, device: str = 'cpu') -> None: 32 | """ 33 | Constructs a sequence recognizer from a VGSL model and a decoder. 
34 | 35 | Args: 36 | nn (kraken.lib.vgsl.TorchVGSLModel): neural network used for recognition 37 | decoder (func): Decoder function used for mapping softmax 38 | activations to labels and positions 39 | train (bool): Enables or disables gradient calculation 40 | device (torch.Device): Device to run model on 41 | """ 42 | self.nn = nn 43 | self.kind = '' 44 | if train: 45 | self.nn.train() 46 | else: 47 | self.nn.eval() 48 | self.codec = self.nn.codec 49 | self.decoder = decoder 50 | self.train = train 51 | self.device = device 52 | self.nn.to(device) 53 | 54 | def to(self, device): 55 | """ 56 | Moves model to device and automatically loads input tensors onto it. 57 | """ 58 | self.device = device 59 | self.nn.to(device) 60 | 61 | def forward(self, line: torch.Tensor) -> np.array: 62 | """ 63 | Performs a forward pass on a torch tensor of a line with shape (C, H, W) 64 | and returns a numpy array (W, C). 65 | """ 66 | # make CHW -> 1CHW 67 | line = line.to(self.device) 68 | line = line.unsqueeze(0) 69 | o = self.nn.nn(line) 70 | if o.size(2) != 1: 71 | raise KrakenInputException('Expected dimension 3 to be 1, actual {}'.format(o.size())) 72 | self.outputs = o.detach().squeeze().cpu().numpy() 73 | return self.outputs 74 | 75 | def predict(self, line: torch.Tensor) -> List[Tuple[str, int, int, float]]: 76 | """ 77 | Performs a forward pass on a torch tensor of a line with shape (C, H, W) 78 | and returns the decoding as a list of tuples (string, start, end, 79 | confidence). 80 | """ 81 | o = self.forward(line) 82 | locs = self.decoder(o) 83 | return self.codec.decode(locs) 84 | 85 | def predict_string(self, line: torch.Tensor) -> str: 86 | """ 87 | Performs a forward pass on a torch tensor of a line with shape (C, H, W) 88 | and returns a string of the results. 89 | """ 90 | o = self.forward(line) 91 | locs = self.decoder(o) 92 | decoding = self.codec.decode(locs) 93 | return ''.join(x[0] for x in decoding) 94 | 95 | def predict_labels(self, line: torch.tensor) -> List[Tuple[int, int, int, float]]: 96 | """ 97 | Performs a forward pass on a torch tensor of a line with shape (C, H, W) 98 | and returns a list of tuples (class, start, end, max). Max is the 99 | maximum value of the softmax layer in the region. 100 | """ 101 | o = self.forward(line) 102 | return self.decoder(o) 103 | 104 | 105 | def load_any(fname: str, train: bool = False, device: str = 'cpu') -> TorchSeqRecognizer: 106 | """ 107 | Loads anything that was, is, and will be a valid ocropus model and 108 | instantiates a shiny new kraken.lib.lstm.SeqRecognizer from the RNN 109 | configuration in the file. 110 | 111 | Currently it recognizes the following kinds of models: 112 | 113 | * pyrnn models containing BIDILSTMs 114 | * protobuf models containing converted python BIDILSTMs 115 | * protobuf models containing CLSTM networks 116 | 117 | Additionally an attribute 'kind' will be added to the SeqRecognizer 118 | containing a string representation of the source kind. Current known values 119 | are: 120 | 121 | * pyrnn for pickled BIDILSTMs 122 | * clstm for protobuf models generated by clstm 123 | 124 | Args: 125 | fname (str): Path to the model 126 | train (bool): Enables gradient calculation and dropout layers in model. 127 | device (str): Target device 128 | 129 | Returns: 130 | A kraken.lib.models.TorchSeqRecognizer object. 
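    Usage sketch (the model path is illustrative; any model file in one of
    the formats listed above will do)::

        from kraken.lib.models import load_any

        rec = load_any('en-default.mlmodel', device='cpu')
        print(rec.kind)   # source format, e.g. 'vgsl', 'clstm' or 'pyrnn'
        # rec.predict_string(line) then decodes a (C, H, W) line tensor to text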
131 | """ 132 | nn = None 133 | kind = '' 134 | fname = abspath(expandvars(expanduser(fname))) 135 | logger.info(u'Loading model from {}'.format(fname)) 136 | try: 137 | nn = TorchVGSLModel.load_model(str(fname)) 138 | kind = 'vgsl' 139 | except Exception: 140 | try: 141 | nn = TorchVGSLModel.load_clstm_model(fname) 142 | kind = 'clstm' 143 | except Exception: 144 | nn = TorchVGSLModel.load_pronn_model(fname) 145 | kind = 'pronn' 146 | try: 147 | nn = TorchVGSLModel.load_pyrnn_model(fname) 148 | kind = 'pyrnn' 149 | except Exception: 150 | pass 151 | if not nn: 152 | raise KrakenInvalidModelException('File {} not loadable by any parser.'.format(fname)) 153 | seq = TorchSeqRecognizer(nn, train=train, device=device) 154 | seq.kind = kind 155 | return seq 156 | -------------------------------------------------------------------------------- /kraken/repo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2015 Benjamin Kiessling 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 14 | # or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 16 | 17 | # -*- coding: utf-8 -*- 18 | """ 19 | Access functions to the model repository on github. 20 | """ 21 | from collections import defaultdict 22 | from typing import Callable, Any 23 | from contextlib import closing 24 | 25 | from kraken.lib.exceptions import KrakenRepoException 26 | 27 | import base64 28 | import requests 29 | import json 30 | import os 31 | import logging 32 | 33 | __all__ = ['get_model', 'get_description', 'get_listing'] 34 | 35 | logger = logging.getLogger(__name__) 36 | 37 | MODEL_REPO = 'https://api.github.com/repos/mittagessen/kraken-models/' 38 | 39 | 40 | def get_model(model_id: str, path: str, callback: Callable[..., Any]) -> None: 41 | """ 42 | Retrieves a model and saves it to a path. 43 | 44 | Args: 45 | model_id (str): Identifier of the model 46 | path (str): Destination to write model to. 47 | callback (func): Function called for every 1024 octet chunk received. 
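    Call sketch (the model identifier and destination directory are
    illustrative; the callback only signals progress and takes no
    arguments)::

        from kraken import repo

        def tick():
            print('.', end='', flush=True)

        repo.get_model('en-default', path='models/', callback=tick)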
48 | """ 49 | logger.info(u'Saving model {} to {}'.format(model_id, path)) 50 | logger.debug(u'Retrieving head of model repository') 51 | r = requests.get('{}{}'.format(MODEL_REPO, 'git/refs/heads/master')) 52 | callback() 53 | resp = r.json() 54 | if 'object' not in resp: 55 | logger.error(u'No \'object\' field in repo head API response.') 56 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message'])) 57 | head = resp['object']['sha'] 58 | logger.debug(u'Retrieving tree of model repository') 59 | r = requests.get('{}{}{}'.format(MODEL_REPO, 'git/trees/', head), params={'recursive': 1}) 60 | callback() 61 | resp = r.json() 62 | if 'tree' not in resp: 63 | logger.error(u'No \'tree\' field in repo API response.') 64 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message'])) 65 | url = None 66 | for el in resp['tree']: 67 | components = el['path'].split('/') 68 | if len(components) > 2 and components[1] == model_id and components[2] == 'DESCRIPTION': 69 | logger.debug(u'Retrieving description for {}'.format(components[1])) 70 | raw = base64.b64decode(requests.get(el['url']).json()['content']).decode('utf-8') 71 | desc = json.loads(raw) 72 | spath = os.path.join(path, desc['name']) 73 | elif len(components) > 2 and components[1] == model_id: 74 | url = el['url'] 75 | break 76 | if not url: 77 | logger.error(u'Model {} not in repository.'.format(model_id)) 78 | raise KrakenRepoException('Modle {} not in repository'.format(model_id)) 79 | with closing(requests.get(url, headers={'Accept': 'application/vnd.github.v3.raw'}, 80 | stream=True)) as r: 81 | with open(spath, 'wb') as f: 82 | logger.debug(u'Downloading model') 83 | for chunk in r.iter_content(chunk_size=1024): 84 | callback() 85 | f.write(chunk) 86 | return 87 | 88 | 89 | def get_description(model_id: str) -> dict: 90 | logger.info('Retrieving metadata for {}'.format(model_id)) 91 | logger.debug('Retrieving head of model repository') 92 | r = requests.get('{}{}'.format(MODEL_REPO, 'git/refs/heads/master')) 93 | resp = r.json() 94 | if 'object' not in resp: 95 | logger.error('No \'object\' field in repo head API response.') 96 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message'])) 97 | head = resp['object']['sha'] 98 | logger.debug('Retrieving tree of model repository') 99 | r = requests.get('{}{}{}'.format(MODEL_REPO, 'git/trees/', head), params={'recursive': 1}) 100 | resp = r.json() 101 | if 'tree' not in resp: 102 | logger.error('No \'tree\' field in repo API response.') 103 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message'])) 104 | for el in resp['tree']: 105 | components = el['path'].split('/') 106 | if len(components) > 2 and components[1] == model_id and components[2] == 'DESCRIPTION': 107 | logger.debug('Retrieving description for {}'.format(components[1])) 108 | raw = base64.b64decode(requests.get(el['url']).json()['content']).decode('utf-8') 109 | return defaultdict(str, json.loads(raw)) 110 | raise KrakenRepoException('No description for {} found'.format(model_id)) 111 | 112 | 113 | def get_listing(callback: Callable[..., Any]) -> dict: 114 | logger.info(u'Retrieving model list') 115 | r = requests.get('{}{}'.format(MODEL_REPO, 'git/refs/heads/master')) 116 | callback() 117 | resp = r.json() 118 | if 'object' not in resp: 119 | logger.error(u'No \'object\' field in repo head API response.') 120 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message'])) 121 | head = resp['object']['sha'] 122 | logger.debug(u'Retrieving tree of 
model repository') 123 | r = requests.get('{}{}{}'.format(MODEL_REPO, 'git/trees/', head), params={'recursive': 1}) 124 | callback() 125 | resp = r.json() 126 | if 'tree' not in resp: 127 | logger.error(u'No \'tree\' field in repo API response.') 128 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message'])) 129 | models = {} 130 | for el in resp['tree']: 131 | components = el['path'].split('/') 132 | # new model 133 | if len(components) == 2: 134 | models[components[1]] = {'type': components[0]} 135 | if len(components) > 2 and components[2] == 'DESCRIPTION': 136 | logger.debug(u'Retrieving description for {}'.format(components[1])) 137 | r = requests.get(el['url']) 138 | if not r.ok: 139 | logger.error(u'Requests to \'{}\' failed with status {}'.format(el['url'], r.status_code)) 140 | raise KrakenRepoException('{}: {}'.format(r.status_code, r.json()['message'])) 141 | raw = base64.b64decode(requests.get(el['url']).json()['content']).decode('utf-8') 142 | callback() 143 | try: 144 | models[components[1]].update(json.loads(raw)) 145 | except Exception: 146 | del models[components[1]] 147 | elif len(components) > 2 and components[1] in models: 148 | models[components[1]]['model'] = el['url'] 149 | return models 150 | -------------------------------------------------------------------------------- /kraken/lib/ctc_decoder.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2017 Benjamin Kiessling 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 13 | # or implied. See the License for the specific language governing 14 | # permissions and limitations under the License. 15 | 16 | # -*- coding: utf-8 -*- 17 | """ 18 | Decoders for softmax outputs of CTC trained networks. 19 | """ 20 | 21 | import collections 22 | import numpy as np 23 | 24 | from typing import List, Tuple 25 | from scipy.special import logsumexp 26 | from scipy.ndimage import measurements 27 | 28 | from itertools import groupby 29 | 30 | __all__ = ['beam_decoder', 'greedy_decoder', 'blank_threshold_decoder'] 31 | 32 | 33 | def beam_decoder(outputs: np.ndarray, beam_size: int = 3) -> List[Tuple[int, int, int, float]]: 34 | """ 35 | Translates back the network output to a label sequence using 36 | same-prefix-merge beam search decoding as described in [0]. 37 | 38 | [0] Hannun, Awni Y., et al. "First-pass large vocabulary continuous speech 39 | recognition using bi-directional recurrent DNNs." arXiv preprint 40 | arXiv:1408.2873 (2014). 41 | 42 | Args: 43 | output (numpy.array): (C, W) shaped softmax output tensor 44 | 45 | Returns: 46 | A list with tuples (class, start, end, prob). max is the maximum value 47 | of the softmax layer in the region. 
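    Toy example (the softmax matrix below is made up; class 0 is the CTC
    blank)::

        import numpy as np
        from kraken.lib.ctc_decoder import beam_decoder

        outputs = np.array([[0.9, 0.1, 0.1, 0.8],    # blank
                            [0.05, 0.8, 0.1, 0.1],   # class 1
                            [0.05, 0.1, 0.8, 0.1]])  # class 2
        print(beam_decoder(outputs, beam_size=2))
        # e.g. [(1, 1, 1, 0.8), (2, 2, 2, 0.8)]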
48 | """ 49 | c, w = outputs.shape 50 | probs = np.log(outputs) 51 | beam = [(tuple(), (0.0, float('-inf')))] # type: List[Tuple[Tuple, Tuple[float, float]]] 52 | 53 | # loop over each time step 54 | for t in range(w): 55 | next_beam = collections.defaultdict(lambda: 2*(float('-inf'),)) # type: dict 56 | # p_b -> prob for prefix ending in blank 57 | # p_nb -> prob for prefix not ending in blank 58 | for prefix, (p_b, p_nb) in beam: 59 | # only update ending-in-blank-prefix probability for blank 60 | n_p_b, n_p_nb = next_beam[prefix] 61 | n_p_b = logsumexp((n_p_b, p_b + probs[0, t], p_nb + probs[0, t])) 62 | next_beam[prefix] = (n_p_b, n_p_nb) 63 | # loop over non-blank classes 64 | for s in range(1, c): 65 | # only update the not-ending-in-blank-prefix probability for prefix+s 66 | l_end = prefix[-1][0] if prefix else None 67 | n_prefix = prefix + ((s, t, t),) 68 | n_p_b, n_p_nb = next_beam[n_prefix] 69 | if s == l_end: 70 | # substitute the previous non-blank-ending-prefix 71 | # probability for repeated labels 72 | n_p_nb = logsumexp((n_p_nb, p_b + probs[s, t])) 73 | else: 74 | n_p_nb = logsumexp((n_p_nb, p_b + probs[s, t], p_nb + probs[s, t])) 75 | 76 | next_beam[n_prefix] = (n_p_b, n_p_nb) 77 | 78 | # If s is repeated at the end we also update the unchanged 79 | # prefix. This is the merging case. 80 | if s == l_end: 81 | n_p_b, n_p_nb = next_beam[prefix] 82 | n_p_nb = logsumexp((n_p_nb, p_nb + probs[s, t])) 83 | # rewrite both new and old prefix positions 84 | next_beam[prefix[:-1] + ((prefix[-1][0], prefix[-1][1], t),)] = (n_p_b, n_p_nb) 85 | next_beam[n_prefix[:-1] + ((n_prefix[-1][0], n_prefix[-1][1], t),)] = next_beam.pop(n_prefix) 86 | 87 | # Sort and trim the beam before moving on to the 88 | # next time-step. 89 | beam = sorted(next_beam.items(), 90 | key=lambda x: logsumexp(x[1]), 91 | reverse=True) 92 | beam = beam[:beam_size] 93 | return [(c, start, end, max(outputs[c, start:end+1])) for (c, start, end) in beam[0][0]] 94 | 95 | 96 | def greedy_decoder(outputs: np.ndarray) -> List[Tuple[int, int, int, float]]: 97 | """ 98 | Translates back the network output to a label sequence using greedy/best 99 | path decoding as described in [0]. 100 | 101 | [0] Graves, Alex, et al. "Connectionist temporal classification: labelling 102 | unsegmented sequence data with recurrent neural networks." Proceedings of 103 | the 23rd international conference on Machine learning. ACM, 2006. 104 | 105 | Args: 106 | output (numpy.array): (C, W) shaped softmax output tensor 107 | 108 | Returns: 109 | A list with tuples (class, start, end, max). max is the maximum value 110 | of the softmax layer in the region. 111 | """ 112 | labels = np.argmax(outputs, 0) 113 | seq_len = outputs.shape[1] 114 | mask = np.eye(outputs.shape[0], dtype='bool')[labels].T 115 | classes = [] 116 | for label, group in groupby(zip(np.arange(seq_len), labels, outputs[mask]), key=lambda x: x[1]): 117 | lgroup = list(group) 118 | if label != 0: 119 | classes.append((label, lgroup[0][0], lgroup[-1][0], max(x[2] for x in lgroup))) 120 | return classes 121 | 122 | 123 | def blank_threshold_decoder(outputs: np.ndarray, threshold: float = 0.5) -> List[Tuple[int, int, int, float]]: 124 | """ 125 | Translates back the network output to a label sequence as the original 126 | ocropy/clstm. 127 | 128 | Thresholds on class 0, then assigns the maximum (non-zero) class to each 129 | region. 
130 | 131 | Args: 132 | output (numpy.array): (C, W) shaped softmax output tensor 133 | threshold (float): Threshold for 0 class when determining possible 134 | label locations. 135 | 136 | Returns: 137 | A list with tuples (class, start, end, max). max is the maximum value 138 | of the softmax layer in the region. 139 | """ 140 | outputs = outputs.T 141 | labels, n = measurements.label(outputs[:, 0] < threshold) 142 | mask = np.tile(labels.reshape(-1, 1), (1, outputs.shape[1])) 143 | maxima = measurements.maximum_position(outputs, mask, np.arange(1, np.amax(mask)+1)) 144 | p = 0 145 | start = None 146 | x = [] 147 | for idx, val in enumerate(labels): 148 | if val != 0 and start is None: 149 | start = idx 150 | p += 1 151 | if val == 0 and start is not None: 152 | if maxima[p-1][1] == 0: 153 | start = None 154 | else: 155 | x.append((maxima[p-1][1], start, idx, outputs[maxima[p-1]])) 156 | start = None 157 | # append last non-zero region to list of no zero region occurs after it 158 | if start: 159 | x.append((maxima[p-1][1], start, len(outputs), outputs[maxima[p-1]])) 160 | return [y for y in x if x[0] != 0] 161 | -------------------------------------------------------------------------------- /tests/test_layers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | 4 | from nose.tools import raises 5 | 6 | import torch 7 | from kraken.lib import layers 8 | 9 | 10 | class TestLayers(unittest.TestCase): 11 | 12 | """ 13 | Testing custom layer implementations. 14 | """ 15 | def setUp(self): 16 | torch.set_grad_enabled(False) 17 | 18 | def test_maxpool(self): 19 | """ 20 | Test maximum pooling layer. 21 | """ 22 | mp = layers.MaxPool((3, 3), (2, 2)) 23 | o = mp(torch.randn(1, 2, 32, 64)) 24 | self.assertEqual(o.shape, (1, 2, 15, 31)) 25 | 26 | def test_1d_dropout(self): 27 | """ 28 | Test 1d dropout layer. 29 | """ 30 | do = layers.Dropout(0.2, 1) 31 | o = do(torch.randn(1, 2, 32, 64)) 32 | self.assertEqual(o.shape, (1, 2, 32, 64)) 33 | 34 | def test_2d_dropout(self): 35 | """ 36 | Test 2d dropout layer. 37 | """ 38 | do = layers.Dropout(0.2, 2) 39 | o = do(torch.randn(1, 2, 32, 64)) 40 | self.assertEqual(o.shape, (1, 2, 32, 64)) 41 | 42 | def test_forward_rnn_layer_x(self): 43 | """ 44 | Test unidirectional RNN layer in x-dimension. 45 | """ 46 | rnn = layers.TransposedSummarizingRNN(10, 2, 'f', False, False) 47 | o = rnn(torch.randn(1, 10, 32, 64)) 48 | self.assertEqual(o.shape, (1, 2, 32, 64)) 49 | 50 | def test_forward_rnn_layer_y(self): 51 | """ 52 | Test unidirectional RNN layer in y-dimension. 53 | """ 54 | rnn = layers.TransposedSummarizingRNN(10, 2, 'f', True, False) 55 | o = rnn(torch.randn(1, 10, 32, 64)) 56 | self.assertEqual(o.shape, (1, 2, 32, 64)) 57 | 58 | def test_forward_rnn_layer_x_summarize(self): 59 | """ 60 | Test unidirectional summarizing RNN layer in x-dimension. 61 | """ 62 | rnn = layers.TransposedSummarizingRNN(10, 2, 'f', False, True) 63 | o = rnn(torch.randn(1, 10, 32, 64)) 64 | self.assertEqual(o.shape, (1, 2, 32, 1)) 65 | 66 | def test_forward_rnn_layer_y_summarize(self): 67 | """ 68 | Test unidirectional summarizing RNN layer in y-dimension. 69 | """ 70 | rnn = layers.TransposedSummarizingRNN(10, 2, 'f', True, True) 71 | o = rnn(torch.randn(1, 10, 32, 64)) 72 | self.assertEqual(o.shape, (1, 2, 1, 64)) 73 | 74 | def test_bidi_rnn_layer_x(self): 75 | """ 76 | Test bidirectional RNN layer in x-dimension. 
77 | """ 78 | rnn = layers.TransposedSummarizingRNN(10, 2, 'b', False, False) 79 | o = rnn(torch.randn(1, 10, 32, 64)) 80 | self.assertEqual(o.shape, (1, 4, 32, 64)) 81 | 82 | def test_bidi_rnn_layer_y(self): 83 | """ 84 | Test bidirectional RNN layer in y-dimension. 85 | """ 86 | rnn = layers.TransposedSummarizingRNN(10, 2, 'b', True, False) 87 | o = rnn(torch.randn(1, 10, 32, 64)) 88 | self.assertEqual(o.shape, (1, 4, 32, 64)) 89 | 90 | def test_bidi_rnn_layer_x_summarize(self): 91 | """ 92 | Test bidirectional summarizing RNN layer in x-dimension. 93 | """ 94 | rnn = layers.TransposedSummarizingRNN(10, 2, 'b', False, True) 95 | o = rnn(torch.randn(1, 10, 32, 64)) 96 | self.assertEqual(o.shape, (1, 4, 32, 1)) 97 | 98 | def test_bidi_rnn_layer_y_summarize(self): 99 | """ 100 | Test bidirectional summarizing RNN layer in y-dimension. 101 | """ 102 | rnn = layers.TransposedSummarizingRNN(10, 2, 'b', True, True) 103 | o = rnn(torch.randn(1, 10, 32, 64)) 104 | self.assertEqual(o.shape, (1, 4, 1, 64)) 105 | 106 | def test_linsoftmax(self): 107 | """ 108 | Test basic function of linear layer. 109 | """ 110 | lin = layers.LinSoftmax(20, 10) 111 | o = lin(torch.randn(1, 20, 12, 24)) 112 | self.assertEqual(o.shape, (1, 10, 12, 24)) 113 | 114 | def test_linsoftmax_train(self): 115 | """ 116 | Test function of linear layer in training mode (log_softmax) 117 | """ 118 | lin = layers.LinSoftmax(20, 10).train() 119 | o = lin(torch.randn(1, 20, 12, 24)) 120 | self.assertLess(o.max(), 0) 121 | 122 | def test_linsoftmax_test(self): 123 | """ 124 | Test function of linear layer in eval mode (softmax) 125 | """ 126 | lin = layers.LinSoftmax(20, 10).eval() 127 | o = lin(torch.randn(1, 20, 12, 24)) 128 | self.assertGreaterEqual(o.min(), 0) 129 | 130 | def test_linsoftmax_aug(self): 131 | """ 132 | Test basic function of linear layer with 1-augmentation. 133 | """ 134 | lin = layers.LinSoftmax(20, 10, True) 135 | o = lin(torch.randn(1, 20, 12, 24)) 136 | self.assertEqual(o.shape, (1, 10, 12, 24)) 137 | 138 | def test_linsoftmax_aug_train(self): 139 | """ 140 | Test function of linear layer in training mode (log_softmax) with 1-augmentation 141 | """ 142 | lin = layers.LinSoftmax(20, 10, True).train() 143 | o = lin(torch.randn(1, 20, 12, 24)) 144 | self.assertLess(o.max(), 0) 145 | 146 | def test_linsoftmax_aug_test(self): 147 | """ 148 | Test function of linear layer in eval mode (softmax) with 1-augmentation 149 | """ 150 | lin = layers.LinSoftmax(20, 10, True).eval() 151 | o = lin(torch.randn(1, 20, 12, 24)) 152 | self.assertGreaterEqual(o.min(), 0) 153 | 154 | def test_actconv2d_lin(self): 155 | """ 156 | Test convolutional layer without activation. 157 | """ 158 | conv = layers.ActConv2D(5, 12, (3, 3), 'l') 159 | o = conv(torch.randn(1, 5, 24, 12)) 160 | self.assertEqual(o.shape, (1, 12, 24, 12)) 161 | 162 | def test_actconv2d_sigmoid(self): 163 | """ 164 | Test convolutional layer with sigmoid activation. 165 | """ 166 | conv = layers.ActConv2D(5, 12, (3, 3), 's') 167 | o = conv(torch.randn(1, 5, 24, 12)) 168 | self.assertTrue(0 <= o.min() <= 1) 169 | self.assertTrue(0 <= o.max() <= 1) 170 | 171 | def test_actconv2d_tanh(self): 172 | """ 173 | Test convolutional layer with tanh activation. 174 | """ 175 | conv = layers.ActConv2D(5, 12, (3, 3), 't') 176 | o = conv(torch.randn(1, 5, 24, 12)) 177 | self.assertTrue(-1 <= o.min() <= 1) 178 | self.assertTrue(-1 <= o.max() <= 1) 179 | 180 | def test_actconv2d_softmax(self): 181 | """ 182 | Test convolutional layer with softmax activation. 
183 | """ 184 | conv = layers.ActConv2D(5, 12, (3, 3), 'm') 185 | o = conv(torch.randn(1, 5, 24, 12)) 186 | self.assertTrue(0 <= o.min() <= 1) 187 | self.assertTrue(0 <= o.max() <= 1) 188 | 189 | def test_actconv2d_relu(self): 190 | """ 191 | Test convolutional layer with relu activation. 192 | """ 193 | conv = layers.ActConv2D(5, 12, (3, 3), 'r') 194 | o = conv(torch.randn(1, 5, 24, 12)) 195 | self.assertLessEqual(0, o.min()) 196 | self.assertLessEqual(0, o.max()) 197 | -------------------------------------------------------------------------------- /kraken/lib/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2015 Benjamin Kiessling 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 14 | # or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 16 | """ 17 | Training loop interception helpers 18 | """ 19 | import abc 20 | import torch 21 | import numpy as np 22 | 23 | from itertools import cycle 24 | from torch.utils import data 25 | from functools import partial 26 | from typing import Tuple, Union, Optional, Callable, List, Dict, Any 27 | from collections.abc import Iterable 28 | 29 | class TrainStopper(Iterable): 30 | 31 | def __init__(self): 32 | self.best_loss = 0.0 33 | self.best_epoch = 0 34 | 35 | @abc.abstractmethod 36 | def update(self, val_loss: float) -> None: 37 | """ 38 | Updates the internal state of the train stopper. 39 | """ 40 | pass 41 | 42 | 43 | def annealing_const(start: float, end: float, pct: float) -> float: 44 | return start 45 | 46 | def annealing_linear(start: float, end: float, pct: float) -> float: 47 | return start + pct * (end-start) 48 | 49 | def annealing_cos(start: float, end: float, pct: float) -> float: 50 | co = np.cos(np.pi * pct) + 1 51 | return end + (start-end)/2 * co 52 | 53 | 54 | class TrainScheduler(object): 55 | """ 56 | Implements learning rate scheduling. 57 | """ 58 | def __init__(self, optimizer: torch.optim.Optimizer) -> None: 59 | self.steps: List[Dict[str, Any]] = [] 60 | self.optimizer = optimizer 61 | self.cycle: Any = None 62 | 63 | def add_phase(self, 64 | iterations: int, 65 | lrate: Tuple[float, float] = (1e-4, 1e-4), 66 | momentum: Tuple[float, float] = (0.9, 0.9), 67 | wd: float = 0.0, 68 | annealing_fn: Callable[[float, float, float], float] = annealing_const) -> None: 69 | """ 70 | Adds a new phase to the scheduler. 71 | 72 | Args: 73 | sched (kraken.lib.train.Trainscheduler): TrainScheduler instance 74 | iterations (int): Number of iterations per cycle 75 | max_lr (float): Peak learning rate 76 | div (float): divisor to determine minimum learning rate (min_lr = max_lr / div) 77 | max_mon (float): Maximum momentum 78 | min_mon (float): Minimum momentum 79 | wd (float): Weight decay 80 | annealing_fn (Callable[[int, int, int], float]): LR change 81 | function. Can be one of `annealing_const` (keeping start value), 82 | `annealing_linear` (linear change), and `annealing_cos` (cosine 83 | change). 
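        Example (sketch; the optimizer, phase length, and hyperparameters
        are arbitrary)::

            import torch
            from kraken.lib.train import TrainScheduler, annealing_linear

            optim = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.1)
            sched = TrainScheduler(optim)
            sched.add_phase(1000, lrate=(1e-5, 1e-3), momentum=(0.95, 0.85),
                            annealing_fn=annealing_linear)
            for _ in range(10):
                sched.step()   # writes lr/momentum/weight decay into the param groups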
84 | """ 85 | self.steps.extend([{'lr': annealing_fn(*lrate, pct=x/iterations), 86 | 'momentum': annealing_fn(*momentum, pct=x/iterations), 87 | 'weight_decay': wd} for x in range(iterations)]) 88 | 89 | def step(self) -> None: 90 | """ 91 | Performs an optimization step. 92 | """ 93 | if not self.cycle: 94 | self.cycle = cycle(self.steps) 95 | kwargs = next(self.cycle) 96 | for param_group in self.optimizer.param_groups: 97 | param_group.update(kwargs) 98 | 99 | 100 | def add_1cycle(sched: TrainScheduler, iterations: int, 101 | max_lr: float = 1e-4, div: float = 25.0, 102 | max_mom: float = 0.95, min_mom: float = 0.85, wd: float = 0.0): 103 | """ 104 | Adds 1cycle policy [0] phases to a learning rate scheduler. 105 | 106 | [0] Smith, Leslie N. "A disciplined approach to neural network hyper-parameters: Part 1--learning rate, batch size, momentum, and weight decay." arXiv preprint arXiv:1803.09820 (2018). 107 | 108 | Args: 109 | sched (kraken.lib.train.Trainscheduler): TrainScheduler instance 110 | iterations (int): Number of iterations per cycle 111 | max_lr (float): Peak learning rate 112 | div (float): divisor to determine minimum learning rate (min_lr = max_lr / div) 113 | max_mon (float): Maximum momentum 114 | min_mon (float): Minimum momentum 115 | wd (float): Weight decay 116 | """ 117 | sched.add_phase(iterations//2, (max_lr/div, max_lr), (max_mom, min_mom), wd, annealing_linear) 118 | sched.add_phase(iterations//2, (max_lr, max_lr/div), (min_mom, max_mom), wd, annealing_cos) 119 | 120 | 121 | class EarlyStopping(TrainStopper): 122 | """ 123 | Early stopping to terminate training when validation loss doesn't improve 124 | over a certain time. 125 | """ 126 | def __init__(self, it: data.DataLoader = None, min_delta: float = 0.002, lag: int = 5) -> None: 127 | """ 128 | Args: 129 | it (torch.utils.data.DataLoader): training data loader 130 | min_delta (float): minimum change in validation loss to qualify as improvement. 131 | lag (int): Number of epochs to wait for improvement before 132 | terminating. 133 | """ 134 | super().__init__() 135 | self.min_delta = min_delta 136 | self.lag = lag 137 | self.it = it 138 | self.wait = 0 139 | self.epoch = -1 140 | 141 | def __iter__(self): 142 | return self 143 | 144 | def __next__(self): 145 | if self.wait >= self.lag: 146 | raise StopIteration 147 | self.epoch += 1 148 | return self.it 149 | 150 | def update(self, val_loss: float) -> None: 151 | """ 152 | Updates the internal validation loss state 153 | """ 154 | if (val_loss - self.best_loss) < self.min_delta: 155 | self.wait += 1 156 | else: 157 | self.wait = 0 158 | self.best_loss = val_loss 159 | self.best_epoch = self.epoch 160 | 161 | 162 | class EpochStopping(TrainStopper): 163 | """ 164 | Dumb stopping after a fixed number of epochs. 
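    Usage sketch (`it` would normally be a torch DataLoader; here it is
    left as None and the validation metric is a placeholder)::

        from kraken.lib.train import EpochStopping

        stopper = EpochStopping(it=None, epochs=10)
        for loader in stopper:          # yields the wrapped loader once per epoch
            val_metric = 0.9            # placeholder: compute a real metric here
            stopper.update(val_metric)  # higher values count as improvement
        print(stopper.best_epoch)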
165 | """ 166 | def __init__(self, it: data.DataLoader = None, epochs: int = 100) -> None: 167 | """ 168 | Args: 169 | it (torch.utils.data.DataLoader): training data loader 170 | epochs (int): Number of epochs to train for 171 | """ 172 | super().__init__() 173 | self.epochs = epochs 174 | self.epoch = -1 175 | self.it = it 176 | 177 | def __iter__(self): 178 | return self 179 | 180 | def __next__(self): 181 | if self.epoch < self.epochs - 1: 182 | self.epoch += 1 183 | return self.it 184 | else: 185 | raise StopIteration 186 | 187 | def update(self, val_loss: float) -> None: 188 | """ 189 | Only update internal best epoch 190 | """ 191 | if val_loss > self.best_loss: 192 | self.best_loss = val_loss 193 | self.best_epoch = self.epoch 194 | -------------------------------------------------------------------------------- /docs/vgsl.rst: -------------------------------------------------------------------------------- 1 | .. _vgsl: 2 | 3 | VGSL network specification 4 | ========================== 5 | 6 | kraken implements a dialect of the Variable-size Graph Specification Language 7 | (VGSL), enabling the specification of different network architectures for image 8 | processing purposes using a short definition string. 9 | 10 | Basics 11 | ------ 12 | 13 | A VGSL specification consists of an input block, one or more layers, and an 14 | output block. For example: 15 | 16 | .. code-block:: console 17 | 18 | [1,48,0,1 Cr3,3,32 Mp2,2 Cr3,3,64 Mp2,2 S1(1x12)1,3 Lbx100 Do O1c103] 19 | 20 | The first block defines the input in order of [batch, heigh, width, channels] 21 | with zero-valued dimensions being variable. Integer valued height or width 22 | input specifications will result in the input images being automatically scaled 23 | in either dimension. 24 | 25 | When channels are set to 1 grayscale or B/W inputs are expected, 3 expects RGB 26 | color images. Higher values in combination with a height of 1 result in the 27 | network being fed 1 pixel wide grayscale strips scaled to the size of the 28 | channel dimension. 29 | 30 | After the input, a number of layers are defined. Layers operate on the channel 31 | dimension; this is intuitive for convolutional layers but a recurrent layer 32 | doing sequence classification along the width axis on an image of a particular 33 | height requires the height dimension to be moved to the channel dimension, 34 | e.g.: 35 | 36 | .. code-block:: console 37 | 38 | [1,48,0,1 S1(1x48)1,3 Lbx100 O1c103] 39 | 40 | or using the alternative slightly faster formulation: 41 | 42 | .. code-block:: console 43 | 44 | [1,1,0,48 Lbx100 O1c103] 45 | 46 | Finally an output definition is appended. When training sequence classification 47 | networks with the provided tools the appropriate output definition is 48 | automatically appended to the network based on the alphabet of the training 49 | data. 50 | 51 | Examples 52 | -------- 53 | 54 | .. code-block:: console 55 | 56 | [1,1,0,48 Lbx100 Do 01c59] 57 | 58 | Creating new model [1,1,0,48 Lbx100 Do] with 59 outputs 59 | layer type params 60 | 0 rnn direction b transposed False summarize False out 100 legacy None 61 | 1 dropout probability 0.5 dims 1 62 | 2 linear augmented False out 59 63 | 64 | A simple recurrent recognition model with a single LSTM layer classifying lines 65 | normalized to 48 pixels in height. 66 | 67 | .. 
code-block:: console 68 | 69 | [1,48,0,1 Cr3,3,32 Do0.1,2 Mp2,2 Cr3,3,64 Do0.1,2 Mp2,2 S1(1x12)1,3 Lbx100 Do 01c59] 70 | 71 | Creating new model [1,48,0,1 Cr3,3,32 Do0.1,2 Mp2,2 Cr3,3,64 Do0.1,2 Mp2,2 S1(1x12)1,3 Lbx100 Do] with 59 outputs 72 | layer type params 73 | 0 conv kernel 3 x 3 filters 32 activation r 74 | 1 dropout probability 0.1 dims 2 75 | 2 maxpool kernel 2 x 2 stride 2 x 2 76 | 3 conv kernel 3 x 3 filters 64 activation r 77 | 4 dropout probability 0.1 dims 2 78 | 5 maxpool kernel 2 x 2 stride 2 x 2 79 | 6 reshape from 1 1 x 12 to 1/3 80 | 7 rnn direction b transposed False summarize False out 100 legacy None 81 | 8 dropout probability 0.5 dims 1 82 | 9 linear augmented False out 59 83 | 84 | A model with a small convolutional stack before a recurrent LSTM layer. The 85 | extended dropout layer syntax is used to reduce drop probability on the depth 86 | dimension as the default is too high for convolutional layers. The remainder of 87 | the height dimension (`12`) is reshaped into the depth dimensions before 88 | applying the final recurrent and linear layers. 89 | 90 | .. code-block:: console 91 | 92 | [1,0,0,3 Cr3,3,16 Mp3,3 Lfys64 Lbx128 Lbx256 Do 01c59] 93 | 94 | Creating new model [1,0,0,3 Cr3,3,16 Mp3,3 Lfys64 Lbx128 Lbx256 Do] with 59 outputs 95 | layer type params 96 | 0 conv kernel 3 x 3 filters 16 activation r 97 | 1 maxpool kernel 3 x 3 stride 3 x 3 98 | 2 rnn direction f transposed True summarize True out 64 legacy None 99 | 3 rnn direction b transposed False summarize False out 128 legacy None 100 | 4 rnn direction b transposed False summarize False out 256 legacy None 101 | 5 dropout probability 0.5 dims 1 102 | 6 linear augmented False out 59 103 | 104 | A model with arbitrary sized color image input, an initial summarizing 105 | recurrent layer to squash the height to 64, followed by 2 bi-directional 106 | recurrent layers and a linear projection. 107 | 108 | Convolutional Layers 109 | -------------------- 110 | 111 | .. code-block:: console 112 | 113 | C[{name}](s|t|r|l|m)[{name}],, 114 | s = sigmoid 115 | t = tanh 116 | r = relu 117 | l = linear 118 | m = softmax 119 | 120 | Adds a 2D convolution with kernel size `(y, x)` and `d` output channels, applying 121 | the selected nonlinearity. 122 | 123 | Recurrent Layers 124 | ---------------- 125 | 126 | .. code-block:: console 127 | 128 | L[{name}](f|r|b)(x|y)[s][{name}] LSTM cell with n outputs. 129 | G[{name}](f|r|b)(x|y)[s][{name}] GRU cell with n outputs. 130 | f runs the RNN forward only. 131 | r runs the RNN reversed only. 132 | b runs the RNN bidirectionally. 133 | s (optional) summarizes the output in the requested dimension, return the last step. 134 | 135 | Adds either an LSTM or GRU recurrent layer to the network using eiter the `x` 136 | (width) or `y` (height) dimension as the time axis. Input features are the 137 | channel dimension and the non-time-axis dimension (height/width) is treated as 138 | another batch dimension. For example, a `Lfx25` layer on an `1, 16, 906, 32` 139 | input will execute 16 independent forward passes on `906x32` tensors resulting 140 | in an output of shape `1, 16, 906, 25`. If this isn't desired either run a 141 | summarizing layer in the other direction, e.g. `Lfys20` for an input `1, 1, 142 | 906, 20`, or prepend a reshape layer `S1(1x16)1,3` combining the height and 143 | channel dimension for an `1, 1, 906, 512` input to the recurrent layer. 144 | 145 | Helper and Plumbing Layers 146 | -------------------------- 147 | 148 | Max Pool 149 | ^^^^^^^^ 150 | .. 
145 | Helper and Plumbing Layers 146 | -------------------------- 147 | 148 | Max Pool 149 | ^^^^^^^^ 150 | .. code-block:: console 151 | 152 | Mp[{name}]<y>,<x>[,<y_stride>,<x_stride>] 153 | 154 | Adds a maximum pooling layer with `(y, x)` kernel size and `(y_stride, x_stride)` stride. 155 | 156 | Reshape 157 | ^^^^^^^ 158 | 159 | .. code-block:: console 160 | 161 | S[{name}]<d>(<a>x<b>)<e>,<f> Splits one dimension, moves one part to another 162 | dimension. 163 | 164 | The `S` layer reshapes a source dimension `d` to `a,b` and distributes `a` into 165 | dimension `e`, respectively `b` into `f`. Either `e` or `f` has to be equal to 166 | `d`. So `S1(1x48)1,3` on a `1, 48, 1020, 8` input will first reshape into 167 | `1, 1, 48, 1020, 8`, leave the `1` part in the height dimension and distribute 168 | the `48` sized tensor into the channel dimension resulting in a `1, 1, 1020, 169 | 48*8=384` sized output. `S` layers are mostly used to remove undesirable non-1 170 | height before a recurrent layer. 171 | 172 | .. note:: 173 | 174 | This `S` layer is equivalent to the one implemented in the tensorflow 175 | implementation of VGSL, i.e. behaves differently from tesseract. 176 | 177 | Regularization Layers 178 | --------------------- 179 | 180 | .. code-block:: console 181 | 182 | Do[{name}][<prob>],[<dim>] Insert a 1D or 2D dropout layer 183 | 184 | Adds a 1D or 2D dropout layer with a given probability. Defaults to `0.5` drop 185 | probability and 1D dropout. Set `dim` to `2` after convolutional layers. 186 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " applehelp to make an Apple Help Book" 34 | @echo " devhelp to make HTML files and a Devhelp project" 35 | @echo " epub to make an epub" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 58 | 59 | dirhtml: 60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 61 | @echo 62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 63 | 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | pickle: 70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 71 | @echo 72 | @echo "Build finished; now you can process the pickle files." 73 | 74 | json: 75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 76 | @echo 77 | @echo "Build finished; now you can process the JSON files." 78 | 79 | htmlhelp: 80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 81 | @echo 82 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 83 | ".hhp project file in $(BUILDDIR)/htmlhelp." 84 | 85 | qthelp: 86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 87 | @echo 88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/kraken.qhcp" 91 | @echo "To view the help file:" 92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/kraken.qhc" 93 | 94 | applehelp: 95 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 96 | @echo 97 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 98 | @echo "N.B. You won't be able to view it unless you put it in" \ 99 | "~/Library/Documentation/Help or install it in your application" \ 100 | "bundle." 
101 | 102 | devhelp: 103 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 104 | @echo 105 | @echo "Build finished." 106 | @echo "To view the help file:" 107 | @echo "# mkdir -p $$HOME/.local/share/devhelp/kraken" 108 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/kraken" 109 | @echo "# devhelp" 110 | 111 | epub: 112 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 113 | @echo 114 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 115 | 116 | latex: 117 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 118 | @echo 119 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 120 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 121 | "(use \`make latexpdf' here to do that automatically)." 122 | 123 | latexpdf: 124 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 125 | @echo "Running LaTeX files through pdflatex..." 126 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 127 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 128 | 129 | latexpdfja: 130 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 131 | @echo "Running LaTeX files through platex and dvipdfmx..." 132 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 133 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 134 | 135 | text: 136 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 137 | @echo 138 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 139 | 140 | man: 141 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 142 | @echo 143 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 144 | 145 | texinfo: 146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 147 | @echo 148 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 149 | @echo "Run \`make' in that directory to run these through makeinfo" \ 150 | "(use \`make info' here to do that automatically)." 151 | 152 | info: 153 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 154 | @echo "Running Texinfo files through makeinfo..." 155 | make -C $(BUILDDIR)/texinfo info 156 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 157 | 158 | gettext: 159 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 160 | @echo 161 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 162 | 163 | changes: 164 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 165 | @echo 166 | @echo "The overview file is in $(BUILDDIR)/changes." 167 | 168 | linkcheck: 169 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 170 | @echo 171 | @echo "Link check complete; look for any errors in the above output " \ 172 | "or in $(BUILDDIR)/linkcheck/output.txt." 173 | 174 | doctest: 175 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 176 | @echo "Testing of doctests in the sources finished, look at the " \ 177 | "results in $(BUILDDIR)/doctest/output.txt." 178 | 179 | coverage: 180 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 181 | @echo "Testing of coverage in the sources finished, look at the " \ 182 | "results in $(BUILDDIR)/coverage/python.txt." 183 | 184 | xml: 185 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 186 | @echo 187 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 188 | 189 | pseudoxml: 190 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 191 | @echo 192 | @echo "Build finished. 
The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 193 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | echo. coverage to run coverage check of the documentation if enabled 41 | goto end 42 | ) 43 | 44 | if "%1" == "clean" ( 45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 46 | del /q /s %BUILDDIR%\* 47 | goto end 48 | ) 49 | 50 | 51 | REM Check if sphinx-build is available and fallback to Python version if any 52 | %SPHINXBUILD% 2> nul 53 | if errorlevel 9009 goto sphinx_python 54 | goto sphinx_ok 55 | 56 | :sphinx_python 57 | 58 | set SPHINXBUILD=python -m sphinx.__init__ 59 | %SPHINXBUILD% 2> nul 60 | if errorlevel 9009 ( 61 | echo. 62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 63 | echo.installed, then set the SPHINXBUILD environment variable to point 64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 65 | echo.may add the Sphinx directory to PATH. 66 | echo. 67 | echo.If you don't have Sphinx installed, grab it from 68 | echo.http://sphinx-doc.org/ 69 | exit /b 1 70 | ) 71 | 72 | :sphinx_ok 73 | 74 | 75 | if "%1" == "html" ( 76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 77 | if errorlevel 1 exit /b 1 78 | echo. 79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 80 | goto end 81 | ) 82 | 83 | if "%1" == "dirhtml" ( 84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 85 | if errorlevel 1 exit /b 1 86 | echo. 87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 88 | goto end 89 | ) 90 | 91 | if "%1" == "singlehtml" ( 92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 93 | if errorlevel 1 exit /b 1 94 | echo. 95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 
96 | goto end 97 | ) 98 | 99 | if "%1" == "pickle" ( 100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 101 | if errorlevel 1 exit /b 1 102 | echo. 103 | echo.Build finished; now you can process the pickle files. 104 | goto end 105 | ) 106 | 107 | if "%1" == "json" ( 108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 109 | if errorlevel 1 exit /b 1 110 | echo. 111 | echo.Build finished; now you can process the JSON files. 112 | goto end 113 | ) 114 | 115 | if "%1" == "htmlhelp" ( 116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 117 | if errorlevel 1 exit /b 1 118 | echo. 119 | echo.Build finished; now you can run HTML Help Workshop with the ^ 120 | .hhp project file in %BUILDDIR%/htmlhelp. 121 | goto end 122 | ) 123 | 124 | if "%1" == "qthelp" ( 125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 129 | .qhcp project file in %BUILDDIR%/qthelp, like this: 130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\kraken.qhcp 131 | echo.To view the help file: 132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\kraken.ghc 133 | goto end 134 | ) 135 | 136 | if "%1" == "devhelp" ( 137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. 141 | goto end 142 | ) 143 | 144 | if "%1" == "epub" ( 145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 149 | goto end 150 | ) 151 | 152 | if "%1" == "latex" ( 153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 157 | goto end 158 | ) 159 | 160 | if "%1" == "latexpdf" ( 161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 162 | cd %BUILDDIR%/latex 163 | make all-pdf 164 | cd %~dp0 165 | echo. 166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdfja" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf-ja 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "text" ( 181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 182 | if errorlevel 1 exit /b 1 183 | echo. 184 | echo.Build finished. The text files are in %BUILDDIR%/text. 185 | goto end 186 | ) 187 | 188 | if "%1" == "man" ( 189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 190 | if errorlevel 1 exit /b 1 191 | echo. 192 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 193 | goto end 194 | ) 195 | 196 | if "%1" == "texinfo" ( 197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 198 | if errorlevel 1 exit /b 1 199 | echo. 200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 201 | goto end 202 | ) 203 | 204 | if "%1" == "gettext" ( 205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 206 | if errorlevel 1 exit /b 1 207 | echo. 208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 209 | goto end 210 | ) 211 | 212 | if "%1" == "changes" ( 213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 214 | if errorlevel 1 exit /b 1 215 | echo. 216 | echo.The overview file is in %BUILDDIR%/changes. 
217 | goto end 218 | ) 219 | 220 | if "%1" == "linkcheck" ( 221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 222 | if errorlevel 1 exit /b 1 223 | echo. 224 | echo.Link check complete; look for any errors in the above output ^ 225 | or in %BUILDDIR%/linkcheck/output.txt. 226 | goto end 227 | ) 228 | 229 | if "%1" == "doctest" ( 230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 231 | if errorlevel 1 exit /b 1 232 | echo. 233 | echo.Testing of doctests in the sources finished, look at the ^ 234 | results in %BUILDDIR%/doctest/output.txt. 235 | goto end 236 | ) 237 | 238 | if "%1" == "coverage" ( 239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 240 | if errorlevel 1 exit /b 1 241 | echo. 242 | echo.Testing of coverage in the sources finished, look at the ^ 243 | results in %BUILDDIR%/coverage/python.txt. 244 | goto end 245 | ) 246 | 247 | if "%1" == "xml" ( 248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 249 | if errorlevel 1 exit /b 1 250 | echo. 251 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 252 | goto end 253 | ) 254 | 255 | if "%1" == "pseudoxml" ( 256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 257 | if errorlevel 1 exit /b 1 258 | echo. 259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 260 | goto end 261 | ) 262 | 263 | :end 264 | -------------------------------------------------------------------------------- /kraken/lib/codec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2017 Benjamin Kiessling 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 14 | # or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 16 | 17 | """ 18 | pytorch compatible codec with many-to-many mapping between labels and 19 | graphemes. 20 | """ 21 | import regex 22 | import numpy as np 23 | 24 | from typing import List, Tuple, Set, Union, Dict, Sequence 25 | from torch import IntTensor 26 | from kraken.lib.exceptions import KrakenEncodeException 27 | 28 | __all__ = ['PytorchCodec'] 29 | 30 | 31 | class PytorchCodec(object): 32 | """ 33 | Translates between labels and graphemes. 34 | """ 35 | def __init__(self, charset: Union[Dict[str, Sequence[int]], Sequence[str], str]) -> None: 36 | """ 37 | Builds a codec converting between graphemes/code points and integer 38 | label sequences. 39 | 40 | charset may either be a string, a list or a dict. In the first case 41 | each code point will be assigned a label, in the second case each 42 | string in the list will be assigned a label, and in the final case each 43 | key string will be mapped to the value sequence of integers. In the 44 | first two cases labels will be assigned automatically. 45 | 46 | As 0 is the blank label in a CTC output layer, output labels and input 47 | dictionaries are/should be 1-indexed. 48 | 49 | Args: 50 | charset (unicode, list, dict): Input character set. 
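        Example:
            A minimal sketch; the labels shown simply follow the sorted,
            1-indexed assignment described above and are illustrative only::

                codec = PytorchCodec('ab')      # 'a' -> [1], 'b' -> [2]
                labels = codec.encode('abba')   # IntTensor([1, 2, 2, 1])
                codec.decode([(l, 0, 0, 1.0) for l in labels.tolist()])
                # -> [('a', 0, 0, 1.0), ('b', 0, 0, 1.0), ('b', 0, 0, 1.0), ('a', 0, 0, 1.0)]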
51 | """ 52 | if isinstance(charset, dict): 53 | self.c2l = charset 54 | else: 55 | self.c2l = {k: [v] for v, k in enumerate(sorted(charset), start=1)} 56 | # map integer labels to code points because regex only works with strings 57 | self.l2c = {} # type: Dict[str, str] 58 | for k, v in self.c2l.items(): 59 | self.l2c[''.join(chr(c) for c in v)] = k 60 | 61 | # sort prefixes for c2l regex 62 | self.c2l_regex = regex.compile(r'|'.join(regex.escape(x) for x in sorted(self.c2l.keys(), key=len, reverse=True))) 63 | # sort prefixes for l2c regex 64 | self.l2c_regex = regex.compile(r'|'.join(regex.escape(x) for x in sorted(self.l2c.keys(), key=len, reverse=True))) 65 | 66 | def __len__(self) -> int: 67 | """ 68 | Total number of input labels the codec can decode. 69 | """ 70 | return len(self.l2c.keys()) 71 | 72 | def max_label(self) -> int: 73 | """ 74 | Returns the maximum label value. 75 | """ 76 | return max(l for labels in self.c2l.values() for l in labels) 77 | 78 | def encode(self, s: str) -> IntTensor: 79 | """ 80 | Encodes a string into a sequence of labels. 81 | 82 | Args: 83 | s (str): Input unicode string 84 | 85 | Returns: 86 | (torch.IntTensor) encoded label sequence 87 | 88 | Raises: 89 | KrakenEncodeException if encoding fails. 90 | """ 91 | splits = self._greedy_split(s, self.c2l_regex) 92 | labels = [] # type: List[int] 93 | for c in splits: 94 | labels.extend(self.c2l[c]) 95 | return IntTensor(labels) 96 | 97 | def decode(self, labels: Sequence[Tuple[int, int, int, float]]) -> List[Tuple[str, int, int, float]]: 98 | """ 99 | Decodes a labelling. 100 | 101 | Given a labelling with cuts and confidences returns a string with the 102 | cuts and confidences aggregated across label-code point 103 | correspondences. When decoding multilabels to code points the resulting 104 | cuts are min/max, confidences are averaged. 105 | 106 | Args: 107 | labels (list): Input containing tuples (label, start, end, 108 | confidence). 109 | 110 | Returns: 111 | list: A list of tuples (code point, start, end, confidence) 112 | """ 113 | # map into unicode space 114 | uni_labels = ''.join(chr(v) for v, _, _, _ in labels) 115 | start = [x for _, x, _, _ in labels] 116 | end = [x for _, _, x, _ in labels] 117 | con = [x for _, _, _, x in labels] 118 | splits = self._greedy_split(uni_labels, self.l2c_regex) 119 | decoded = [] 120 | idx = 0 121 | for i in splits: 122 | decoded.extend([(c, s, e, u) for c, s, e, u in zip(self.l2c[i], 123 | len(self.l2c[i]) * [start[idx]], 124 | len(self.l2c[i]) * [end[idx + len(i) - 1]], 125 | len(self.l2c[i]) * [np.mean(con[idx:idx + len(i)])])]) 126 | idx += len(i) 127 | return decoded 128 | 129 | def _greedy_split(self, input: str, re: regex.Regex) -> List[str]: 130 | """ 131 | Splits an input string greedily from a list of prefixes. Stops when no 132 | more matches are found. 133 | 134 | Args: 135 | input (str): input string 136 | re (regex.Regex): Prefix match object 137 | 138 | Returns: 139 | (list) of prefixes 140 | 141 | Raises: 142 | (KrakenEncodeException) if no prefix match is found for some part 143 | of the string. 
144 | """ 145 | r = [] # type: List[str] 146 | idx = 0 147 | while True: 148 | mo = re.match(input, idx) 149 | if mo is None or idx == len(input): 150 | if len(input) > idx: 151 | raise KrakenEncodeException('No prefix matches for input after {}'.format(idx)) 152 | return r 153 | r.append(mo.group()) 154 | idx = mo.end() 155 | 156 | def merge(self, codec: 'PytorchCodec') -> Tuple['PytorchCodec', Set]: 157 | """ 158 | Transforms this codec (c1) into another (c2) reusing as many labels as 159 | possible. 160 | 161 | The resulting codec is able to encode the same code point sequences 162 | while not necessarily having the same labels for them as c2. 163 | Retains matching character -> label mappings from both codecs, removes 164 | mappings not c2, and adds mappings not in c1. Compound labels in c2 for 165 | code point sequences not in c1 containing labels also in use in c1 are 166 | added as separate labels. 167 | 168 | Args: 169 | codec (kraken.lib.codec.PytorchCodec): 170 | 171 | Returns: 172 | A merged codec and a list of labels that were removed from the 173 | original codec. 174 | """ 175 | # find character sequences not encodable (exact match) by new codec. 176 | # get labels for these sequences as deletion candidates 177 | rm_candidates = {cseq: enc for cseq, enc in self.c2l.items() if cseq not in codec.c2l} 178 | c2l_cand = self.c2l.copy() 179 | for x in rm_candidates.keys(): 180 | c2l_cand.pop(x) 181 | # remove labels from candidate list that are in use for other decodings 182 | rm_labels = [label for v in rm_candidates.values() for label in v] 183 | for v in c2l_cand.values(): 184 | for l in rm_labels: 185 | if l in v: 186 | rm_labels.remove(l) 187 | # iteratively remove labels, decrementing subsequent labels to close 188 | # (new) holes in the codec. 189 | offset_rm_labels = [v-idx for idx, v in enumerate(sorted(set(rm_labels)))] 190 | for rlabel in offset_rm_labels: 191 | c2l_cand = {k: [l-1 if l > rlabel else l for l in v] for k, v in c2l_cand.items()} 192 | # add mappings not in original codec 193 | add_list = {cseq: enc for cseq, enc in codec.c2l.items() if cseq not in self.c2l} 194 | # renumber 195 | start_idx = max(label for v in c2l_cand.values() for label in v) + 1 196 | add_labels = {k: v for v, k in enumerate(sorted(set(label for v in add_list.values() for label in v)), start_idx)} 197 | for k, v in add_list.items(): 198 | c2l_cand[k] = [add_labels[label] for label in v] 199 | return PytorchCodec(c2l_cand), set(rm_labels) 200 | -------------------------------------------------------------------------------- /kraken/serialization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2015 Benjamin Kiessling 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 14 | # or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 
16 | from jinja2 import Environment, PackageLoader 17 | 18 | import regex 19 | import logging 20 | import unicodedata 21 | 22 | from collections import Counter 23 | 24 | from kraken.rpred import ocr_record 25 | from kraken.lib.util import make_printable 26 | 27 | from typing import List, Tuple, Iterable, Optional, Sequence 28 | 29 | logger = logging.getLogger(__name__) 30 | 31 | __all__ = ['serialize'] 32 | 33 | 34 | def _rescale(val: Sequence[float], low: float, high: float) -> List[float]: 35 | """ 36 | Rescales a list of confidence value between 0 and 1 to an interval [low, 37 | high]. 38 | 39 | Args: 40 | val (float): List of values in interval (0,1) 41 | low (float): Lower bound of rescaling interval 42 | high (float): Upper bound of rescaling interval 43 | 44 | Returns: 45 | Rescaled value (float). 46 | """ 47 | return [(high - low) * x + low for x in val] 48 | 49 | 50 | def max_bbox(boxes: Iterable[Tuple[int, int, int, int]]) -> Tuple[int, int, int, int]: 51 | """ 52 | Calculates the minimal bounding box containing all boxes contained in an 53 | iterator. 54 | 55 | Args: 56 | boxes (iterator): An iterator returning tuples of the format (x0, y0, 57 | x1, y1) 58 | Returns: 59 | A box covering all bounding boxes in the input argument 60 | """ 61 | # XXX: fix type hinting 62 | sbox = list(map(sorted, list(zip(*boxes)))) 63 | return (sbox[0][0], sbox[1][0], sbox[2][-1], sbox[3][-1]) # type: ignore 64 | 65 | 66 | def serialize(records: Sequence[ocr_record], 67 | image_name: str = None, 68 | image_size: Tuple[int, int] = (0, 0), 69 | writing_mode: str = 'horizontal-tb', 70 | scripts: Optional[Iterable[str]] = None, 71 | template: str = 'hocr') -> str: 72 | """ 73 | Serializes a list of ocr_records into an output document. 74 | 75 | Serializes a list of predictions and their corresponding positions by doing 76 | some hOCR-specific preprocessing and then renders them through one of 77 | several jinja2 templates. 78 | 79 | Note: Empty records are ignored for serialization purposes. 80 | 81 | Args: 82 | records (iterable): List of kraken.rpred.ocr_record 83 | image_name (str): Name of the source image 84 | image_size (tuple): Dimensions of the source image 85 | writing_mode (str): Sets the principal layout of lines and the 86 | direction in which blocks progress. Valid values 87 | are horizontal-tb, vertical-rl, and 88 | vertical-lr. 89 | scripts (list): List of scripts contained in the OCR records 90 | template (str): Selector for the serialization format. May be 91 | 'hocr' or 'alto'. 92 | 93 | Returns: 94 | (str) rendered template. 95 | """ 96 | logger.info('Serialize {} records from {} with template {}.'.format(len(records), image_name, template)) 97 | page = {'lines': [], 'size': image_size, 'name': image_name, 'writing_mode': writing_mode, 'scripts': scripts} # type: dict 98 | seg_idx = 0 99 | char_idx = 0 100 | for idx, record in enumerate(records): 101 | # skip empty records 102 | if not record.prediction: 103 | logger.debug('Empty record. 
Skipping') 104 | continue 105 | line = {'index': idx, 106 | 'bbox': max_bbox(record.cuts), 107 | 'cuts': record.cuts, 108 | 'confidences': record.confidences, 109 | 'recognition': [] 110 | } 111 | splits = regex.split(r'(\s+)', record.prediction) 112 | line_offset = 0 113 | logger.debug('Record contains {} segments'.format(len(splits))) 114 | for segment in splits: 115 | if len(segment) == 0: 116 | continue 117 | seg_bbox = max_bbox(record.cuts[line_offset:line_offset + len(segment)]) 118 | 119 | line['recognition'].extend([{'bbox': seg_bbox, 120 | 'confidences': record.confidences[line_offset:line_offset + len(segment)], 121 | 'cuts': record.cuts[line_offset:line_offset + len(segment)], 122 | 'text': segment, 123 | 'recognition': [{'bbox': cut, 'confidence': conf, 'text': char, 'index': cid} 124 | for conf, cut, char, cid in 125 | zip(record.confidences[line_offset:line_offset + len(segment)], 126 | record.cuts[line_offset:line_offset + len(segment)], 127 | segment, 128 | range(char_idx, char_idx + len(segment)))], 129 | 'index': seg_idx}]) 130 | char_idx += len(segment) 131 | seg_idx += 1 132 | line_offset += len(segment) 133 | page['lines'].append(line) 134 | logger.debug('Initializing jinja environment.') 135 | env = Environment(loader=PackageLoader('kraken', 'templates'), 136 | trim_blocks=True, 137 | lstrip_blocks=True, 138 | autoescape=True) 139 | env.tests['whitespace'] = str.isspace 140 | env.filters['rescale'] = _rescale 141 | logger.debug('Retrieving template.') 142 | tmpl = env.get_template(template) 143 | logger.debug('Rendering data.') 144 | return tmpl.render(page=page) 145 | 146 | 147 | def render_report(model: str, 148 | chars: int, 149 | errors: int, 150 | char_confusions: Counter, 151 | scripts: Counter, 152 | insertions: Counter, 153 | deletions: int, 154 | substitutions: Counter) -> str: 155 | """ 156 | Renders an accuracy report. 157 | 158 | Args: 159 | model (str): Model name. 160 | errors (int): Number of errors on test set. 161 | char_confusions (dict): Dictionary mapping a tuple (gt, pred) to a 162 | number of occurrences. 163 | scripts (dict): Dictionary counting character per script. 164 | insertions (dict): Dictionary counting insertion operations per Unicode 165 | script 166 | deletions (int): Number of deletions 167 | substitutions (dict): Dictionary counting substitution operations per 168 | Unicode script. 169 | 170 | Returns: 171 | A string containing the rendered report. 
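        Example:
            A sketch with made-up counts (not taken from a real evaluation
            run)::

                from collections import Counter

                report = render_report('toy.clstm', chars=100, errors=7,
                                       char_confusions=Counter({('a', 'o'): 3}),
                                       scripts=Counter({'Latin': 100}),
                                       insertions=Counter({'Latin': 2}),
                                       deletions=1,
                                       substitutions=Counter({'Latin': 4}))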
172 | """ 173 | logger.info('Serializing report for {}'.format(model)) 174 | 175 | report = {'model': model, 176 | 'chars': chars, 177 | 'errors': errors, 178 | 'accuracy': (chars-errors)/chars * 100, 179 | 'insertions': sum(insertions.values()), 180 | 'deletions': deletions, 181 | 'substitutions': sum(substitutions.values()), 182 | 'scripts': sorted([{'script': k, 183 | 'count': v, 184 | 'errors': insertions[k] + substitutions[k], 185 | 'accuracy': 100 * (v-(insertions[k] + substitutions[k]))/v} for k, v in scripts.items()], 186 | key=lambda x: x['accuracy'], 187 | reverse=True), 188 | 'counts': sorted([{'correct': make_printable(k[0]), 189 | 'generated': make_printable(k[1]), 190 | 'errors': v} for k, v in char_confusions.items() if k[0] != k[1]], 191 | key=lambda x: x['errors'], 192 | reverse=True)} 193 | logger.debug('Initializing jinja environment.') 194 | env = Environment(loader=PackageLoader('kraken', 'templates'), 195 | trim_blocks=True, 196 | lstrip_blocks=True, 197 | autoescape=True) 198 | logger.debug('Retrieving template.') 199 | tmpl = env.get_template('report') 200 | logger.debug('Rendering data.') 201 | return tmpl.render(report=report) 202 | 203 | -------------------------------------------------------------------------------- /kraken/lib/clstm_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 2 | # source: clstm.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='clstm.proto', 20 | package='clstm', 21 | syntax='proto2', 22 | serialized_pb=_b('\n\x0b\x63lstm.proto\x12\x05\x63lstm\"&\n\x08KeyValue\x12\x0b\n\x03key\x18\x01 \x02(\t\x12\r\n\x05value\x18\x02 \x02(\t\"1\n\x05\x41rray\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0b\n\x03\x64im\x18\x02 \x03(\x05\x12\r\n\x05value\x18\x03 \x03(\x02\"\xcf\x01\n\x0cNetworkProto\x12\x0c\n\x04kind\x18\x01 \x02(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06ninput\x18\n \x02(\x05\x12\x0f\n\x07noutput\x18\x0b \x02(\x05\x12\x0e\n\x06icodec\x18\x0c \x03(\x05\x12\r\n\x05\x63odec\x18\r \x03(\x05\x12\"\n\tattribute\x18\x14 \x03(\x0b\x32\x0f.clstm.KeyValue\x12\x1d\n\x07weights\x18\x1e \x03(\x0b\x32\x0c.clstm.Array\x12 \n\x03sub\x18( \x03(\x0b\x32\x13.clstm.NetworkProto') 23 | ) 24 | 25 | 26 | 27 | 28 | _KEYVALUE = _descriptor.Descriptor( 29 | name='KeyValue', 30 | full_name='clstm.KeyValue', 31 | filename=None, 32 | file=DESCRIPTOR, 33 | containing_type=None, 34 | fields=[ 35 | _descriptor.FieldDescriptor( 36 | name='key', full_name='clstm.KeyValue.key', index=0, 37 | number=1, type=9, cpp_type=9, label=2, 38 | has_default_value=False, default_value=_b("").decode('utf-8'), 39 | message_type=None, enum_type=None, containing_type=None, 40 | is_extension=False, extension_scope=None, 41 | options=None), 42 | _descriptor.FieldDescriptor( 43 | name='value', full_name='clstm.KeyValue.value', index=1, 44 | number=2, type=9, cpp_type=9, label=2, 45 | has_default_value=False, default_value=_b("").decode('utf-8'), 46 | message_type=None, enum_type=None, 
containing_type=None, 47 | is_extension=False, extension_scope=None, 48 | options=None), 49 | ], 50 | extensions=[ 51 | ], 52 | nested_types=[], 53 | enum_types=[ 54 | ], 55 | options=None, 56 | is_extendable=False, 57 | syntax='proto2', 58 | extension_ranges=[], 59 | oneofs=[ 60 | ], 61 | serialized_start=22, 62 | serialized_end=60, 63 | ) 64 | 65 | 66 | _ARRAY = _descriptor.Descriptor( 67 | name='Array', 68 | full_name='clstm.Array', 69 | filename=None, 70 | file=DESCRIPTOR, 71 | containing_type=None, 72 | fields=[ 73 | _descriptor.FieldDescriptor( 74 | name='name', full_name='clstm.Array.name', index=0, 75 | number=1, type=9, cpp_type=9, label=1, 76 | has_default_value=False, default_value=_b("").decode('utf-8'), 77 | message_type=None, enum_type=None, containing_type=None, 78 | is_extension=False, extension_scope=None, 79 | options=None), 80 | _descriptor.FieldDescriptor( 81 | name='dim', full_name='clstm.Array.dim', index=1, 82 | number=2, type=5, cpp_type=1, label=3, 83 | has_default_value=False, default_value=[], 84 | message_type=None, enum_type=None, containing_type=None, 85 | is_extension=False, extension_scope=None, 86 | options=None), 87 | _descriptor.FieldDescriptor( 88 | name='value', full_name='clstm.Array.value', index=2, 89 | number=3, type=2, cpp_type=6, label=3, 90 | has_default_value=False, default_value=[], 91 | message_type=None, enum_type=None, containing_type=None, 92 | is_extension=False, extension_scope=None, 93 | options=None), 94 | ], 95 | extensions=[ 96 | ], 97 | nested_types=[], 98 | enum_types=[ 99 | ], 100 | options=None, 101 | is_extendable=False, 102 | syntax='proto2', 103 | extension_ranges=[], 104 | oneofs=[ 105 | ], 106 | serialized_start=62, 107 | serialized_end=111, 108 | ) 109 | 110 | 111 | _NETWORKPROTO = _descriptor.Descriptor( 112 | name='NetworkProto', 113 | full_name='clstm.NetworkProto', 114 | filename=None, 115 | file=DESCRIPTOR, 116 | containing_type=None, 117 | fields=[ 118 | _descriptor.FieldDescriptor( 119 | name='kind', full_name='clstm.NetworkProto.kind', index=0, 120 | number=1, type=9, cpp_type=9, label=2, 121 | has_default_value=False, default_value=_b("").decode('utf-8'), 122 | message_type=None, enum_type=None, containing_type=None, 123 | is_extension=False, extension_scope=None, 124 | options=None), 125 | _descriptor.FieldDescriptor( 126 | name='name', full_name='clstm.NetworkProto.name', index=1, 127 | number=2, type=9, cpp_type=9, label=1, 128 | has_default_value=False, default_value=_b("").decode('utf-8'), 129 | message_type=None, enum_type=None, containing_type=None, 130 | is_extension=False, extension_scope=None, 131 | options=None), 132 | _descriptor.FieldDescriptor( 133 | name='ninput', full_name='clstm.NetworkProto.ninput', index=2, 134 | number=10, type=5, cpp_type=1, label=2, 135 | has_default_value=False, default_value=0, 136 | message_type=None, enum_type=None, containing_type=None, 137 | is_extension=False, extension_scope=None, 138 | options=None), 139 | _descriptor.FieldDescriptor( 140 | name='noutput', full_name='clstm.NetworkProto.noutput', index=3, 141 | number=11, type=5, cpp_type=1, label=2, 142 | has_default_value=False, default_value=0, 143 | message_type=None, enum_type=None, containing_type=None, 144 | is_extension=False, extension_scope=None, 145 | options=None), 146 | _descriptor.FieldDescriptor( 147 | name='icodec', full_name='clstm.NetworkProto.icodec', index=4, 148 | number=12, type=5, cpp_type=1, label=3, 149 | has_default_value=False, default_value=[], 150 | message_type=None, enum_type=None, 
containing_type=None, 151 | is_extension=False, extension_scope=None, 152 | options=None), 153 | _descriptor.FieldDescriptor( 154 | name='codec', full_name='clstm.NetworkProto.codec', index=5, 155 | number=13, type=5, cpp_type=1, label=3, 156 | has_default_value=False, default_value=[], 157 | message_type=None, enum_type=None, containing_type=None, 158 | is_extension=False, extension_scope=None, 159 | options=None), 160 | _descriptor.FieldDescriptor( 161 | name='attribute', full_name='clstm.NetworkProto.attribute', index=6, 162 | number=20, type=11, cpp_type=10, label=3, 163 | has_default_value=False, default_value=[], 164 | message_type=None, enum_type=None, containing_type=None, 165 | is_extension=False, extension_scope=None, 166 | options=None), 167 | _descriptor.FieldDescriptor( 168 | name='weights', full_name='clstm.NetworkProto.weights', index=7, 169 | number=30, type=11, cpp_type=10, label=3, 170 | has_default_value=False, default_value=[], 171 | message_type=None, enum_type=None, containing_type=None, 172 | is_extension=False, extension_scope=None, 173 | options=None), 174 | _descriptor.FieldDescriptor( 175 | name='sub', full_name='clstm.NetworkProto.sub', index=8, 176 | number=40, type=11, cpp_type=10, label=3, 177 | has_default_value=False, default_value=[], 178 | message_type=None, enum_type=None, containing_type=None, 179 | is_extension=False, extension_scope=None, 180 | options=None), 181 | ], 182 | extensions=[ 183 | ], 184 | nested_types=[], 185 | enum_types=[ 186 | ], 187 | options=None, 188 | is_extendable=False, 189 | syntax='proto2', 190 | extension_ranges=[], 191 | oneofs=[ 192 | ], 193 | serialized_start=114, 194 | serialized_end=321, 195 | ) 196 | 197 | _NETWORKPROTO.fields_by_name['attribute'].message_type = _KEYVALUE 198 | _NETWORKPROTO.fields_by_name['weights'].message_type = _ARRAY 199 | _NETWORKPROTO.fields_by_name['sub'].message_type = _NETWORKPROTO 200 | DESCRIPTOR.message_types_by_name['KeyValue'] = _KEYVALUE 201 | DESCRIPTOR.message_types_by_name['Array'] = _ARRAY 202 | DESCRIPTOR.message_types_by_name['NetworkProto'] = _NETWORKPROTO 203 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 204 | 205 | KeyValue = _reflection.GeneratedProtocolMessageType('KeyValue', (_message.Message,), dict( 206 | DESCRIPTOR = _KEYVALUE, 207 | __module__ = 'clstm_pb2' 208 | # @@protoc_insertion_point(class_scope:clstm.KeyValue) 209 | )) 210 | _sym_db.RegisterMessage(KeyValue) 211 | 212 | Array = _reflection.GeneratedProtocolMessageType('Array', (_message.Message,), dict( 213 | DESCRIPTOR = _ARRAY, 214 | __module__ = 'clstm_pb2' 215 | # @@protoc_insertion_point(class_scope:clstm.Array) 216 | )) 217 | _sym_db.RegisterMessage(Array) 218 | 219 | NetworkProto = _reflection.GeneratedProtocolMessageType('NetworkProto', (_message.Message,), dict( 220 | DESCRIPTOR = _NETWORKPROTO, 221 | __module__ = 'clstm_pb2' 222 | # @@protoc_insertion_point(class_scope:clstm.NetworkProto) 223 | )) 224 | _sym_db.RegisterMessage(NetworkProto) 225 | 226 | 227 | # @@protoc_insertion_point(module_scope) 228 | -------------------------------------------------------------------------------- /docs/advanced.rst: -------------------------------------------------------------------------------- 1 | .. 
_advanced: 2 | 3 | Advanced Usage 4 | ============== 5 | 6 | Optical character recognition is the serial execution of multiple steps, in the 7 | case of kraken: binarization (converting color and grayscale images into bitonal 8 | ones), layout analysis/page segmentation (extracting topological text lines 9 | from an image), recognition (feeding text line images into a classifier), 10 | and finally serialization of results into an appropriate format such as hOCR or 11 | ALTO. 12 | 13 | Input Specification 14 | ------------------- 15 | 16 | All kraken subcommands operating on input-output pairs, i.e. producing one 17 | output document for one input document, follow the basic syntax: 18 | 19 | .. code-block:: console 20 | 21 | $ kraken -i input_1 output_1 -i input_2 output_2 ... subcommand_1 subcommand_2 ... subcommand_n 22 | 23 | In particular subcommands may be chained. 24 | 25 | Binarization 26 | ------------ 27 | 28 | The binarization subcommand accepts almost the same parameters as 29 | ``ocropus-nlbin``. Only options not related to binarization, e.g. skew 30 | detection, are missing. In addition, error checking (image sizes, inversion 31 | detection, grayscale enforcement) is always disabled and kraken will happily 32 | binarize any image that is thrown at it. 33 | 34 | Available parameters are: 35 | 36 | =========== ==== 37 | option type 38 | =========== ==== 39 | --threshold FLOAT 40 | --zoom FLOAT 41 | --escale FLOAT 42 | --border FLOAT 43 | --perc INTEGER RANGE 44 | --range INTEGER 45 | --low INTEGER RANGE 46 | --high INTEGER RANGE 47 | =========== ==== 48 | 49 | Page Segmentation and Script Detection 50 | -------------------------------------- 51 | 52 | The `segment` subcommand provides access to two operations: page segmentation into lines and 53 | script detection of those lines. 54 | 55 | Page segmentation is mostly parameterless, although a switch to change the 56 | color of column separators has been retained. The segmentation is written as a 57 | `JSON `_ file containing bounding boxes in reading order and 58 | the general text direction (horizontal, i.e. LTR or RTL text in top-to-bottom 59 | reading order or vertical-ltr/rtl for vertical lines read from left-to-right or 60 | right-to-left). 61 | 62 | The script detection splits extracted lines from the segmenter into strips 63 | sharing a particular script that can then be recognized by supplying 64 | appropriate models for each detected script to the `ocr` subcommand. 65 | 66 | Combined output from both consists of lists in the `boxes` field corresponding 67 | to a topographical line and containing one or more bounding boxes of a 68 | particular script. Identifiers are `ISO 15924 69 | `_ 4 character codes. 70 | 71 | .. code-block:: console 72 | 73 | $ kraken -i 14.tif lines.json segment 74 | $ cat lines.json 75 | { 76 | "boxes" : [ 77 | [ 78 | ["Grek", [561, 216, 1626, 309]] 79 | ], 80 | [ 81 | ["Latn", [2172, 197, 2424, 244]] 82 | ], 83 | [ 84 | ["Grek", [1678, 221, 2236, 320]], 85 | ["Arab", [2241, 221, 2302, 320]] 86 | ], 87 | [ 88 | ["Grek", [412, 318, 2215, 416]], 89 | ["Latn", [2208, 318, 2424, 416]] 90 | ], 91 | ... 92 | ], 93 | "text_direction" : "horizontal-tb" 94 | } 95 | 96 | Script detection is automatically enabled; if it is explicitly disabled the 97 | `boxes` field will contain only a list of line bounding boxes: 98 | 99 | .. code-block:: console 100 | 101 | [546, 216, 1626, 309], 102 | [2169, 197, 2423, 244], 103 | [1676, 221, 2293, 320], 104 | ...
105 | [503, 2641, 848, 2681] 106 | 107 | Available page segmentation parameters are: 108 | 109 | =============================================== ====== 110 | option action 111 | =============================================== ====== 112 | -d, --text-direction Sets principal text direction. Valid values are `horizontal-lr`, `horizontal-rl`, `vertical-lr`, and `vertical-rl`. 113 | --scale FLOAT Estimate of the average line height on the page 114 | -m, --maxcolseps Maximum number of columns in the input document. Set to `0` for uni-column layouts. 115 | -b, --black-colseps / -w, --white-colseps Switch to black column separators. 116 | -r, --remove-hlines / -l, --hlines Disables prefiltering of small horizontal lines. Improves segmenter output on some Arabic texts. 117 | =============================================== ====== 118 | 119 | The parameters specific to the script identification are: 120 | 121 | =============================================== ====== 122 | option action 123 | =============================================== ====== 124 | -s/-n Enables/disables script detection 125 | -a, --allowed-script Whitelists specific scripts for detection output. Other detected script runs are merged with their adjacent scripts, after a heuristic pre-merging step. 126 | =============================================== ====== 127 | 128 | Model Repository 129 | ---------------- 130 | 131 | There is a semi-curated `repository 132 | `_ of freely licensed recognition 133 | models that can be accessed from the command line using a few subcommands. For 134 | evaluating a series of models it is also possible to just clone the repository 135 | using the normal git client. 136 | 137 | The ``list`` subcommand retrieves a list of all models available and prints 138 | them including some additional information (identifier, type, and a short 139 | description): 140 | 141 | .. code-block:: console 142 | 143 | $ kraken list 144 | Retrieving model list ✓ 145 | default (pyrnn) - A converted version of en-default.pyrnn.gz 146 | toy (clstm) - A toy model trained on 400 lines of the UW3 data set. 147 | ... 148 | 149 | To access more detailed information the ``show`` subcommand may be used: 150 | 151 | .. code-block:: console 152 | 153 | $ kraken show toy 154 | name: toy.clstm 155 | 156 | A toy model trained on 400 lines of the UW3 data set. 157 | 158 | author: Benjamin Kiessling (mittagessen@l.unchti.me) 159 | http://kraken.re 160 | 161 | If a suitable model has been decided upon it can be retrieved using the ``get`` 162 | subcommand: 163 | 164 | .. code-block:: console 165 | 166 | $ kraken get toy 167 | Retrieving model ✓ 168 | 169 | Models will be placed in $XDG_BASE_DIR and can be accessed using their name as 170 | shown by the ``show`` command, e.g.: 171 | 172 | .. code-block:: console 173 | 174 | $ kraken -i ... ... ocr -m toy 175 | 176 | Additions and updates to existing models are always welcome! Just open a pull 177 | request or write an email. 178 | 179 | Recognition 180 | ----------- 181 | 182 | Recognition requires a grey-scale or binarized image, a page segmentation for 183 | that image, and a model file. In particular there is no requirement to use the 184 | page segmentation algorithm contained in the ``segment`` subcommand or the 185 | binarization provided by kraken. 186 | 187 | Multi-script recognition is possible by supplying a script-annotated 188 | segmentation and a mapping between scripts and models: 189 | 190 | .. code-block:: console 191 | 192 | $ kraken -i ... ... 
ocr -m Grek:porson.clstm -m Latn:antiqua.clstm 193 | 194 | All polytonic Greek text portions will be recognized using the `porson.clstm` 195 | model while Latin text will be fed into the `antiqua.clstm` model. It is 196 | possible to define a fallback model that other text will be fed to: 197 | 198 | .. code-block:: console 199 | 200 | $ kraken -i ... ... ocr -m ... -m ... -m default:porson.clstm 201 | 202 | It is also possible to disable recognition on a particular script by mapping to 203 | the special model keyword `ignore`. Ignored lines will still be serialized but 204 | will not contain any recognition results. 205 | 206 | The ``ocr`` subcommand is able to serialize the recognition results either as 207 | plain text (default), as `hOCR `_, into `ALTO 208 | `_, or abbyyXML containing additional 209 | metadata such as bounding boxes and confidences: 210 | 211 | .. code-block:: console 212 | 213 | $ kraken -i ... ... ocr -t # text output 214 | $ kraken -i ... ... ocr -h # hOCR output 215 | $ kraken -i ... ... ocr -a # ALTO output 216 | $ kraken -i ... ... ocr -y # abbyyXML output 217 | 218 | hOCR output is slightly different from hOCR files produced by ocropus. Each 219 | ``ocr_line`` span contains not only the bounding box of the line but also 220 | character boxes (``x_bboxes`` attribute) indicating the coordinates of each 221 | character. In each line alternating sequences of alphanumeric and 222 | non-alphanumeric (in the unicode sense) characters are put into ``ocrx_word`` 223 | spans. Both have bounding boxes as attributes and the recognition confidence 224 | for each character in the ``x_conf`` attribute. 225 | 226 | Paragraph detection has been removed as it was deemed to be unduly dependent on 227 | certain typographic features which may not be valid for your input. 228 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # kraken documentation build configuration file, created by 4 | # sphinx-quickstart on Fri May 22 16:51:45 2015. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | from __future__ import absolute_import 16 | 17 | import sys 18 | import os 19 | import shlex 20 | 21 | from subprocess import Popen, PIPE 22 | # If extensions (or modules to document with autodoc) are in another directory, 23 | # add these directories to sys.path here. If the directory is relative to the 24 | # documentation root, use os.path.abspath to make it absolute, like shown here. 25 | #sys.path.insert(0, os.path.abspath('../kraken')) 26 | 27 | # -- General configuration ------------------------------------------------ 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | #needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = [ 36 | 'sphinx.ext.autodoc', 37 | 'sphinx.ext.napoleon', 38 | ] 39 | 40 | # Add any paths that contain templates here, relative to this directory. 41 | templates_path = ['_templates'] 42 | 43 | # The suffix(es) of source filenames. 
44 | # You can specify multiple suffix as a list of string: 45 | # source_suffix = ['.rst', '.md'] 46 | source_suffix = '.rst' 47 | 48 | # The encoding of source files. 49 | #source_encoding = 'utf-8-sig' 50 | 51 | # The master toctree document. 52 | master_doc = 'index' 53 | 54 | # General information about the project. 55 | project = u'kraken' 56 | copyright = u'2015, mittagessen' 57 | author = u'mittagessen' 58 | 59 | # The version info for the project you're documenting, acts as replacement for 60 | # |version| and |release|, also used in various other places throughout the 61 | # built documents. 62 | # 63 | # The short X.Y version. 64 | pipe = Popen('git describe --tags --always master', stdout=PIPE, shell=True) 65 | version = pipe.stdout.read().decode('utf-8') 66 | release = version 67 | 68 | # The language for content autogenerated by Sphinx. Refer to documentation 69 | # for a list of supported languages. 70 | # 71 | # This is also used if you do content translation via gettext catalogs. 72 | # Usually you set "language" from the command line for these cases. 73 | language = None 74 | 75 | # There are two options for replacing |today|: either, you set today to some 76 | # non-false value, then it is used: 77 | #today = '' 78 | # Else, today_fmt is used as the format for a strftime call. 79 | #today_fmt = '%B %d, %Y' 80 | 81 | # List of patterns, relative to source directory, that match files and 82 | # directories to ignore when looking for source files. 83 | exclude_patterns = ['_build'] 84 | 85 | # The reST default role (used for this markup: `text`) to use for all 86 | # documents. 87 | #default_role = None 88 | 89 | # If true, '()' will be appended to :func: etc. cross-reference text. 90 | #add_function_parentheses = True 91 | 92 | # If true, the current module name will be prepended to all description 93 | # unit titles (such as .. function::). 94 | #add_module_names = True 95 | 96 | # If true, sectionauthor and moduleauthor directives will be shown in the 97 | # output. They are ignored by default. 98 | #show_authors = False 99 | 100 | # The name of the Pygments (syntax highlighting) style to use. 101 | pygments_style = 'sphinx' 102 | 103 | # A list of ignored prefixes for module index sorting. 104 | #modindex_common_prefix = [] 105 | 106 | # If true, keep warnings as "system message" paragraphs in the built documents. 107 | #keep_warnings = False 108 | 109 | # If true, `todo` and `todoList` produce output, else they produce nothing. 110 | todo_include_todos = False 111 | 112 | 113 | # -- Options for HTML output ---------------------------------------------- 114 | 115 | # The theme to use for HTML and HTML Help pages. See the documentation for 116 | # a list of builtin themes. 117 | html_theme = 'alabaster' 118 | 119 | # Theme options are theme-specific and customize the look and feel of a theme 120 | # further. For a list of options available for each theme, see the 121 | # documentation. 122 | html_theme_options = { 123 | 'github_user': 'mittagessen', 124 | 'github_repo': 'kraken', 125 | 'travis_button': 'true', 126 | } 127 | 128 | # Add any paths that contain custom themes here, relative to this directory. 129 | #html_theme_path = [] 130 | 131 | # The name for this set of Sphinx documents. If None, it defaults to 132 | # " v documentation". 133 | #html_title = None 134 | 135 | # A shorter title for the navigation bar. Default is the same as html_title. 
136 | #html_short_title = None 137 | 138 | # The name of an image file (relative to this directory) to place at the top 139 | # of the sidebar. 140 | html_logo = '_static/kraken.png' 141 | 142 | # The name of an image file (within the static path) to use as favicon of the 143 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 144 | # pixels large. 145 | #html_favicon = None 146 | 147 | # Add any paths that contain custom static files (such as style sheets) here, 148 | # relative to this directory. They are copied after the builtin static files, 149 | # so a file named "default.css" will overwrite the builtin "default.css". 150 | html_static_path = ['_static'] 151 | 152 | # Add any extra paths that contain custom files (such as robots.txt or 153 | # .htaccess) here, relative to this directory. These files are copied 154 | # directly to the root of the documentation. 155 | #html_extra_path = [] 156 | 157 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 158 | # using the given strftime format. 159 | #html_last_updated_fmt = '%b %d, %Y' 160 | 161 | # If true, SmartyPants will be used to convert quotes and dashes to 162 | # typographically correct entities. 163 | #html_use_smartypants = True 164 | 165 | # Custom sidebar templates, maps document names to template names. 166 | html_sidebars = { 167 | 'index': ['sidebarintro.html', 'navigation.html', 'searchbox.html', 'versions.html'], 168 | '**': ['localtoc.html', 'relations.html', 'searchbox.html'] 169 | } 170 | 171 | # Additional templates that should be rendered to pages, maps page names to 172 | # template names. 173 | #html_additional_pages = {} 174 | 175 | # If false, no module index is generated. 176 | #html_domain_indices = True 177 | 178 | # If false, no index is generated. 179 | #html_use_index = True 180 | 181 | # If true, the index is split into individual pages for each letter. 182 | #html_split_index = False 183 | 184 | # If true, links to the reST sources are added to the pages. 185 | #html_show_sourcelink = True 186 | 187 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 188 | #html_show_sphinx = True 189 | 190 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 191 | #html_show_copyright = True 192 | 193 | # If true, an OpenSearch description file will be output, and all pages will 194 | # contain a tag referring to it. The value of this option must be the 195 | # base URL from which the finished HTML is served. 196 | #html_use_opensearch = '' 197 | 198 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 199 | #html_file_suffix = None 200 | 201 | # Language to be used for generating the HTML full-text search index. 202 | # Sphinx supports the following languages: 203 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 204 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' 205 | #html_search_language = 'en' 206 | 207 | # A dictionary with options for the search language support, empty by default. 208 | # Now only 'ja' uses this config value 209 | #html_search_options = {'type': 'default'} 210 | 211 | # The name of a javascript file (relative to the configuration directory) that 212 | # implements a search results scorer. If empty, the default will be used. 213 | #html_search_scorer = 'scorer.js' 214 | 215 | # Output file base name for HTML help builder. 
216 | htmlhelp_basename = 'krakendoc' 217 | 218 | # -- Options for LaTeX output --------------------------------------------- 219 | 220 | latex_elements = { 221 | # The paper size ('letterpaper' or 'a4paper'). 222 | #'papersize': 'letterpaper', 223 | 224 | # The font size ('10pt', '11pt' or '12pt'). 225 | #'pointsize': '10pt', 226 | 227 | # Additional stuff for the LaTeX preamble. 228 | #'preamble': '', 229 | 230 | # Latex figure (float) alignment 231 | #'figure_align': 'htbp', 232 | } 233 | 234 | # Grouping the document tree into LaTeX files. List of tuples 235 | # (source start file, target name, title, 236 | # author, documentclass [howto, manual, or own class]). 237 | latex_documents = [ 238 | (master_doc, 'kraken.tex', u'kraken Documentation', 239 | u'mittagessen', 'manual'), 240 | ] 241 | 242 | # The name of an image file (relative to this directory) to place at the top of 243 | # the title page. 244 | #latex_logo = None 245 | 246 | # For "manual" documents, if this is true, then toplevel headings are parts, 247 | # not chapters. 248 | #latex_use_parts = False 249 | 250 | # If true, show page references after internal links. 251 | #latex_show_pagerefs = False 252 | 253 | # If true, show URL addresses after external links. 254 | #latex_show_urls = False 255 | 256 | # Documents to append as an appendix to all manuals. 257 | #latex_appendices = [] 258 | 259 | # If false, no module index is generated. 260 | #latex_domain_indices = True 261 | 262 | 263 | # -- Options for manual page output --------------------------------------- 264 | 265 | # One entry per manual page. List of tuples 266 | # (source start file, name, description, authors, manual section). 267 | man_pages = [ 268 | (master_doc, 'kraken', u'kraken Documentation', 269 | [author], 1) 270 | ] 271 | 272 | # If true, show URL addresses after external links. 273 | #man_show_urls = False 274 | 275 | 276 | # -- Options for Texinfo output ------------------------------------------- 277 | 278 | # Grouping the document tree into Texinfo files. List of tuples 279 | # (source start file, target name, title, author, 280 | # dir menu entry, description, category) 281 | texinfo_documents = [ 282 | (master_doc, 'kraken', u'kraken Documentation', 283 | author, 'kraken', 'One line description of project.', 284 | 'Miscellaneous'), 285 | ] 286 | 287 | # Documents to append as an appendix to all manuals. 288 | #texinfo_appendices = [] 289 | 290 | # If false, no module index is generated. 291 | #texinfo_domain_indices = True 292 | 293 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 294 | #texinfo_show_urls = 'footnote' 295 | 296 | # If true, do not generate a @detailmenu in the "Top" node's menu. 297 | #texinfo_no_detailmenu = False 298 | 299 | scv_whitelist_branches = ('master',) 300 | import re 301 | scv_whitelist_tags = (re.compile(r'^\d+\.\d+\.0$'),) 302 | 303 | scv_greatest_tag = True 304 | 305 | scv_show_banner = True 306 | scv_banner_greatest_tag = True 307 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /kraken/lib/pyrnn_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 2 | # source: proto/pyrnn.proto 3 | 4 | from google.protobuf import descriptor as _descriptor 5 | from google.protobuf import message as _message 6 | from google.protobuf import reflection as _reflection 7 | from google.protobuf import symbol_database as _symbol_database 8 | from google.protobuf import descriptor_pb2 9 | # @@protoc_insertion_point(imports) 10 | 11 | _sym_db = _symbol_database.Default() 12 | 13 | 14 | 15 | 16 | DESCRIPTOR = _descriptor.FileDescriptor( 17 | name='proto/pyrnn.proto', 18 | package='kraken', 19 | syntax='proto2', 20 | serialized_pb=b'\n\x11proto/pyrnn.proto\x12\x06kraken\"\'\n\x05\x61rray\x12\x0b\n\x03\x64im\x18\x01 \x03(\r\x12\x11\n\x05value\x18\x02 \x03(\x02\x42\x02\x10\x01\"\xca\x01\n\x04lstm\x12\x1a\n\x03wgi\x18\x01 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wgf\x18\x02 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wgo\x18\x03 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wci\x18\x04 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wip\x18\x05 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wfp\x18\x06 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wop\x18\x07 \x02(\x0b\x32\r.kraken.array\"$\n\x07softmax\x12\x19\n\x02w2\x18\x01 \x02(\x0b\x32\r.kraken.array\"\xb1\x01\n\x05pyrnn\x12\x0c\n\x04kind\x18\x01 \x02(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06ninput\x18\n \x02(\r\x12\x0f\n\x07noutput\x18\x0b \x02(\r\x12\r\n\x05\x63odec\x18\x0c \x03(\t\x12\x1c\n\x06\x66wdnet\x18\r \x02(\x0b\x32\x0c.kraken.lstm\x12\x1c\n\x06revnet\x18\x0e \x02(\x0b\x32\x0c.kraken.lstm\x12 \n\x07softmax\x18\x0f \x02(\x0b\x32\x0f.kraken.softmax' 21 | ) 22 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 23 | 24 | 25 | 26 | 27 | _ARRAY = _descriptor.Descriptor( 28 | name='array', 29 | full_name='kraken.array', 30 | filename=None, 31 | file=DESCRIPTOR, 32 | containing_type=None, 33 | fields=[ 34 | _descriptor.FieldDescriptor( 35 | name='dim', full_name='kraken.array.dim', index=0, 36 | number=1, type=13, cpp_type=3, label=3, 37 | has_default_value=False, default_value=[], 38 | message_type=None, enum_type=None, containing_type=None, 39 | is_extension=False, extension_scope=None, 40 | options=None), 41 | _descriptor.FieldDescriptor( 42 | name='value', full_name='kraken.array.value', index=1, 43 | number=2, type=2, cpp_type=6, label=3, 44 | has_default_value=False, default_value=[], 45 | message_type=None, enum_type=None, containing_type=None, 46 | is_extension=False, extension_scope=None, 47 | options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), b'\020\001')), 48 | ], 49 | extensions=[ 50 | ], 51 | nested_types=[], 52 | enum_types=[ 53 | ], 54 | options=None, 55 | is_extendable=False, 56 | syntax='proto2', 57 | extension_ranges=[], 58 | oneofs=[ 59 | ], 60 | serialized_start=29, 61 | serialized_end=68, 62 | ) 63 | 64 | 65 | _LSTM = _descriptor.Descriptor( 66 | name='lstm', 67 | full_name='kraken.lstm', 68 | 
filename=None, 69 | file=DESCRIPTOR, 70 | containing_type=None, 71 | fields=[ 72 | _descriptor.FieldDescriptor( 73 | name='wgi', full_name='kraken.lstm.wgi', index=0, 74 | number=1, type=11, cpp_type=10, label=2, 75 | has_default_value=False, default_value=None, 76 | message_type=None, enum_type=None, containing_type=None, 77 | is_extension=False, extension_scope=None, 78 | options=None), 79 | _descriptor.FieldDescriptor( 80 | name='wgf', full_name='kraken.lstm.wgf', index=1, 81 | number=2, type=11, cpp_type=10, label=2, 82 | has_default_value=False, default_value=None, 83 | message_type=None, enum_type=None, containing_type=None, 84 | is_extension=False, extension_scope=None, 85 | options=None), 86 | _descriptor.FieldDescriptor( 87 | name='wgo', full_name='kraken.lstm.wgo', index=2, 88 | number=3, type=11, cpp_type=10, label=2, 89 | has_default_value=False, default_value=None, 90 | message_type=None, enum_type=None, containing_type=None, 91 | is_extension=False, extension_scope=None, 92 | options=None), 93 | _descriptor.FieldDescriptor( 94 | name='wci', full_name='kraken.lstm.wci', index=3, 95 | number=4, type=11, cpp_type=10, label=2, 96 | has_default_value=False, default_value=None, 97 | message_type=None, enum_type=None, containing_type=None, 98 | is_extension=False, extension_scope=None, 99 | options=None), 100 | _descriptor.FieldDescriptor( 101 | name='wip', full_name='kraken.lstm.wip', index=4, 102 | number=5, type=11, cpp_type=10, label=2, 103 | has_default_value=False, default_value=None, 104 | message_type=None, enum_type=None, containing_type=None, 105 | is_extension=False, extension_scope=None, 106 | options=None), 107 | _descriptor.FieldDescriptor( 108 | name='wfp', full_name='kraken.lstm.wfp', index=5, 109 | number=6, type=11, cpp_type=10, label=2, 110 | has_default_value=False, default_value=None, 111 | message_type=None, enum_type=None, containing_type=None, 112 | is_extension=False, extension_scope=None, 113 | options=None), 114 | _descriptor.FieldDescriptor( 115 | name='wop', full_name='kraken.lstm.wop', index=6, 116 | number=7, type=11, cpp_type=10, label=2, 117 | has_default_value=False, default_value=None, 118 | message_type=None, enum_type=None, containing_type=None, 119 | is_extension=False, extension_scope=None, 120 | options=None), 121 | ], 122 | extensions=[ 123 | ], 124 | nested_types=[], 125 | enum_types=[ 126 | ], 127 | options=None, 128 | is_extendable=False, 129 | syntax='proto2', 130 | extension_ranges=[], 131 | oneofs=[ 132 | ], 133 | serialized_start=71, 134 | serialized_end=273, 135 | ) 136 | 137 | 138 | _SOFTMAX = _descriptor.Descriptor( 139 | name='softmax', 140 | full_name='kraken.softmax', 141 | filename=None, 142 | file=DESCRIPTOR, 143 | containing_type=None, 144 | fields=[ 145 | _descriptor.FieldDescriptor( 146 | name='w2', full_name='kraken.softmax.w2', index=0, 147 | number=1, type=11, cpp_type=10, label=2, 148 | has_default_value=False, default_value=None, 149 | message_type=None, enum_type=None, containing_type=None, 150 | is_extension=False, extension_scope=None, 151 | options=None), 152 | ], 153 | extensions=[ 154 | ], 155 | nested_types=[], 156 | enum_types=[ 157 | ], 158 | options=None, 159 | is_extendable=False, 160 | syntax='proto2', 161 | extension_ranges=[], 162 | oneofs=[ 163 | ], 164 | serialized_start=275, 165 | serialized_end=311, 166 | ) 167 | 168 | 169 | _PYRNN = _descriptor.Descriptor( 170 | name='pyrnn', 171 | full_name='kraken.pyrnn', 172 | filename=None, 173 | file=DESCRIPTOR, 174 | containing_type=None, 175 | fields=[ 176 | 
_descriptor.FieldDescriptor( 177 | name='kind', full_name='kraken.pyrnn.kind', index=0, 178 | number=1, type=9, cpp_type=9, label=2, 179 | has_default_value=False, default_value=b"".decode('utf-8'), 180 | message_type=None, enum_type=None, containing_type=None, 181 | is_extension=False, extension_scope=None, 182 | options=None), 183 | _descriptor.FieldDescriptor( 184 | name='name', full_name='kraken.pyrnn.name', index=1, 185 | number=2, type=9, cpp_type=9, label=1, 186 | has_default_value=False, default_value=b"".decode('utf-8'), 187 | message_type=None, enum_type=None, containing_type=None, 188 | is_extension=False, extension_scope=None, 189 | options=None), 190 | _descriptor.FieldDescriptor( 191 | name='ninput', full_name='kraken.pyrnn.ninput', index=2, 192 | number=10, type=13, cpp_type=3, label=2, 193 | has_default_value=False, default_value=0, 194 | message_type=None, enum_type=None, containing_type=None, 195 | is_extension=False, extension_scope=None, 196 | options=None), 197 | _descriptor.FieldDescriptor( 198 | name='noutput', full_name='kraken.pyrnn.noutput', index=3, 199 | number=11, type=13, cpp_type=3, label=2, 200 | has_default_value=False, default_value=0, 201 | message_type=None, enum_type=None, containing_type=None, 202 | is_extension=False, extension_scope=None, 203 | options=None), 204 | _descriptor.FieldDescriptor( 205 | name='codec', full_name='kraken.pyrnn.codec', index=4, 206 | number=12, type=9, cpp_type=9, label=3, 207 | has_default_value=False, default_value=[], 208 | message_type=None, enum_type=None, containing_type=None, 209 | is_extension=False, extension_scope=None, 210 | options=None), 211 | _descriptor.FieldDescriptor( 212 | name='fwdnet', full_name='kraken.pyrnn.fwdnet', index=5, 213 | number=13, type=11, cpp_type=10, label=2, 214 | has_default_value=False, default_value=None, 215 | message_type=None, enum_type=None, containing_type=None, 216 | is_extension=False, extension_scope=None, 217 | options=None), 218 | _descriptor.FieldDescriptor( 219 | name='revnet', full_name='kraken.pyrnn.revnet', index=6, 220 | number=14, type=11, cpp_type=10, label=2, 221 | has_default_value=False, default_value=None, 222 | message_type=None, enum_type=None, containing_type=None, 223 | is_extension=False, extension_scope=None, 224 | options=None), 225 | _descriptor.FieldDescriptor( 226 | name='softmax', full_name='kraken.pyrnn.softmax', index=7, 227 | number=15, type=11, cpp_type=10, label=2, 228 | has_default_value=False, default_value=None, 229 | message_type=None, enum_type=None, containing_type=None, 230 | is_extension=False, extension_scope=None, 231 | options=None), 232 | ], 233 | extensions=[ 234 | ], 235 | nested_types=[], 236 | enum_types=[ 237 | ], 238 | options=None, 239 | is_extendable=False, 240 | syntax='proto2', 241 | extension_ranges=[], 242 | oneofs=[ 243 | ], 244 | serialized_start=314, 245 | serialized_end=491, 246 | ) 247 | 248 | _LSTM.fields_by_name['wgi'].message_type = _ARRAY 249 | _LSTM.fields_by_name['wgf'].message_type = _ARRAY 250 | _LSTM.fields_by_name['wgo'].message_type = _ARRAY 251 | _LSTM.fields_by_name['wci'].message_type = _ARRAY 252 | _LSTM.fields_by_name['wip'].message_type = _ARRAY 253 | _LSTM.fields_by_name['wfp'].message_type = _ARRAY 254 | _LSTM.fields_by_name['wop'].message_type = _ARRAY 255 | _SOFTMAX.fields_by_name['w2'].message_type = _ARRAY 256 | _PYRNN.fields_by_name['fwdnet'].message_type = _LSTM 257 | _PYRNN.fields_by_name['revnet'].message_type = _LSTM 258 | _PYRNN.fields_by_name['softmax'].message_type = _SOFTMAX 259 | 
DESCRIPTOR.message_types_by_name['array'] = _ARRAY 260 | DESCRIPTOR.message_types_by_name['lstm'] = _LSTM 261 | DESCRIPTOR.message_types_by_name['softmax'] = _SOFTMAX 262 | DESCRIPTOR.message_types_by_name['pyrnn'] = _PYRNN 263 | 264 | array = _reflection.GeneratedProtocolMessageType('array', (_message.Message,), dict( 265 | DESCRIPTOR = _ARRAY, 266 | __module__ = 'proto.pyrnn_pb2' 267 | # @@protoc_insertion_point(class_scope:kraken.array) 268 | )) 269 | _sym_db.RegisterMessage(array) 270 | 271 | lstm = _reflection.GeneratedProtocolMessageType('lstm', (_message.Message,), dict( 272 | DESCRIPTOR = _LSTM, 273 | __module__ = 'proto.pyrnn_pb2' 274 | # @@protoc_insertion_point(class_scope:kraken.lstm) 275 | )) 276 | _sym_db.RegisterMessage(lstm) 277 | 278 | softmax = _reflection.GeneratedProtocolMessageType('softmax', (_message.Message,), dict( 279 | DESCRIPTOR = _SOFTMAX, 280 | __module__ = 'proto.pyrnn_pb2' 281 | # @@protoc_insertion_point(class_scope:kraken.softmax) 282 | )) 283 | _sym_db.RegisterMessage(softmax) 284 | 285 | pyrnn = _reflection.GeneratedProtocolMessageType('pyrnn', (_message.Message,), dict( 286 | DESCRIPTOR = _PYRNN, 287 | __module__ = 'proto.pyrnn_pb2' 288 | # @@protoc_insertion_point(class_scope:kraken.pyrnn) 289 | )) 290 | _sym_db.RegisterMessage(pyrnn) 291 | 292 | 293 | _ARRAY.fields_by_name['value'].has_options = True 294 | _ARRAY.fields_by_name['value']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), b'\020\001') 295 | # @@protoc_insertion_point(module_scope) 296 | -------------------------------------------------------------------------------- /tests/test_codec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | import os 4 | 5 | from future.utils import PY2 6 | from nose.tools import raises 7 | 8 | from torch import IntTensor 9 | 10 | from kraken.lib import codec 11 | from kraken.lib.exceptions import KrakenEncodeException 12 | 13 | class TestCodec(unittest.TestCase): 14 | 15 | """ 16 | Testing codec mapping routines 17 | """ 18 | 19 | def setUp(self): 20 | # codec mapping one code point to one label 21 | self.o2o_codec = codec.PytorchCodec('ab') 22 | # codec mapping many code points to one label 23 | self.m2o_codec = codec.PytorchCodec(['aaa' , 'aa', 'a', 'b']) 24 | # codec mapping one code point to many labels 25 | self.o2m_codec = codec.PytorchCodec({'a': [10, 11, 12], 'b': [12, 45, 80]}) 26 | # codec mapping many code points to many labels 27 | self.m2m_codec = codec.PytorchCodec({'aaa': [10, 11, 12], 'aa': [10, 10], 'a': [10], 'bb': [15], 'b': [12]}) 28 | 29 | self.invalid_c_sequence = 'aaababbcaaa' 30 | self.valid_c_sequence = 'aaababbaaabbbb' 31 | 32 | self.invalid_l_sequence = [(45, 78, 778, 0.3793492615638364), 33 | (10, 203, 859, 0.9485075253700872), 34 | (11, 70, 601, 0.7885297329523855), 35 | (12, 251, 831, 0.7216817042926938), 36 | (900, 72, 950, 0.27609823017048707)] 37 | 38 | def test_o2o_encode(self): 39 | """ 40 | Test correct encoding of one-to-one code point sequence 41 | """ 42 | self.assertTrue(self.o2o_codec.encode(self.valid_c_sequence).eq( 43 | IntTensor([1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2])).all()) 44 | 45 | def test_m2o_encode(self): 46 | """ 47 | Test correct encoding of many-to-one code point sequence 48 | """ 49 | self.assertTrue(self.m2o_codec.encode(self.valid_c_sequence).eq( 50 | IntTensor([3, 4, 1, 4, 4, 3, 4, 4, 4, 4])).all()) 51 | 52 | def test_o2m_encode(self): 53 | """ 54 | Test correct encoding of one-to-many code point 
sequence 55 | """ 56 | self.assertTrue(self.o2m_codec.encode(self.valid_c_sequence).eq( 57 | IntTensor([10, 11, 12, 10, 11, 12, 10, 11, 12, 58 | 12, 45, 80, 10, 11, 12, 12, 45, 80, 12, 45, 59 | 80, 10, 11, 12, 10, 11, 12, 10, 11, 12, 12, 60 | 45, 80, 12, 45, 80, 12, 45, 80, 12, 45, 61 | 80])).all()) 62 | 63 | def test_m2m_encode(self): 64 | """ 65 | Test correct encoding of many-to-many code point sequence 66 | """ 67 | self.assertTrue(self.m2m_codec.encode(self.valid_c_sequence).eq( 68 | IntTensor([10, 11, 12, 12, 10, 15, 10, 11, 12, 69 | 15, 15])).all()) 70 | 71 | def test_o2o_decode(self): 72 | """ 73 | Test correct decoding of one-to-one label sequence 74 | """ 75 | self.assertEqual(''.join(x[0] for x in self.o2o_codec.decode([(1, 288, 652, 0.8537325587315542), 76 | (1, 120, 861, 0.4968470297302481), 77 | (1, 372, 629, 0.008650773294205938), 78 | (2, 406, 831, 0.15637985875540783), 79 | (1, 3, 824, 0.26475146828232776), 80 | (2, 228, 959, 0.3062689368044844), 81 | (2, 472, 679, 0.8677848554329698), 82 | (1, 482, 771, 0.6055591197109657), 83 | (1, 452, 606, 0.40744265053745055), 84 | (1, 166, 879, 0.7509269177978337), 85 | (2, 92, 729, 0.34554103785480306), 86 | (2, 227, 959, 0.3006394689033981), 87 | (2, 341, 699, 0.07798704843315862), 88 | (2, 142, 513, 0.9933850573241767)])), 89 | 'aaababbaaabbbb') 90 | 91 | def test_m2o_decode(self): 92 | """ 93 | Test correct decoding of many-to-one label sequence 94 | """ 95 | self.assertEqual(''.join(x[0] for x in self.m2o_codec.decode([(3, 28, 967, 0.07761440833942468), 96 | (4, 282, 565, 0.4946281412618093), 97 | (1, 411, 853, 0.7767301050586806), 98 | (4, 409, 501, 0.47915609540996495), 99 | (4, 299, 637, 0.7755889399450564), 100 | (3, 340, 834, 0.726656062406549), 101 | (4, 296, 846, 0.2274859668684881), 102 | (4, 238, 695, 0.32982930128257815), 103 | (4, 187, 970, 0.43354272748701805), 104 | (4, 376, 863, 0.24483897879550764)])), 105 | 'aaababbaaabbbb') 106 | 107 | def test_o2m_decode(self): 108 | """ 109 | Test correct decoding of one-to-many label sequence 110 | """ 111 | self.assertEqual(''.join(x[0] for x in self.o2m_codec.decode([(10, 35, 959, 0.43819571289990644), 112 | (11, 361, 904, 0.1801115018592916), 113 | (12, 15, 616, 0.5987506334315549), 114 | (10, 226, 577, 0.6178248939780698), 115 | (11, 227, 814, 0.31531097360327787), 116 | (12, 390, 826, 0.7706594984014595), 117 | (10, 251, 579, 0.9442530315305507), 118 | (11, 269, 870, 0.4475979925584944), 119 | (12, 456, 609, 0.9396137478409995), 120 | (12, 60, 757, 0.06416607235266458), 121 | (45, 318, 918, 0.8129458423341515), 122 | (80, 15, 914, 0.49773432435726517), 123 | (10, 211, 648, 0.7919220961861382), 124 | (11, 326, 804, 0.7852387442556333), 125 | (12, 93, 978, 0.9376801123379804), 126 | (12, 23, 698, 0.915543635886972), 127 | (45, 71, 599, 0.8137750423628737), 128 | (80, 167, 980, 0.6501035181890226), 129 | (12, 259, 823, 0.3122860659712233), 130 | (45, 312, 948, 0.20582589628806058), 131 | (80, 430, 694, 0.3528792552966924), 132 | (10, 470, 866, 0.0685524032330419), 133 | (11, 459, 826, 0.39354887700146846), 134 | (12, 392, 926, 0.4102018609185847), 135 | (10, 271, 592, 0.1877915301623876), 136 | (11, 206, 995, 0.21614062190981576), 137 | (12, 466, 648, 0.3106914763314057), 138 | (10, 368, 848, 0.28715379701274113), 139 | (11, 252, 962, 0.5535299604896257), 140 | (12, 387, 709, 0.844810014550603), 141 | (12, 156, 916, 0.9803695305965802), 142 | (45, 150, 555, 0.5969071330809561), 143 | (80, 381, 922, 0.5608300913697513), 144 | (12, 35, 762, 0.5227506455088722), 145 | (45, 
364, 931, 0.7205481732247938), 146 | (80, 341, 580, 0.536934566913969), 147 | (12, 79, 919, 0.5136066153481802), 148 | (45, 377, 773, 0.6507467790760987), 149 | (80, 497, 931, 0.7635100185309783), 150 | (12, 76, 580, 0.9542477438586341), 151 | (45, 37, 904, 0.4299813924853797), 152 | (80, 425, 638, 0.6825047210425983)])), 153 | 'aaababbaaabbbb') 154 | 155 | def test_m2m_decode(self): 156 | """ 157 | Test correct decoding of many-to-many label sequence 158 | """ 159 | self.assertEqual(''.join(x[0] for x in self.m2m_codec.decode([(10, 313, 788, 0.9379917930525369), 160 | (11, 117, 793, 0.9974374577004185), 161 | (12, 50, 707, 0.020074164253385374), 162 | (12, 382, 669, 0.525910770170754), 163 | (10, 458, 833, 0.4292373233167248), 164 | (15, 45, 831, 0.5759709886686226), 165 | (10, 465, 729, 0.8492104897235935), 166 | (11, 78, 800, 0.24733538459309445), 167 | (12, 375, 872, 0.26908722769105353), 168 | (15, 296, 889, 0.44251812620463726), 169 | (15, 237, 930, 0.5456105208117391)])), 170 | 'aaababbaaabbbb') 171 | 172 | @raises(KrakenEncodeException) 173 | def test_o2o_decode_invalid(self): 174 | """ 175 | Test correct handling of undecodable sequences (one-to-one decoder) 176 | """ 177 | self.o2o_codec.decode(self.invalid_l_sequence) 178 | 179 | @raises(KrakenEncodeException) 180 | def test_m2o_decode_invalid(self): 181 | """ 182 | Test correct handling of undecodable sequences (many-to-one decoder) 183 | """ 184 | self.m2o_codec.decode(self.invalid_l_sequence) 185 | 186 | @raises(KrakenEncodeException) 187 | def test_o2m_decode_invalid(self): 188 | """ 189 | Test correct handling of undecodable sequences (one-to-many decoder) 190 | """ 191 | self.o2m_codec.decode(self.invalid_l_sequence) 192 | 193 | @raises(KrakenEncodeException) 194 | def test_m2m_decode_invalid(self): 195 | """ 196 | Test correct handling of undecodable sequences (many-to-many decoder) 197 | """ 198 | self.m2m_codec.decode(self.invalid_l_sequence) 199 | 200 | @raises(KrakenEncodeException) 201 | def test_o2o_encode_invalid(self): 202 | """ 203 | Test correct handling of unencodable sequences (one-to-one encoder) 204 | """ 205 | self.o2o_codec.encode(self.invalid_c_sequence) 206 | 207 | @raises(KrakenEncodeException) 208 | def test_m2o_encode_invalid(self): 209 | """ 210 | Test correct handling of unencodable sequences (many-to-one encoder) 211 | """ 212 | self.m2o_codec.encode(self.invalid_c_sequence) 213 | 214 | @raises(KrakenEncodeException) 215 | def test_o2m_encode_invalid(self): 216 | """ 217 | Test correct handling of unencodable sequences (one-to-many encoder) 218 | """ 219 | self.o2m_codec.encode(self.invalid_c_sequence) 220 | 221 | @raises(KrakenEncodeException) 222 | def test_m2m_encode_invalid(self): 223 | """ 224 | Test correct handling of unencodable sequences (many-to-many encoder) 225 | """ 226 | self.m2m_codec.encode(self.invalid_c_sequence) 227 | --------------------------------------------------------------------------------
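The codec tests above pin down the behaviour of kraken.lib.codec.PytorchCodec: encode() turns a code point string into an IntTensor of labels, decode() turns (label, start, end, confidence) tuples back into (code point, start, end, confidence) tuples, and many-to-one codecs greedily match the longest code point sequence (e.g. 'aaab' encodes to [3, 4] with the m2o fixture). A minimal usage sketch follows; the mapping and the placeholder positions/confidences are modelled on the fixtures in tests/test_codec.py rather than taken from the repository.

    from kraken.lib import codec

    # one code point per label, as in the o2o fixture: 'a' -> 1, 'b' -> 2
    o2o = codec.PytorchCodec('ab')

    # encode() maps a code point string to an IntTensor of labels
    labels = o2o.encode('aabb')        # IntTensor([1, 1, 2, 2])

    # decode() maps (label, start, end, confidence) tuples back to
    # (code point, start, end, confidence) tuples; the positions and
    # confidences below are placeholder values
    decoded = o2o.decode([(1, 0, 10, 0.9), (2, 10, 20, 0.8)])
    assert ''.join(x[0] for x in decoded) == 'ab'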