├── kraken
│ ├── lib
│ │ ├── __init__.py
│ │ ├── sl.py
│ │ ├── exceptions.py
│ │ ├── log.py
│ │ ├── util.py
│ │ ├── lineest.py
│ │ ├── lstm.py
│ │ ├── morph.py
│ │ ├── models.py
│ │ ├── ctc_decoder.py
│ │ ├── train.py
│ │ ├── codec.py
│ │ ├── clstm_pb2.py
│ │ └── pyrnn_pb2.py
│ ├── script.clstm
│ ├── __init__.py
│ ├── templates
│ │ ├── report
│ │ ├── hocr
│ │ ├── abbyyxml
│ │ ├── style.css
│ │ ├── alto
│ │ └── layout.html
│ ├── contrib
│ │ ├── recognition_boxes.py
│ │ └── generate_scripts.py
│ ├── iso15924.json
│ ├── binarization.py
│ ├── transcribe.py
│ ├── repo.py
│ └── serialization.py
├── docs
│ ├── _static
│ │ └── kraken.png
│ ├── gpu.rst
│ ├── _templates
│ │ └── sidebarintro.html
│ ├── api.rst
│ ├── models.rst
│ ├── index.rst
│ ├── vgsl.rst
│ ├── Makefile
│ ├── make.bat
│ ├── advanced.rst
│ └── conf.py
├── tests
│ ├── resources
│ │ ├── bw.png
│ │ ├── input.jpg
│ │ ├── input.tif
│ │ ├── toy.clstm
│ │ ├── model.pronn
│ │ ├── model.pyrnn.gz
│ │ └── segmentation.json
│ ├── test_train.py
│ ├── test_rpred.py
│ ├── test_transcribe.py
│ ├── test_pageseg.py
│ ├── test_vgsl.py
│ ├── test_models.py
│ ├── test_binarization.py
│ ├── test_serialization.py
│ ├── test_layers.py
│ └── test_codec.py
├── requirements.txt
├── setup.py
├── environment_cuda.yml
├── environment.yml
├── setup.cfg
├── .gitignore
├── .travis.yml
├── README.rst
└── LICENSE
/kraken/lib/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/kraken/script.clstm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/D-K-E/kraken/master/kraken/script.clstm
--------------------------------------------------------------------------------
/docs/_static/kraken.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/D-K-E/kraken/master/docs/_static/kraken.png
--------------------------------------------------------------------------------
/tests/resources/bw.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/bw.png
--------------------------------------------------------------------------------
/tests/resources/input.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/input.jpg
--------------------------------------------------------------------------------
/tests/resources/input.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/input.tif
--------------------------------------------------------------------------------
/tests/resources/toy.clstm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/toy.clstm
--------------------------------------------------------------------------------
/tests/resources/model.pronn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/model.pronn
--------------------------------------------------------------------------------
/tests/resources/model.pyrnn.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/D-K-E/kraken/master/tests/resources/model.pyrnn.gz
--------------------------------------------------------------------------------
/kraken/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | entry point for kraken functionality
3 | """
4 |
5 | from __future__ import absolute_import, division, print_function
6 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | lxml
2 | future
3 | requests
4 | click>=7.0
5 | numpy
6 | Pillow
7 | regex
8 | scipy
9 | protobuf>=3.0.0
10 | jinja2
11 | python-bidi
12 | torchvision
13 | torch>=0.4.1
14 | coremltools
15 |
--------------------------------------------------------------------------------
/docs/gpu.rst:
--------------------------------------------------------------------------------
1 | .. _gpu:
2 |
3 | GPU Acceleration
4 | ================
5 |
6 | The latest version of kraken uses a new pytorch backend which enables GPU
7 | acceleration both for training and recognition. Apart from a compatible Nvidia
8 | GPU, CUDA and cuDNN have to be installed so pytorch can run computation on it.
9 |
10 |
11 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from __future__ import absolute_import, division, print_function
4 |
5 | from setuptools import setup
6 |
7 | setup(
8 | include_package_data=True,
9 | test_suite="nose.collector",
10 | tests_require=['nose', 'hocr-spec'],
11 | setup_requires=['pbr'],
12 | pbr=True,
13 | )
14 |
--------------------------------------------------------------------------------
/docs/_templates/sidebarintro.html:
--------------------------------------------------------------------------------
1 |
Useful Links
2 |
8 |
--------------------------------------------------------------------------------
/environment_cuda.yml:
--------------------------------------------------------------------------------
1 | name: kraken
2 | channels:
3 | - pytorch
4 | - fastai
5 | - defaults
6 | dependencies:
7 | - python>=3.6
8 | - lxml
9 | - future
10 | - regex
11 | - requests
12 | - click>=7.0
13 | - numpy
14 | - pillow
15 | - scipy
16 | - protobuf>=3.0.0
17 | - jinja2
18 | - torchvision-nightly
19 | - pytorch-nightly
20 | - pip:
21 | - coremltools
22 | - python-bidi
23 | - git+https://github.com/mittagessen/kraken.git@master
24 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: kraken
2 | channels:
3 | - pytorch
4 | - fastai
5 | - defaults
6 | dependencies:
7 | - python>=3.6
8 | - lxml
9 | - future
10 | - regex
11 | - requests
12 | - click>=7.0
13 | - numpy
14 | - pillow
15 | - scipy
16 | - protobuf>=3.0.0
17 | - jinja2
18 | - torchvision-nightly-cpu
19 | - pytorch-nightly-cpu
20 | - pip:
21 | - coremltools
22 | - python-bidi
23 | - git+https://github.com/mittagessen/kraken.git@master
24 |
--------------------------------------------------------------------------------
/kraken/templates/report:
--------------------------------------------------------------------------------
1 | === report {{ report.name }} ===
2 |
3 | {{ report.chars }} Characters
4 | {{ report.errors }} Errors
5 | {{ '%0.2f'| format(report.accuracy) }}% Accuracy
6 |
7 | {{ report.insertions }} Insertions
8 | {{ report.deletions }} Deletions
9 | {{ report.substitutions }} Substitutions
10 |
11 | Count Missed %Right
12 | {% for script in report.scripts %}
13 | {{ script.count }} {{ script.errors }} {{'%0.2f'| format(script.accuracy) }}% {{ script.script }}
14 | {% endfor %}
15 |
16 | Errors Correct-Generated
17 | {% for count in report.counts %}
18 | {{ count.errors }} {{ '{ ' }}{{ count.correct }}{{ ' }' }} - {{ '{ ' }}{{ count.generated }}{{ ' }' }}
19 | {% endfor %}
20 |
--------------------------------------------------------------------------------
/kraken/lib/sl.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def dim0(s):
5 | """Dimension of the slice list for dimension 0."""
6 | return s[0].stop-s[0].start
7 |
8 |
9 | def dim1(s):
10 | """Dimension of the slice list for dimension 1."""
11 | return s[1].stop-s[1].start
12 |
13 |
14 | def area(a):
15 | """Return the area of the slice list (ignores anything past a[:2]."""
16 | return np.prod([max(x.stop-x.start, 0) for x in a[:2]])
17 |
18 |
19 | def width(s):
20 | return s[1].stop-s[1].start
21 |
22 |
23 | def height(s):
24 | return s[0].stop-s[0].start
25 |
26 |
27 | def aspect(a):
28 | return height(a)*1.0/width(a)
29 |
30 |
31 | def xcenter(s):
32 | return np.mean([s[1].stop, s[1].start])
33 |
34 |
35 | def ycenter(s):
36 | return np.mean([s[0].stop, s[0].start])
37 |
38 |
39 | def center(s):
40 | return (ycenter(s), xcenter(s))
41 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = kraken
3 | author = Benjamin Kiessling
4 | author-email = mittagessen@l.unchti.me
5 | summary = OCR/HTR engine for all the languages
6 | home-page = http://kraken.re
7 | description-file = README.rst
8 | license = Apache
9 | classifier =
10 | Development Status :: 5 - Production/Stable
11 | Environment :: Console
12 | Intended Audience :: Science/Research
13 | License :: OSI Approved :: Apache Software License
14 | Operating System :: POSIX
15 | Programming Language :: Python :: 3.6
16 | Programming Language :: Python :: 3.7
17 |
18 | keywords =
19 | ocr
20 | ocropus
21 |
22 | [bdist_wheel]
23 | universal = 1
24 |
25 | [files]
26 | packages = kraken
27 |
28 | [entry_points]
29 | console_scripts =
30 | kraken = kraken.kraken:cli
31 | ketos = kraken.ketos:cli
32 |
33 | [flake8]
34 | max-line-length = 160
35 | exclude = tests/*
36 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | [._]*.s[a-w][a-z]
2 | [._]s[a-w][a-z]
3 | *.un~
4 | Session.vim
5 | .netrwhist
6 | *~
7 | # Byte-compiled / optimized / DLL files
8 | __pycache__/
9 | *.py[cod]
10 | *$py.class
11 |
12 | # C extensions
13 | *.so
14 |
15 | # Distribution / packaging
16 | .Python
17 | env/
18 | build/
19 | develop-eggs/
20 | dist/
21 | downloads/
22 | eggs/
23 | .eggs/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *,cover
51 |
52 | # Sphinx documentation
53 | docs/_build/
54 |
--------------------------------------------------------------------------------
/kraken/templates/hocr:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | {% if page.scripts %}
8 |
9 | {% endif %}
10 |
11 |
12 |
13 | {% for line in page.lines %}
14 |
15 | {% for segment in line.recognition %}
16 | {{ segment.text }}
17 | {% endfor %}
18 |
19 |
20 | {% endfor %}
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/tests/test_train.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import unittest
3 |
4 | from nose.tools import raises
5 |
6 | from kraken.lib import train
7 | from itertools import cycle
8 |
9 | class TestTrain(unittest.TestCase):
10 | """
11 | Testing model trainer interrupter classes
12 | """
13 | def test_early_stopping(self):
14 | """
15 | Tests early stopping interrupter.
16 | """
17 | it = train.EarlyStopping(cycle('a'), min_delta = 1, lag = 5)
18 | for epoch, _ in enumerate(it):
19 | it.update(epoch if epoch < 10 else 10)
20 | self.assertEqual(15, epoch)
21 | self.assertEqual(it.best_epoch, 10)
22 | self.assertEqual(it.best_loss, 10)
23 |
24 | def test_epoch_stopping(self):
25 | """
26 | Tests stopping after n epochs.
27 | """
28 | it = train.EpochStopping(cycle('a'), epochs = 57)
29 | for epoch, _ in enumerate(it):
30 | it.update(epoch)
31 | self.assertEqual(56, epoch)
32 | self.assertEqual(it.best_epoch, 56)
33 | self.assertEqual(it.best_loss, 56)
34 |
--------------------------------------------------------------------------------
/kraken/contrib/recognition_boxes.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import os
4 | import sys
5 |
6 | from PIL import Image, ImageDraw
7 |
8 | from kraken.pageseg import segment
9 | from kraken.binarization import nlbin
10 | from kraken.rpred import rpred
11 | from itertools import cycle
12 | from kraken.lib import models
13 |
14 | cmap = cycle([(230, 25, 75, 127),
15 | (60, 180, 75, 127),
16 | (255, 225, 25, 127),
17 | (0, 130, 200, 127),
18 | (245, 130, 48, 127),
19 | (145, 30, 180, 127),
20 | (70, 240, 240, 127)])
21 |
22 | net = models.load_any(sys.argv[1])
23 |
24 | for fname in sys.argv[2:]:
25 | im = Image.open(fname)
26 | print(fname)
27 | im = nlbin(im)
28 | res = segment(im, maxcolseps=0)
29 | pred = rpred(net, im, res)
30 | im = im.convert('RGBA')
31 | tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
32 | draw = ImageDraw.Draw(tmp)
33 | for line in pred:
34 | for box in line.cuts:
35 | draw.rectangle(box, fill=next(cmap))
36 | im = Image.alpha_composite(im, tmp)
37 | im.save('high_{}'.format(os.path.basename(fname)))
38 |
--------------------------------------------------------------------------------
/tests/test_rpred.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import, division, print_function
4 |
5 | import os
6 | import unittest
7 |
8 | from PIL import Image
9 | from nose.tools import raises
10 |
11 | from kraken.lib.models import load_any
12 | from kraken.rpred import rpred
13 | from kraken.lib.exceptions import KrakenInputException
14 |
15 | thisfile = os.path.abspath(os.path.dirname(__file__))
16 | resources = os.path.abspath(os.path.join(thisfile, 'resources'))
17 |
18 | class TestRecognition(unittest.TestCase):
19 |
20 | """
21 | Tests of the recognition facility and associated routines.
22 | """
23 | def setUp(self):
24 | self.im = Image.open(os.path.join(resources, 'bw.png'))
25 |
26 | def tearDown(self):
27 | self.im.close()
28 |
29 | @raises(KrakenInputException)
30 | def test_rpred_outbounds(self):
31 | """
32 | Tests correct handling of invalid line coordinates.
33 | """
34 | nn = load_any(os.path.join(resources, 'toy.clstm'))
35 | pred = rpred(nn, self.im, {'boxes': [[-1, -1, 10000, 10000]], 'text_direction': 'horizontal'}, True)
36 | next(pred)
37 |
--------------------------------------------------------------------------------
/tests/test_transcribe.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import, division, print_function
4 |
5 | import os
6 | import json
7 | import unittest
8 |
9 | from PIL import Image
10 | from lxml import etree
11 | from io import BytesIO
12 | from kraken.transcribe import TranscriptionInterface
13 |
14 | thisfile = os.path.abspath(os.path.dirname(__file__))
15 | resources = os.path.abspath(os.path.join(thisfile, 'resources'))
16 |
17 | class TestTranscriptionInterface(unittest.TestCase):
18 |
19 | """
20 | Test of the transcription interface generation
21 | """
22 |
23 | def test_transcription_generation(self):
24 | """
25 | Tests creation of transcription interfaces with segmentation.
26 | """
27 | tr = TranscriptionInterface()
28 | with open(os.path.join(resources, 'segmentation.json')) as fp:
29 | seg = json.load(fp)
30 | with Image.open(os.path.join(resources, 'input.jpg')) as im:
31 | tr.add_page(im, seg)
32 | fp = BytesIO()
33 | tr.write(fp)
34 | # this will not throw an exception ever so we need a better validator
35 | etree.HTML(fp.getvalue())
36 |
--------------------------------------------------------------------------------
/tests/resources/segmentation.json:
--------------------------------------------------------------------------------
1 | {"boxes": [[0, 29, 518, 56], [25, 54, 122, 82], [9, 74, 95, 119], [103, 75, 146, 131], [7, 138, 136, 231], [10, 228, 122, 348], [13, 230, 65, 285], [74, 304, 121, 354], [12, 353, 143, 405], [15, 450, 109, 521], [17, 511, 147, 574], [108, 544, 151, 597], [30, 591, 143, 694], [21, 696, 149, 838], [13, 832, 155, 900], [3, 880, 93, 970], [20, 989, 60, 1036], [13, 1096, 67, 1152], [87, 1502, 126, 1558], [7, 1866, 132, 1949], [21, 1978, 93, 2051], [26, 2048, 120, 2091], [518, 297, 580, 337], [654, 293, 1088, 332], [514, 353, 1294, 398], [519, 407, 1294, 447], [515, 453, 1292, 499], [518, 505, 1290, 546], [517, 553, 1292, 594], [514, 603, 1292, 647], [518, 652, 1293, 693], [519, 700, 1296, 742], [518, 750, 1296, 797], [518, 799, 1292, 841], [514, 848, 1296, 897], [515, 895, 885, 944], [517, 943, 1294, 990], [514, 995, 1351, 1043], [513, 1043, 1294, 1094], [513, 1094, 1293, 1141], [512, 1143, 1294, 1192], [512, 1192, 1293, 1240], [513, 1241, 1294, 1284], [517, 1290, 1292, 1331], [515, 1340, 1291, 1383], [514, 1388, 1295, 1438], [517, 1436, 1292, 1487], [516, 1483, 1291, 1539], [1078, 1546, 1283, 1584], [530, 1581, 1291, 1636], [514, 1639, 1291, 1689], [512, 1680, 859, 1716], [1389, 24, 1453, 45]], "text_direction": "horizontal-lr", "script_detection": false}
--------------------------------------------------------------------------------
/kraken/contrib/generate_scripts.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Script fetching the latest unicode Scripts.txt and dumping it as json.
4 | """
5 | from urllib import request
6 | import json
7 | import regex
8 |
9 | uri = 'http://www.unicode.org/Public/UNIDATA/Scripts.txt'
10 |
11 | re = regex.compile('^(?P<start>[0-9A-F]{4,6})(..(?P<end>[0-9A-F]{4,6}))?\s+; (?P<name>[A-Za-z]+)')
12 |
13 | with open('scripts.json', 'w') as fp, request.urlopen(uri) as req:
14 | d = []
15 | for line in req:
16 | line = line.decode('utf-8')
17 | if line.startswith('#') or line.strip() == '':
18 | continue
19 | m = re.match(line)
20 | if m:
21 | print(line)
22 | start = int(m.group('start'), base=16)
23 | end = start
24 | if m.group('end'):
25 | end = int(m.group('end'), base=16)
26 | name = m.group('name')
27 | if len(d) > 0 and d[-1][2] == name and (start - 1 == d[-1][1] or start -1 == d[-1][0]):
28 | print('merging {} and ({}, {}, {})'.format(d[-1], start, end, name))
29 | d[-1] = (d[-1][0], end, name)
30 | else:
31 | d.append((start, end if end != start else None, name))
32 | json.dump(d, fp)
33 |
--------------------------------------------------------------------------------
/kraken/templates/abbyyxml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | {% for line in page.lines %}
7 |
8 | {% for segment in line.recognition %}
9 | {% for char in segment.recognition %}
10 | {% if loop.first %}
11 | {{ char.text }}
12 | {% else %}
13 | {{ char.text }}
14 | {% endif %}
15 | {% endfor %}
16 | {% endfor %}
17 |
18 |
19 | {% endfor %}
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/kraken/lib/exceptions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | kraken.lib.exceptions
4 | ~~~~~~~~~~~~~~~~~~~~~
5 |
6 | All custom exceptions raised by kraken's modules and packages. Packages should
7 | always define their exceptions here.
8 | """
9 |
10 |
11 | class KrakenEncodeException(Exception):
12 |
13 | def __init__(self, message=None):
14 | Exception.__init__(self, message)
15 |
16 |
17 | class KrakenRecordException(Exception):
18 |
19 | def __init__(self, message=None):
20 | Exception.__init__(self, message)
21 |
22 |
23 | class KrakenInvalidModelException(Exception):
24 |
25 | def __init__(self, message=None):
26 | Exception.__init__(self, message)
27 |
28 |
29 | class KrakenInputException(Exception):
30 |
31 | def __init__(self, message=None):
32 | Exception.__init__(self, message)
33 |
34 |
35 | class KrakenRepoException(Exception):
36 |
37 | def __init__(self, message=None):
38 | Exception.__init__(self, message)
39 |
40 |
41 | class KrakenCairoSurfaceException(Exception):
42 | """
43 | Raised when the Cairo surface couldn't be created.
44 |
45 | Attributes:
46 | message (str): Error message
47 | width (int): Width of the surface
48 | height (int): Height of the surface
49 | """
50 | def __init__(self, message: str, width: int, height: int) -> None:
51 | self.message = message
52 | self.width = width
53 | self.height = height
54 |
55 | def __repr__(self) -> str:
56 | return repr(self.message)
57 |
--------------------------------------------------------------------------------
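A minimal sketch of how callers typically consume these exceptions, mirroring the behaviour exercised in tests/test_pageseg.py; the image path is a placeholder:

    from PIL import Image

    from kraken.pageseg import segment
    from kraken.lib.exceptions import KrakenInputException

    try:
        # segment() raises KrakenInputException for non-bitonal (e.g. color) input
        with Image.open('page_color.jpg') as im:
            segment(im)
    except KrakenInputException as exc:
        print('invalid input for segmentation:', exc)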
/tests/test_pageseg.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import, division, print_function
4 |
5 | import unittest
6 | import os
7 |
8 | from PIL import Image
9 | from nose.tools import raises
10 |
11 | from kraken.pageseg import segment
12 | from kraken.lib.exceptions import KrakenInputException
13 |
14 | thisfile = os.path.abspath(os.path.dirname(__file__))
15 | resources = os.path.abspath(os.path.join(thisfile, 'resources'))
16 |
17 |
18 | class TestPageSeg(unittest.TestCase):
19 |
20 | """
21 | Tests of the page segmentation functionality
22 | """
23 | @raises(KrakenInputException)
24 | def test_segment_color(self):
25 | """
26 | Test correct handling of color input.
27 | """
28 | with Image.open(os.path.join(resources, 'input.jpg')) as im:
29 | segment(im)
30 |
31 | def test_segment_bw(self):
32 | """
33 | Tests segmentation of bi-level input.
34 | """
35 | with Image.open(os.path.join(resources, 'bw.png')) as im:
36 | lines = segment(im)
37 | # test if line count is roughly correct
38 | self.assertAlmostEqual(len(lines['boxes']), 30, msg='Segmentation differs '
39 | 'wildly from true line count', delta=5)
40 | # check if lines do not extend beyond image
41 | for box in lines['boxes']:
42 | self.assertLess(0, box[0], msg='Line x0 < 0')
43 | self.assertLess(0, box[1], msg='Line y0 < 0')
44 | self.assertGreater(im.size[0], box[2], msg='Line x1 > {}'.format(im.size[0]))
45 | self.assertGreater(im.size[1], box[3], msg='Line y1 > {}'.format(im.size[1]))
46 |
--------------------------------------------------------------------------------
/kraken/templates/style.css:
--------------------------------------------------------------------------------
1 | body {
2 | background: #f3f3f3;
3 | {% if font.family %}
4 | font-family: {{ font.family }};
5 | {% endif %}
6 | {% if font.style %}
7 | font-style: {{ font.style }};
8 | {% endif %}
9 | {% if font.weight %}
10 | font-weight: {{ font.weight }};
11 | {% endif %}
12 | }
13 |
14 | [contenteditable=true]:empty:before {
15 | content: attr(data-placeholder);
16 | display: block; /* For Firefox */
17 | }
18 |
19 | li[contenteditable=true]:hover, li[contenteditable=true].hovered, span[contenteditable=true]:hover, span[contenteditable=true].hovered {
20 | border: 1px solid #ff0000;
21 | }
22 |
23 | .rect:hover, a.hovered {
24 | box-shadow: inset 0 0 0 1px #ff0000;
25 | }
26 |
27 | li[contenteditable=true]{
28 | border: 1px dashed #000;
29 | width: 100%;
30 | padding: 2px;
31 | margin: 0 0 5px 0;
32 | }
33 |
34 | ul {
35 | list-style-type:none;
36 | }
37 |
38 | nav {
39 | background: #444;
40 | position: fixed;
41 | top: 0;
42 | left: 0;
43 | height: 100%;
44 | width: 10%;
45 | font-family: "Helvetica Neue", Arial, sans-serif;
46 | }
47 |
48 | nav ul {
49 | list-style: none;
50 | margin-right: 1em;
51 | }
52 |
53 | nav li {
54 | display : inline-block;
55 | }
56 |
57 | nav a {
58 | color: white;
59 | text-decoration: none;
60 | }
61 |
62 | nav a:hover {
63 | text-decoration: underline;
64 | }
65 |
66 | .container {
67 | position: relative;
68 | margin-left: 15%;
69 | display: table;
70 | height: 100%;
71 | width: 85%;
72 | }
73 |
74 | .img_container {
75 | position: relative;
76 | }
77 |
78 | .column {
79 | display: table-cell;
80 | vertical-align: top;
81 | width: 50%;
82 | height: 100%;
83 | padding: 1rem;
84 | }
85 |
86 | #download_button {
87 | position: fixed;
88 | padding: 0;
89 | text-align: center;
90 | width: 10%;
91 | bottom: 50px;
92 | }
93 |
94 | .corrected {
95 | background-color: #73AD21;
96 | }
97 |
98 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | matrix:
3 | include:
4 | - python: 3.6
5 | - python: 3.7
6 | dist: xenial
7 | sudo: required
8 | notifications:
9 | email: false
10 | sudo: false
11 | install:
12 | - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh;
13 | - bash miniconda.sh -b -p $HOME/miniconda
14 | - export PATH="$HOME/miniconda/bin:$PATH"
15 | - conda config --set always_yes yes --set changeps1 no --set show_channel_urls yes
16 | - conda update conda
17 | - conda create -n test-environment python=$TRAVIS_PYTHON_VERSION
18 | - source activate test-environment
19 | - conda install pbr nose pip
20 | - sed '/coremltools\|python-bidi\|regex\|torch/d' requirements.txt | xargs conda install
21 | - conda install torchvision-cpu -c pytorch
22 | - conda uninstall pytorch-cpu -c pytorch
23 | - conda install pytorch-nightly-cpu -c pytorch
24 | - pip install -r requirements.txt
25 | - conda list
26 | - pip freeze
27 | - python setup.py install
28 | script:
29 | - python setup.py nosetests
30 | deploy:
31 | provider: pypi
32 | username: ogl-iris
33 | distributions: sdist bdist_wheel
34 | skip_cleanup: true
35 | skip_upload_docs: true
36 | on:
37 | tags: true
38 | password:
39 | secure: i/TwRgfux3ebFtTgg8Od/7KGHr1AZgHJ/9r4Yop7HoZhKsgSW8Q3e65K/LJ9aQFxmggeneAdOZFboStl9li48FpfFTqJy9TioSyaDoxDv5oPmUDFKHzbjExlupa7BzeL/OaNYSzkD8S2CIcnaiQspFASCWy0pHvveTU0MvdeaFbZ+lEdwH7Kb4DotzRA2p0wOwuq84P6Vunqi9UEvVP4e/f2j1Hin+zGs08nnxfC8A1XXkKZlnnRtbaGqKkzcSyeYFDcHfFENU1E3KEbeR6xqpWgZla/WIxnQTjUaZy9/RVLja8JLoPI86WofYScKcvYRUBPX74RBgjQhpNusuZ1umGxG+1C5TzF705YqWdYCM96qqUA/hBlDSngk+ZjraPJAtSPlJCx6VaiuIu8VPgP2jcazKaMduq5C6NT0XJtNUS22cdoox3Fzhhf/f6mLPMeBxQJewYo3Qbj86Ll5M8O5SmGdwAnmGDEwL0+cqb5oULXQcK1fJMnqR68KqSoFq89zNdTEEHTjMCLJO9Yfjmpd6iY33nOXhCEWNFRKEQVbeyFcudQemDxSSGTq2LNrgzMjJj4O3chjqbU9y5KiQF5lpH28/S/ele7VrbpX9bbn3/QmSQnJhByiypOQ2vEricn3aEoToE8Ws//OCmqItoOYTzRNHs/EUST0Zah2W/LTX8=
40 |
--------------------------------------------------------------------------------
/tests/test_vgsl.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import unittest
3 |
4 | from nose.tools import raises
5 |
6 | import os
7 | import torch
8 | import tempfile
9 | from kraken.lib import vgsl
10 |
11 |
12 | class TestVGSL(unittest.TestCase):
13 | """
14 | Testing VGSL module
15 | """
16 | def test_helper_train(self):
17 | """
18 | Tests train/eval mode helper methods
19 | """
20 | rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]')
21 | rnn.train()
22 | self.assertTrue(torch.is_grad_enabled())
23 | self.assertTrue(rnn.nn.training)
24 | rnn.eval()
25 | self.assertFalse(torch.is_grad_enabled())
26 | self.assertFalse(rnn.nn.training)
27 |
28 | def test_helper_threads(self):
29 | """
30 | Test openmp threads helper method.
31 | """
32 | rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]')
33 | rnn.set_num_threads(4)
34 | self.assertEqual(torch.get_num_threads(), 4)
35 |
36 | def test_save_model(self):
37 | """
38 | Test model serialization.
39 | """
40 | rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]')
41 | with tempfile.TemporaryDirectory() as dir:
42 | rnn.save_model(dir + '/foo.mlmodel')
43 | self.assertTrue(os.path.exists(dir + '/foo.mlmodel'))
44 |
45 | def test_resize(self):
46 | """
47 | Tests resizing of output layers.
48 | """
49 | rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]')
50 | rnn.resize_output(80)
51 | self.assertEqual(rnn.nn[-1].lin.out_features, 80)
52 |
53 | def test_del_resize(self):
54 | """
55 | Tests resizing of output layers with entry deletion.
56 | """
57 | rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]')
58 | rnn.resize_output(80, [2, 4, 5, 6, 7, 12, 25])
59 | self.assertEqual(rnn.nn[-1].lin.out_features, 80)
60 |
--------------------------------------------------------------------------------
/tests/test_models.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import unittest
3 | import os
4 | import tempfile
5 | import pickle
6 |
7 | from nose.tools import raises
8 |
9 | import kraken.lib.lstm
10 |
11 | from kraken.lib import models
12 | from kraken.lib.exceptions import KrakenInvalidModelException
13 |
14 | thisfile = os.path.abspath(os.path.dirname(__file__))
15 | resources = os.path.abspath(os.path.join(thisfile, 'resources'))
16 |
17 | class TestModels(unittest.TestCase):
18 | """
19 | Testing model loading routines
20 | """
21 |
22 | def setUp(self):
23 | self.temp = tempfile.NamedTemporaryFile(delete=False)
24 |
25 | def tearDown(self):
26 | self.temp.close()
27 | os.unlink(self.temp.name)
28 |
29 | @raises(KrakenInvalidModelException)
30 | def test_load_invalid(self):
31 | """
32 | Tests correct handling of invalid files.
33 | """
34 | models.load_any(self.temp.name)
35 |
36 | def test_load_clstm(self):
37 | """
38 | Tests loading of valid clstm files.
39 | """
40 | rnn = models.load_any(os.path.join(resources, 'toy.clstm').encode('utf-8'))
41 | self.assertIsInstance(rnn, models.TorchSeqRecognizer)
42 |
43 | @raises(KrakenInvalidModelException)
44 | def test_load_pyrnn_no_seqrecognizer(self):
45 | """
46 | Test correct handling of non-SeqRecognizer pickles.
47 | """
48 | pickle.dump(u'Iámnõtãrécðçnízer', self.temp)
49 | self.temp.close()
50 | models.load_any(self.temp.name)
51 |
52 | @raises(KrakenInvalidModelException)
53 | def test_load_any_pyrnn_py3(self):
54 | """
55 | Test load_any doesn't load pickled models on python 3
56 | """
57 | rnn = models.load_any(os.path.join(resources, 'model.pyrnn.gz'))
58 |
59 | def test_load_any_proto(self):
60 | """
61 | Test load_any loads protobuf models.
62 | """
63 | rnn = models.load_any(os.path.join(resources, 'model.pronn'))
64 | self.assertIsInstance(rnn, kraken.lib.models.TorchSeqRecognizer)
65 |
--------------------------------------------------------------------------------
/tests/test_binarization.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import, division, print_function
4 |
5 | import unittest
6 | import os
7 |
8 | from PIL import Image
9 | from kraken.binarization import nlbin
10 |
11 | thisfile = os.path.abspath(os.path.dirname(__file__))
12 | resources = os.path.abspath(os.path.join(thisfile, 'resources'))
13 |
14 | class TestBinarization(unittest.TestCase):
15 |
16 | """
17 | Tests of the nlbin function for binarization of images
18 | """
19 | def test_not_binarize_bw(self):
20 | """
21 | Test that mode '1' images aren't binarized again.
22 | """
23 | with Image.new('1', (1000,1000)) as im:
24 | self.assertEqual(im, nlbin(im))
25 |
26 | def test_binarize_no_bw(self):
27 | """
28 | Tests binarization of image formats without a 1bpp mode (JPG).
29 | """
30 | with Image.open(os.path.join(resources, 'input.jpg')) as im:
31 | res = nlbin(im)
32 | # calculate histogram and check if only pixels of value 0/255 exist
33 | self.assertEqual(254, res.histogram().count(0), msg='Output not '
34 | 'binarized')
35 |
36 | def test_binarize_tif(self):
37 | """
38 | Tests binarization of RGB TIFF images.
39 | """
40 | with Image.open(os.path.join(resources, 'input.tif')) as im:
41 | res = nlbin(im)
42 | # calculate histogram and check if only pixels of value 0/255 exist
43 | self.assertEqual(254, res.histogram().count(0), msg='Output not '
44 | 'binarized')
45 |
46 | def test_binarize_grayscale(self):
47 | """
48 | Test binarization of mode 'L' images.
49 | """
50 | with Image.open(os.path.join(resources, 'input.tif')) as im:
51 | res = nlbin(im.convert('L'))
52 | # calculate histogram and check if only pixels of value 0/255 exist
53 | self.assertEqual(254, res.histogram().count(0), msg='Output not '
54 | 'binarized')
55 |
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | kraken API
2 | ==========
3 |
4 | .. module:: kraken
5 |
6 | Kraken provides routines which are usable by third party tools. In general
7 | you can expect functions in the ``kraken`` package to remain stable. We will
8 | try to keep these backward compatible, but as kraken is still in an early
9 | development stage and the API is still quite rudimentary nothing can be
10 | guaranteed.
11 |
12 | kraken.binarization module
13 | --------------------------
14 |
15 | .. automodule:: kraken.binarization
16 | :members:
17 | :show-inheritance:
18 |
19 | kraken.serialization module
20 | ---------------------------
21 |
22 | .. automodule:: kraken.serialization
23 | :members:
24 | :show-inheritance:
25 |
26 | kraken.pageseg module
27 | ---------------------
28 |
29 | .. automodule:: kraken.pageseg
30 | :members:
31 | :show-inheritance:
32 |
33 | kraken.rpred module
34 | -------------------
35 |
36 | .. automodule:: kraken.rpred
37 | :members:
38 | :show-inheritance:
39 |
40 | kraken.transcribe module
41 | ------------------------
42 |
43 | .. automodule:: kraken.transcribe
44 | :members:
45 | :show-inheritance:
46 |
47 | kraken.linegen module
48 | ---------------------
49 |
50 | .. automodule:: kraken.linegen
51 | :members:
52 | :show-inheritance:
53 |
54 | kraken.lib.models module
55 | ------------------------
56 |
57 | .. automodule:: kraken.lib.models
58 | :members:
59 | :show-inheritance:
60 |
61 | kraken.lib.vgsl module
62 | ----------------------
63 |
64 | .. automodule:: kraken.lib.vgsl
65 | :members:
66 | :show-inheritance:
67 |
68 | kraken.lib.codec
69 | ----------------
70 |
71 | .. automodule:: kraken.lib.codec
72 | :members:
73 | :show-inheritance:
74 |
75 | kraken.lib.train module
76 | -----------------------
77 |
78 | .. automodule:: kraken.lib.train
79 | :members:
80 | :show-inheritance:
81 |
82 | kraken.lib.dataset module
83 | -------------------------
84 |
85 | .. automodule:: kraken.lib.dataset
86 | :members:
87 | :show-inheritance:
88 |
89 | kraken.lib.ctc_decoder
90 | ----------------------
91 |
92 | .. automodule:: kraken.lib.ctc_decoder
93 | :members:
94 | :show-inheritance:
95 |
--------------------------------------------------------------------------------
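The modules documented above can be chained into a small recognition pipeline. The sketch below follows the same calls used in kraken/contrib/recognition_boxes.py; the model and image paths are placeholders, and the record.prediction attribute is assumed here to hold the recognized text of a line:

    from PIL import Image

    from kraken.binarization import nlbin
    from kraken.pageseg import segment
    from kraken.rpred import rpred
    from kraken.lib import models

    net = models.load_any('model.mlmodel')   # placeholder model path
    im = nlbin(Image.open('page.tif'))       # binarize the input page
    seg = segment(im)                        # reading-order sorted line boxes
    for record in rpred(net, im, seg):       # lazy generator over line predictions
        print(record.prediction)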
/docs/models.rst:
--------------------------------------------------------------------------------
1 | .. _models:
2 |
3 | Models
4 | ======
5 |
6 | There are currently three kinds of model files containing the recurrent
7 | neural networks that do the actual character recognition in kraken: ``pronn``
8 | files serializing old pickled ``pyrnn`` models as protobuf, clstm's native
9 | serialization, and versatile `Core ML
10 | `_ models.
11 |
12 | .. _pyrnn:
13 |
14 | pyrnn
15 | -----
16 |
17 | These are serialized instances of python ``lstm.SeqRecognizer`` objects. Using
18 | such a model just entails loading the pickle and calling the appropriate
19 | functions to perform recognition much like a shared library in other
20 | programming languages.
21 |
22 | Support for these models has been dropped with kraken 1.0 as python 2.7 is
23 | phased out.
24 |
25 | pronn
26 | -----
27 |
28 | Legacy python models can be converted to a protobuf based serialization. These
29 | are loadable by kraken 1.0 and will be automatically converted to Core ML.
30 |
31 | Protobuf models have several advantages over pickled ones. They are noticeably
32 | smaller (80Mb vs 1.8Mb for the default model), don't allow arbitrary code
33 | execution, and are upward compatible with python 3. Because they are so much
34 | more lightweight they are also loaded much faster.
35 |
36 | clstm
37 | -----
38 |
39 | `clstm `_ is a small and fast implementation of
40 | LSTM networks that was used in previous kraken versions. The model files can be
41 | loaded with pytorch-based kraken and will be converted to Core ML.
42 |
43 | CoreML
44 | ------
45 |
46 | Core ML allows arbitrary network architectures in a compact serialization with
47 | metadata. This is the default format in pytorch-based kraken.
48 |
49 | Conversion
50 | ----------
51 |
52 | By default, pronn/clstm models are automatically converted to the new Core ML
53 | format when explicitly specified using the ``-m`` option to the ``ocr`` utility
54 | on the command line. They are stored in the user kraken directory (default is
55 | ~/.kraken) and will be automatically substituted in future runs.
56 |
57 | If conversion is not desired, e.g. because there is a bug in the conversion
58 | routine, it can be disabled using the ``--disable-autoconversion`` switch.
59 |
--------------------------------------------------------------------------------
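Following the command line conventions shown in the README, an invocation that would trigger the on-the-fly conversion described in the Conversion section might look like this (file names are placeholders):

    $ kraken -i bw.png image.txt ocr -m model.pronn

The converted Core ML model is then stored in ~/.kraken and substituted on subsequent runs unless ``--disable-autoconversion`` is given.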
/kraken/lib/log.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Copyright 2018 Benjamin Kiessling
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
14 | # or implied. See the License for the specific language governing
15 | # permissions and limitations under the License.
16 | """
17 | kraken.lib.log
18 | ~~~~~~~~~~~~~~~~~
19 |
20 | Handlers and formatters for logging.
21 | """
22 | import time
23 | import click
24 | import logging
25 |
26 |
27 | class LogHandler(logging.Handler):
28 | def emit(self, record):
29 | msg = self.format(record)
30 | level = record.levelname.lower()
31 | err = level in ('warning', 'error', 'exception', 'critical')
32 | click.echo(msg, err=err)
33 |
34 |
35 | class LogFormatter(logging.Formatter):
36 | colors = {
37 | 'error': dict(fg='red'),
38 | 'exception': dict(fg='red'),
39 | 'critical': dict(fg='red'),
40 | 'warning': dict(fg='yellow'),
41 | }
42 |
43 | st_time = time.time()
44 |
45 | def format(self, record):
46 | if not record.exc_info:
47 | level = record.levelname.lower()
48 | msg = record.msg
49 | if level in self.colors:
50 | style = self.colors[level]
51 | else:
52 | style = {}
53 | msg = click.style(u'[{:2.4f}] {} '.format(time.time() - self.st_time, str(msg)), **style)
54 | return msg
55 | return logging.Formatter.format(self, record)
56 |
57 |
58 | def progressbar(*args, **kwargs):
59 | """
60 | Slight extension to click's progressbar that disables output when the log
61 | level is set below 30.
62 | """
63 | import logging
64 | logger = logging.getLogger(__name__)
65 | bar = click.progressbar(*args, **kwargs)
66 | if logger.getEffectiveLevel() < 30:
67 | bar.is_hidden = True # type: ignore
68 | return bar
69 |
70 |
71 | def set_logger(logger=None, level=logging.ERROR):
72 | handler = LogHandler()
73 | handler.setFormatter(LogFormatter())
74 | logger.addHandler(handler)
75 | logger.setLevel(level)
76 |
--------------------------------------------------------------------------------
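A sketch of wiring the handler and formatter above into an application, roughly what the command line drivers are expected to do:

    import logging

    from kraken.lib import log

    logger = logging.getLogger('kraken')
    log.set_logger(logger, level=logging.INFO)
    logger.info('click-aware, colored logging is now active')

    # progressbar() wraps click.progressbar but hides the bar when the
    # effective log level is below 30 (WARNING), i.e. during verbose logging.
    with log.progressbar(range(10), label='processing') as bar:
        for _ in bar:
            pass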
/tests/test_serialization.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import absolute_import, division, print_function
4 |
5 | import unittest
6 | import json
7 | import os
8 |
9 | from lxml import etree
10 | from io import StringIO
11 | from hocr_spec import HocrValidator
12 |
13 | from kraken import rpred
14 | from kraken import serialization
15 |
16 | thisfile = os.path.abspath(os.path.dirname(__file__))
17 | resources = os.path.abspath(os.path.join(thisfile, 'resources'))
18 |
19 | class TestSerializations(unittest.TestCase):
20 | """
21 | Tests for output serialization
22 | """
23 | def setUp(self):
24 | with open(os.path.join(resources, 'records.json'), 'r') as fp:
25 | self.records = [rpred.ocr_record(**x) for x in json.load(fp)]
26 | self.validator = HocrValidator('standard')
27 |
28 | def test_vertical_hocr_serialization(self):
29 | """
30 | Test vertical line hOCR serialization
31 | """
32 | fp = StringIO()
33 |
34 | fp.write(serialization.serialize(self.records, image_name='foo.png', writing_mode='vertical-lr', template='hocr'))
35 | fp.seek(0)
36 |
37 | report = self.validator.validate(fp, parse_strict=True)
38 | self.assertTrue(report.is_valid())
39 |
40 | def test_hocr_serialization(self):
41 | """
42 | Test hOCR serialization
43 | """
44 | fp = StringIO()
45 |
46 | fp.write(serialization.serialize(self.records, image_name='foo.png', template='hocr'))
47 | fp.seek(0)
48 |
49 | report = self.validator.validate(fp, parse_strict=True)
50 | self.assertTrue(report.is_valid())
51 |
52 | def test_alto_serialization_validation(self):
53 | """
54 | Validates output against ALTO schema
55 | """
56 | fp = StringIO()
57 |
58 | fp.write(serialization.serialize(self.records, image_name='foo.png', template='alto'))
59 | doc = etree.fromstring(fp.getvalue().encode('utf-8'))
60 | with open(os.path.join(resources, 'alto-4-0.xsd')) as schema_fp:
61 | alto_schema = etree.XMLSchema(etree.parse(schema_fp))
62 | alto_schema.assertValid(doc)
63 |
64 | def test_abbyyxml_serialization_validation(self):
65 | """
66 | Validates output against abbyyXML schema
67 | """
68 | fp = StringIO()
69 |
70 | fp.write(serialization.serialize(self.records, image_name='foo.png', template='abbyyxml'))
71 | doc = etree.fromstring(fp.getvalue().encode('utf-8'))
72 | with open(os.path.join(resources, 'FineReader10-schema-v1.xml')) as schema_fp:
73 | abbyy_schema = etree.XMLSchema(etree.parse(schema_fp))
74 | abbyy_schema.assertValid(doc)
75 |
--------------------------------------------------------------------------------
/kraken/iso15924.json:
--------------------------------------------------------------------------------
1 | {"520": "Tang", "20": "Xsux", "30": "Xpeo", "550": "Blis", "40": "Ugar", "50": "Egyp", "570": "Brai", "60": "Egyh", "437": "Loma", "70": "Egyd", "80": "Hluw", "90": "Maya", "95": "Sgnw", "610": "Inds", "100": "Mero", "101": "Merc", "105": "Sarb", "106": "Narb", "620": "Roro", "115": "Phnx", "116": "Lydi", "120": "Tfng", "123": "Samr", "124": "Armi", "125": "Hebr", "126": "Palm", "127": "Hatr", "130": "Prti", "131": "Phli", "132": "Phlp", "133": "Phlv", "134": "Avst", "135": "Syrc", "136": "Syrn", "137": "Syrj", "138": "Syre", "139": "Mani", "140": "Mand", "145": "Mong", "159": "Nbat", "160": "Arab", "161": "Aran", "165": "Nkoo", "166": "Adlm", "170": "Thaa", "175": "Orkh", "176": "Hung", "200": "Grek", "201": "Cari", "202": "Lyci", "204": "Copt", "206": "Goth", "210": "Ital", "211": "Runr", "212": "Ogam", "215": "Latn", "216": "Latg", "217": "Latf", "218": "Moon", "219": "Osge", "220": "Cyrl", "221": "Cyrs", "225": "Glag", "226": "Elba", "227": "Perm", "230": "Armn", "239": "Aghb", "240": "Geor", "241": "Geok", "755": "Dupl", "250": "Dsrt", "259": "Bass", "260": "Osma", "261": "Olck", "262": "Wara", "263": "Pauc", "264": "Mroo", "265": "Medf", "280": "Visp", "281": "Shaw", "282": "Plrd", "284": "Jamo", "285": "Bopo", "286": "Hang", "287": "Kore", "288": "Kits", "290": "Teng", "291": "Cirt", "292": "Sara", "293": "Piqd", "300": "Brah", "302": "Sidd", "305": "Khar", "310": "Guru", "312": "Gong", "313": "Gonm", "314": "Mahj", "315": "Deva", "316": "Sylo", "317": "Kthi", "318": "Sind", "319": "Shrd", "320": "Gujr", "321": "Takr", "322": "Khoj", "323": "Mult", "324": "Modi", "325": "Beng", "326": "Tirh", "327": "Orya", "328": "Dogr", "329": "Soyo", "330": "Tibt", "331": "Phag", "332": "Marc", "333": "Newa", "334": "Bhks", "335": "Lepc", "336": "Limb", "337": "Mtei", "338": "Ahom", "339": "Zanb", "340": "Telu", "343": "Gran", "344": "Saur", "345": "Knda", "346": "Taml", "347": "Mlym", "348": "Sinh", "349": "Cakm", "350": "Mymr", "351": "Lana", "352": "Thai", "353": "Tale", "354": "Talu", "355": "Khmr", "356": "Laoo", "357": "Kali", "358": "Cham", "359": "Tavt", "360": "Bali", "361": "Java", "362": "Sund", "363": "Rjng", "364": "Leke", "365": "Batk", "366": "Maka", "367": "Bugi", "370": "Tglg", "371": "Hano", "372": "Buhd", "373": "Tagb", "900": "Qaaa", "398": "Sora", "399": "Lisu", "400": "Lina", "401": "Linb", "403": "Cprt", "410": "Hira", "411": "Kana", "412": "Hrkt", "413": "Jpan", "420": "Nkgb", "430": "Ethi", "435": "Bamu", "436": "Kpel", "949": "Qabx", "438": "Mend", "439": "Afak", "440": "Cans", "445": "Cher", "450": "Hmng", "460": "Yiii", "470": "Vaii", "480": "Wole", "993": "Zsye", "994": "Zinh", "995": "Zmth", "996": "Zsym", "997": "Zxxx", "998": "Zyyy", "999": "Zzzz", "499": "Nshu", "500": "Hani", "501": "Hans", "502": "Hant", "503": "Hanb", "505": "Kitl", "510": "Jurc"}
2 |
--------------------------------------------------------------------------------
/kraken/templates/alto:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | pixel
7 |
8 | {{ page.name }}
9 |
10 |
11 |
12 |
13 | kraken
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | {% for line in page.lines %}
24 |
29 | {% for segment in line.recognition %}
30 | {# ALTO forbids encoding whitespace before any String/Shape tags #}
31 | {% if segment.text is whitespace and loop.index > 1 %}
32 |
37 | {% else %}
38 |
45 | {% for char in segment.recognition %}
46 |
53 |
54 | {% endfor %}
55 |
56 | {% endif %}
57 | {% endfor %}
58 |
59 | {% endfor %}
60 |
61 |
62 |
63 |
64 |
65 |
--------------------------------------------------------------------------------
/kraken/lib/util.py:
--------------------------------------------------------------------------------
1 | """
2 | Ocropus's magic PIL-numpy array conversion routines. They exhibit slightly
3 | different behavior from PIL.Image.toarray().
4 | """
5 | import unicodedata
6 | import numpy as np
7 |
8 | from PIL import Image
9 |
10 | __all__ = ['pil2array', 'array2pil']
11 |
12 |
13 | def pil2array(im: Image, alpha: int = 0) -> np.array:
14 | if im.mode == '1':
15 | return np.array(im.convert('L'))
16 | return np.array(im)
17 |
18 |
19 | def array2pil(a: np.array) -> Image:
20 | if a.dtype == np.dtype("B"):
21 | if a.ndim == 2:
22 | return Image.frombytes("L", (a.shape[1], a.shape[0]),
23 | a.tostring())
24 | elif a.ndim == 3:
25 | return Image.frombytes("RGB", (a.shape[1], a.shape[0]),
26 | a.tostring())
27 | else:
28 | raise Exception("bad image rank")
29 | elif a.dtype == np.dtype('float32'):
30 | return Image.frombytes("F", (a.shape[1], a.shape[0]), a.tostring())
31 | else:
32 | raise Exception("unknown image type")
33 |
34 |
35 | def is_bitonal(im: Image) -> bool:
36 | """
37 | Tests a PIL.Image for bitonality.
38 |
39 | Args:
40 | im (PIL.Image): Image to test
41 |
42 | Returns:
43 | True if the image contains at most two different color values. False
44 | otherwise.
45 | """
46 | return im.getcolors(2) is not None
47 |
48 |
49 | def get_im_str(im: Image) -> str:
50 | return im.filename if hasattr(im, 'filename') else str(im)
51 |
52 |
53 | def is_printable(char: str) -> bool:
54 | """
55 | Determines if a code point is printable/visible when printed.
56 |
57 | Args:
58 | char (str): Input code point.
59 |
60 | Returns:
61 | True if printable, False otherwise.
62 | """
63 | letters = ('LC', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu')
64 | numbers = ('Nd', 'Nl', 'No')
65 | punctuation = ('Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps')
66 | symbol = ('Sc', 'Sk', 'Sm', 'So')
67 | printable = letters + numbers + punctuation + symbol
68 |
69 | return unicodedata.category(char) in printable
70 |
71 |
72 | def make_printable(char: str) -> str:
73 | """
74 | Takes a Unicode code point and return a printable representation of it.
75 |
76 | Args:
77 | char (str): Input code point
78 |
79 | Returns:
80 | Either the original code point, the name of the code point if it is a
81 | combining mark, whitespace etc., or the hex code if it is a control
82 | symbol.
83 | """
84 | if not char or is_printable(char):
85 | return char
86 | elif unicodedata.category(char) in ('Cc', 'Cs', 'Co'):
87 | return '0x{:x}'.format(ord(char))
88 | else:
89 | return unicodedata.name(char)
90 |
--------------------------------------------------------------------------------
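A short round-trip through the conversion helpers plus the code point formatting utilities defined above; the values in the comments follow directly from the implementations:

    from PIL import Image

    from kraken.lib import util

    im = Image.new('L', (4, 3), 255)   # small grayscale image
    a = util.pil2array(im)
    print(a.shape, a.dtype)            # (3, 4) uint8
    print(util.array2pil(a).size)      # (4, 3)
    print(util.is_bitonal(im))         # True: at most two color values

    print(util.make_printable('a'))    # printable code points come back unchanged
    print(util.make_printable(' '))    # 'SPACE' (the code point's name)
    print(util.make_printable('\x00')) # '0x0' for control characters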
/README.rst:
--------------------------------------------------------------------------------
1 | Description
2 | ===========
3 |
4 | .. image:: https://travis-ci.org/mittagessen/kraken.svg?branch=master
5 | :target: https://travis-ci.org/mittagessen/kraken
6 |
7 | kraken is a fork of ocropus intended to rectify a number of issues while
8 | preserving (mostly) functional equivalence. Its main features are:
9 |
10 | - Script detection and multiscript recognition support
11 | - `Right-to-Left `_, `BiDi
12 | `_, and Top-to-Bottom
13 | script support
14 | - `ALTO `_, abbyyXML, and hOCR output
15 | - Word bounding boxes and character cuts
16 | - `Public repository `_ of model files
17 | - Dynamic recognition model architectures and GPU acceleration
18 | - Clean public API
19 |
20 | Installation
21 | ============
22 |
23 | When using a recent version of pip all dependencies will be installed from
24 | binary wheel packages, so installing build-essential or your distribution's
25 | equivalent is often unnecessary.
26 |
27 | Install the latest master version through `conda `_:
28 |
29 | ::
30 |
31 | $ wget https://raw.githubusercontent.com/mittagessen/kraken/master/environment.yml
32 | $ conda env create -f environment.yml
33 |
34 | or:
35 |
36 | ::
37 |
38 | $ wget https://raw.githubusercontent.com/mittagessen/kraken/master/environment_cuda.yml
39 | $ conda env create -f environment_cuda.yml
40 |
41 | for CUDA acceleration with the appropriate hardware.
42 |
43 | It is also possible to install the stable version with the old clstm backend from pypi:
44 |
45 | ::
46 |
47 | $ pip install kraken
48 |
49 | Finally you'll have to scrounge up a model to do the actual recognition of
50 | characters. To download the default model for printed English text and place it
51 | in the kraken directory for the current user:
52 |
53 | ::
54 |
55 | $ kraken get default
56 |
57 | A list of libre models available in the central repository can be retrieved by
58 | running:
59 |
60 | ::
61 |
62 | $ kraken list
63 |
64 | Quickstart
65 | ==========
66 |
67 | Recognizing text on an image using the default parameters including the
68 | prerequisite steps of binarization and page segmentation:
69 |
70 | ::
71 |
72 | $ kraken -i image.tif image.txt binarize segment ocr
73 |
74 | To binarize a single image using the nlbin algorithm:
75 |
76 | ::
77 |
78 | $ kraken -i image.tif bw.png binarize
79 |
80 | To segment a binarized image into reading-order sorted lines:
81 |
82 | ::
83 |
84 | $ kraken -i bw.png lines.json segment
85 |
86 | To OCR a binarized image using the default RNN and the previously generated
87 | page segmentation:
88 |
89 | ::
90 |
91 | $ kraken -i bw.png image.txt ocr --lines lines.json
92 |
93 | All subcommands and options are documented. Use the ``help`` option to get more
94 | information.
95 |
96 | Documentation
97 | =============
98 |
99 | Have a look at the `docs `_
100 |
101 | Funding
102 | =======
103 |
104 | kraken is developed at `Université PSL `_.
105 |
--------------------------------------------------------------------------------
/kraken/lib/lineest.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | import PIL
3 | import numpy as np
4 |
5 | from kraken.lib.util import pil2array, array2pil
6 | from scipy.ndimage import interpolation, filters
7 |
8 | __all__ = ['CenterNormalizer', 'dewarp']
9 |
10 |
11 | def scale_to_h(img, target_height, order=1, dtype=np.dtype('f'), cval=0):
12 | h, w = img.shape
13 | scale = target_height*1.0/h
14 | target_width = int(scale*w)
15 | with warnings.catch_warnings():
16 | warnings.simplefilter('ignore', UserWarning)
17 | output = interpolation.affine_transform(1.0*img, np.ones(2)/scale,
18 | order=order,
19 | output_shape=(target_height,
20 | target_width),
21 | mode='constant', cval=cval)
22 | output = np.array(output, dtype=dtype)
23 | return output
24 |
25 |
26 | class CenterNormalizer(object):
27 | def __init__(self, target_height=48, params=(4, 1.0, 0.3)):
28 | self.target_height = target_height
29 | self.range, self.smoothness, self.extra = params
30 |
31 | def setHeight(self, target_height):
32 | self.target_height = target_height
33 |
34 | def measure(self, line):
35 | h, w = line.shape
36 | # XXX: this filter is awfully slow
37 | smoothed = filters.gaussian_filter(line, (h*0.5, h*self.smoothness),
38 | mode='constant')
39 | smoothed += 0.001*filters.uniform_filter(smoothed, (h*0.5, w),
40 | mode='constant')
41 | self.shape = (h, w)
42 | a = np.argmax(smoothed, axis=0)
43 | a = filters.gaussian_filter(a, h*self.extra)
44 | self.center = np.array(a, 'i')
45 | deltas = np.abs(np.arange(h)[:, np.newaxis]-self.center[np.newaxis, :])
46 | self.mad = np.mean(deltas[line != 0])
47 | self.r = int(1+self.range*self.mad)
48 |
49 | def dewarp(self, img, cval=0, dtype=np.dtype('f')):
50 | if img.shape != self.shape:
51 | raise Exception('Measured and dewarp image shapes different')
52 | h, w = img.shape
53 | padded = np.vstack([cval*np.ones((h, w)), img, cval*np.ones((h, w))])
54 | center = self.center+h
55 | dewarped = [padded[center[i]-self.r:center[i]+self.r, i] for i in
56 | range(w)]
57 | dewarped = np.array(dewarped, dtype=dtype).T
58 | return dewarped
59 |
60 | def normalize(self, img, order=1, dtype=np.dtype('f'), cval=0):
61 | dewarped = self.dewarp(img, cval=cval, dtype=dtype)
62 | h, w = dewarped.shape
63 | scaled = scale_to_h(dewarped, self.target_height, order=order,
64 | dtype=dtype, cval=cval)
65 | return scaled
66 |
67 |
68 | def dewarp(normalizer: CenterNormalizer, im: PIL.Image) -> PIL.Image:
69 | """
70 | Dewarps an image of a line using a kraken.lib.lineest.CenterNormalizer
71 | instance.
72 |
73 | Args:
74 | normalizer (kraken.lib.lineest.CenterNormalizer): A line normalizer
75 | instance
76 | im (PIL.Image): Image to dewarp
77 |
78 | Returns:
79 | PIL.Image containing the dewarped image.
80 | """
81 | line = pil2array(im)
82 | temp = np.amax(line)-line
83 | temp = temp*1.0/np.amax(temp)
84 | normalizer.measure(temp)
85 | line = normalizer.normalize(line, cval=np.amax(line))
86 | return array2pil(line)
87 |
--------------------------------------------------------------------------------
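A sketch of dewarping a single line image with the module-level dewarp() helper above; the input path is a placeholder and should point at an image containing one dark-on-light text line:

    from PIL import Image

    from kraken.lib import lineest

    norm = lineest.CenterNormalizer(target_height=48)
    with Image.open('line.png') as im:          # placeholder line image
        dewarped = lineest.dewarp(norm, im.convert('L'))
    print(dewarped.size)                        # (scaled width, 48), mode 'F'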
/kraken/templates/layout.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
10 |
11 |
12 |
15 |
73 |
81 |
82 | {% for page in pages %}
83 |
84 |
85 |
86 |

87 | {% for line in page.lines %}
88 |
89 | {% endfor %}
90 |
91 |
92 |
93 |
94 | {% for line in page.lines %}
95 | -
96 | {% if line.text %}
97 | {{ line.text }}
98 | {% endif %}
99 |
100 | {% endfor %}
101 |
102 |
103 |
104 | {% endfor %}
105 |
106 |
107 |
--------------------------------------------------------------------------------
/kraken/lib/lstm.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | import numpy as np
3 |
4 | from typing import Dict
5 | from scipy.ndimage import measurements
6 | from scipy.special import expit
7 |
8 | initial_range = 0.1
9 |
10 |
11 | class Codec(object):
12 | """Translate between integer codes and characters."""
13 | def init(self, charset):
14 | charset = sorted(list(set(charset)))
15 | self.code2char = {} # type: Dict[int, str]
16 | self.char2code = {} # type: Dict[str, int]
17 | for code,char in enumerate(charset):
18 | self.code2char[code] = char
19 | self.char2code[char] = code
20 | return self
21 | def size(self):
22 | """The total number of codes (use this for the number of output
23 | classes when training a classifier)."""
24 | return len(list(self.code2char.keys()))
25 | def encode(self, s):
26 | "Encode the string `s` into a code sequence."
27 | tab = self.char2code
28 | dflt = self.char2code["~"]
29 | return [self.char2code.get(c,dflt) for c in s]
30 | def decode(self, l):
31 | "Decode a code sequence into a string."
32 | s = [self.code2char.get(c,"~") for c in l]
33 | return s
34 |
35 | class Network:
36 | def predict(self,xs):
37 | """Prediction is the same as forward propagation."""
38 | return self.forward(xs)
39 |
40 | class Softmax(Network):
41 | """A logistic regression network."""
42 | def __init__(self,Nh,No,initial_range=0.1,rand=None):
43 | pass
44 | def ninputs(self):
45 | pass
46 | def noutputs(self):
47 | pass
48 | def forward(self,ys):
49 | pass
50 | def backward(self,deltas):
51 | pass
52 |
53 |
54 | class LSTM(Network):
55 | """A standard LSTM network. This is a direct implementation of all the forward
56 | and backward propagation formulas, mainly for speed. (There is another, more
57 | abstract implementation as well, but that's significantly slower in Python
58 | due to function call overhead.)"""
59 | def __init__(self,ni,ns,initial=0.1,maxlen=5000):
60 | pass
61 |
62 | def init_weights(self,initial):
63 | pass
64 |
65 | def allocate(self,n):
66 | pass
67 |
68 | def reset(self,n):
69 | pass
70 |
71 | def forward(self,xs):
72 | pass
73 |
74 | ################################################################
75 | # combination classifiers
76 | ################################################################
77 |
78 | class Stacked(Network):
79 | """Stack two networks on top of each other."""
80 | def __init__(self,nets):
81 | self.nets = nets
82 | def forward(self,xs):
83 | pass
84 |
85 | class Reversed(Network):
86 | """Run a network on the time-reversed input."""
87 | def __init__(self,net):
88 | self.net = net
89 | def forward(self,xs):
90 | pass
91 |
92 | class Parallel(Network):
93 | """Run multiple networks in parallel on the same input."""
94 | def __init__(self,*nets):
95 | self.nets = nets
96 | def forward(self,xs):
97 | pass
98 |
99 | def BIDILSTM(Ni,Ns,No):
100 | """A bidirectional LSTM, constructed from regular and reversed LSTMs."""
101 | lstm1 = LSTM(Ni,Ns)
102 | lstm2 = Reversed(LSTM(Ni,Ns))
103 | bidi = Parallel(lstm1,lstm2)
104 | logreg = Softmax(2*Ns,No)
105 | stacked = Stacked([bidi,logreg])
106 | return stacked
107 |
108 |
109 | class SeqRecognizer(Network):
110 | """Perform sequence recognition using BIDILSTM and alignment."""
111 | def __init__(self,ninput,nstates,noutput=-1,codec=None,normalize=None):
112 | self.Ni = ninput
113 | if codec: noutput = codec.size()
114 | self.No = noutput
115 | self.lstm = BIDILSTM(ninput,nstates,noutput)
116 | self.codec = codec
117 | def translate_back(self, output):
118 | pass
119 | def translate_back_locations(self, output):
120 | pass
121 | def predictSequence(self,xs):
122 | "Predict an integer sequence of codes."
123 | pass
124 | def l2s(self,l):
125 | "Convert a code sequence into a unicode string after recognition."
126 | l = self.codec.decode(l)
127 | return u"".join(l)
128 | def predictString(self,xs):
129 | "Predict output as a string. This uses codec and normalizer."
130 | pass
131 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | kraken
2 | ======
3 |
4 | .. toctree::
5 | :hidden:
6 | :maxdepth: 2
7 |
8 |    advanced
9 |    Training <training>
10 |    API <api>
11 |    Models <models>
12 |
13 | kraken is a turn-key OCR system forked from `ocropus
14 | <https://github.com/tmbdev/ocropy>`_. It is intended to rectify a number of
15 | issues while preserving (mostly) functional equivalence.
16 |
17 | Features
18 | ========
19 |
20 | kraken's main features are:
21 |
22 | - Script detection and multi-script recognition support
23 | - `Right-to-Left <https://en.wikipedia.org/wiki/Right-to-left>`_, `BiDi
24 |   <https://en.wikipedia.org/wiki/Bi-directional_text>`_, and Top-to-Bottom
25 |   script support
26 | - `ALTO <https://www.loc.gov/standards/alto/>`_, abbyyXML, and hOCR output
27 | - Word bounding boxes and character cuts
28 | - `Public repository <https://github.com/mittagessen/kraken-models>`_ of model files
29 | - :ref:`Lightweight model files <models>`
30 | - :ref:`Variable recognition network architectures <vgsl>`
31 |
32 | All functionality not pertaining to OCR and prerequisite steps has been
33 | removed, i.e. no more error rate measuring, etc.
34 |
35 | Pull requests and code contributions are always welcome.
36 |
37 | Installation
38 | ============
39 |
40 | kraken requires some external libraries to run. On Debian/Ubuntu they may be
41 | installed using:
42 |
43 | .. code-block:: console
44 |
45 | # apt install libpangocairo-1.0 libxml2 libblas3 liblapack3 python3-dev python3-pip
46 |
47 | pip
48 | ---
49 |
50 | .. code-block:: console
51 |
52 | $ pip3 install kraken
53 |
54 | or by running pip in the git repository:
55 |
56 | .. code-block:: console
57 |
58 | $ pip3 install .
59 |
60 | conda
61 | -----
62 |
63 | If you are running `Anaconda <https://www.anaconda.com>`_/miniconda, use:
64 |
65 | .. code-block:: console
66 |
67 | $ conda install -c mittagessen kraken
68 |
69 | Models
70 | ------
71 |
72 | Finally you'll have to scrounge up a recognition model to do the actual
73 | recognition of characters. To download the default English text recognition
74 | model and place it in the user's kraken directory:
75 |
76 | .. code-block:: console
77 |
78 | $ kraken get default
79 |
80 | A list of libre models available in the central repository can be retrieved by
81 | running:
82 |
83 | .. code-block:: console
84 |
85 | $ kraken list
86 |
87 | Model metadata can be extracted using:
88 |
89 | .. code-block:: console
90 |
91 | $ kraken show arabic-alam-al-kutub
92 | name: arabic-alam-al-kutub.clstm
93 |
94 | An experimental model for Classical Arabic texts.
95 |
96 | Network trained on 889 lines of [0] as a test case for a general Classical
97 | Arabic model. Ground truth was prepared by Sarah Savant
98 |    and Maxim Romanov.
99 |
100 | Vocalization was omitted in the ground truth. Training was stopped at ~35000
101 | iterations with an accuracy of 97%.
102 |
103 | [0] Ibn al-Faqīh (d. 365 AH). Kitāb al-buldān. Edited by Yūsuf al-Hādī, 1st
104 | edition. Bayrūt: ʿĀlam al-kutub, 1416 AH/1996 CE.
105 | alphabet: !()-.0123456789:[] «»،؟ءابةتثجحخدذرزسشصضطظعغفقكلمنهوىي ARABIC
106 | MADDAH ABOVE, ARABIC HAMZA ABOVE, ARABIC HAMZA BELOW
107 |
108 | Quickstart
109 | ==========
110 |
111 | Recognizing text on an image using the default parameters including the
112 | prerequisite steps of binarization and page segmentation:
113 |
114 | .. code-block:: console
115 |
116 | $ kraken -i image.tif image.txt binarize segment ocr
117 | Loading RNN ✓
118 | Processing ⣻
119 |
120 | To binarize a single image using the nlbin algorithm:
121 |
122 | .. code-block:: console
123 |
124 | $ kraken -i image.tif bw.tif binarize
125 |
126 | To segment a binarized image into reading-order sorted lines:
127 |
128 | .. code-block:: console
129 |
130 | $ kraken -i bw.tif lines.json segment
131 |
132 | To OCR a binarized image using the default RNN and the previously generated
133 | page segmentation:
134 |
135 | .. code-block:: console
136 |
137 | $ kraken -i bw.tif image.txt ocr --lines lines.json
138 |
139 | All commands and their parameters are documented, just add the standard
140 | ``--help`` flag for further information.
141 |
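The same pipeline can be driven from Python. The sketch below assumes the
``kraken.pageseg`` and ``kraken.rpred`` helpers and a locally available
recognition model; exact signatures may differ between versions:

.. code-block:: python

    from PIL import Image

    from kraken import binarization, pageseg, rpred
    from kraken.lib import models

    im = Image.open('image.tif')
    bw = binarization.nlbin(im)                  # adaptive binarization
    seg = pageseg.segment(bw)                    # reading-order sorted line boxes
    net = models.load_any('en-default.mlmodel')  # placeholder model path
    for record in rpred.rpred(net, bw, seg):     # one ocr_record per line
        print(record.prediction)
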
142 | Training Tutorial
143 | =================
144 |
145 | There is a training tutorial at :doc:`training`.
146 |
147 | .. _license:
148 |
149 | License
150 | =======
151 |
152 | ``Kraken`` is provided under the terms and conditions of the `Apache 2.0
153 | License `_ retained
154 | from the original ``ocropus`` distribution.
155 |
--------------------------------------------------------------------------------
/kraken/binarization.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Copyright 2015 Benjamin Kiessling
4 | # 2014 Thomas M. Breuel
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
15 | # or implied. See the License for the specific language governing
16 | # permissions and limitations under the License.
17 | """
18 | kraken.binarization
19 | ~~~~~~~~~~~~~~~~~~~
20 |
21 | An adaptive binarization algorithm.
22 | """
23 | import warnings
24 | import logging
25 | import numpy as np
26 |
27 | from PIL import Image
28 | from kraken.lib.util import pil2array, array2pil, is_bitonal, get_im_str
29 | from scipy.ndimage import filters, interpolation, morphology
30 |
31 | from kraken.lib.exceptions import KrakenInputException
32 |
33 | __all__ = ['nlbin']
34 |
35 | logger = logging.getLogger(__name__)
36 |
37 |
38 | def nlbin(im: Image,
39 | threshold: float = 0.5,
40 | zoom: float = 0.5,
41 | escale: float = 1.0,
42 | border: float = 0.1,
43 | perc: int = 80,
44 | range: int = 20,
45 | low: int = 5,
46 | high: int = 90) -> Image:
47 | """
48 | Performs binarization using non-linear processing.
49 |
50 | Args:
51 |         im (PIL.Image): Input image
52 |         threshold (float): Binarization threshold
53 | zoom (float): Zoom for background page estimation
54 | escale (float): Scale for estimating a mask over the text region
55 | border (float): Ignore this much of the border
56 | perc (int): Percentage for filters
57 | range (int): Range for filters
58 | low (int): Percentile for black estimation
59 | high (int): Percentile for white estimation
60 |
61 | Returns:
62 | PIL.Image containing the binarized image
63 |
64 | Raises:
65 | KrakenInputException when trying to binarize an empty image.
66 | """
67 | im_str = get_im_str(im)
68 | logger.info(u'Binarizing {}'.format(im_str))
69 | if is_bitonal(im):
70 | logger.info(u'Skipping binarization because {} is bitonal.'.format(im_str))
71 | return im
72 | # convert to grayscale first
73 | logger.debug(u'Converting {} to grayscale'.format(im_str))
74 | im = im.convert('L')
75 | raw = pil2array(im)
76 | logger.debug(u'Scaling and normalizing')
77 |     # rescale image to between 0 and 1
78 | raw = raw/np.float(np.iinfo(raw.dtype).max)
79 | # perform image normalization
80 | if np.amax(raw) == np.amin(raw):
81 | logger.warning(u'Trying to binarize empty image {}'.format(im_str))
82 | raise KrakenInputException('Image is empty')
83 | image = raw-np.amin(raw)
84 | image /= np.amax(image)
85 |
86 | logger.debug(u'Interpolation and percentile filtering')
87 | with warnings.catch_warnings():
88 | warnings.simplefilter('ignore', UserWarning)
89 | m = interpolation.zoom(image, zoom)
90 | m = filters.percentile_filter(m, perc, size=(range, 2))
91 | m = filters.percentile_filter(m, perc, size=(2, range))
92 | m = interpolation.zoom(m, 1.0/zoom)
93 | w, h = np.minimum(np.array(image.shape), np.array(m.shape))
94 | flat = np.clip(image[:w, :h]-m[:w, :h]+1, 0, 1)
95 |
96 | # estimate low and high thresholds
97 | d0, d1 = flat.shape
98 | o0, o1 = int(border*d0), int(border*d1)
99 | est = flat[o0:d0-o0, o1:d1-o1]
100 | logger.debug(u'Threshold estimates {}'.format(est))
101 | # by default, we use only regions that contain
102 | # significant variance; this makes the percentile
103 | # based low and high estimates more reliable
104 | logger.debug(u'Refine estimates')
105 | v = est-filters.gaussian_filter(est, escale*20.0)
106 | v = filters.gaussian_filter(v**2, escale*20.0)**0.5
107 | v = (v > 0.3*np.amax(v))
108 | v = morphology.binary_dilation(v, structure=np.ones((int(escale * 50), 1)))
109 | v = morphology.binary_dilation(v, structure=np.ones((1, int(escale * 50))))
110 | est = est[v]
111 | lo = np.percentile(est.ravel(), low)
112 | hi = np.percentile(est.ravel(), high)
113 |
114 | flat -= lo
115 | flat /= (hi-lo)
116 | flat = np.clip(flat, 0, 1)
117 | logger.debug(u'Thresholding at {}'.format(threshold))
118 | bin = np.array(255*(flat > threshold), 'B')
119 | return array2pil(bin)
120 |
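# A minimal usage sketch (the input path is a placeholder):
#
#   from PIL import Image
#   from kraken.binarization import nlbin
#
#   bw = nlbin(Image.open('input.tif'))
#   bw.save('bw.png')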
--------------------------------------------------------------------------------
/kraken/lib/morph.py:
--------------------------------------------------------------------------------
1 | """
2 | Various add-ons to the SciPy morphology package
3 | """
4 | import numpy as np
5 | from scipy.ndimage import morphology, measurements, filters
6 |
7 |
8 | def label(image: np.array, **kw) -> np.array:
9 | """
10 | Redefine the scipy.ndimage.measurements.label function to work with a wider
11 | range of data types. The default function is inconsistent about the data
12 | types it accepts on different platforms.
13 | """
14 | try:
15 | return measurements.label(image, **kw)
16 | except Exception:
17 | pass
18 | types = ["int32", "uint32", "int64", "uint64", "int16", "uint16"]
19 | for t in types:
20 | try:
21 | return measurements.label(np.array(image, dtype=t), **kw)
22 | except Exception:
23 | pass
24 | # let it raise the same exception as before
25 | return measurements.label(image, **kw)
26 |
27 |
28 | def find_objects(image: np.array, **kw) -> np.array:
29 | """
30 | Redefine the scipy.ndimage.measurements.find_objects function to work with
31 | a wider range of data types. The default function is inconsistent about
32 | the data types it accepts on different platforms.
33 | """
34 | try:
35 | return measurements.find_objects(image, **kw)
36 | except Exception:
37 | pass
38 | types = ["int32", "uint32", "int64", "uint64", "int16", "uint16"]
39 | for t in types:
40 | try:
41 | return measurements.find_objects(np.array(image, dtype=t), **kw)
42 | except Exception:
43 | pass
44 | # let it raise the same exception as before
45 | return measurements.find_objects(image, **kw)
46 |
47 |
48 | def r_dilation(image, size, origin=0):
49 | """Dilation with rectangular structuring element using maximum_filter"""
50 | return filters.maximum_filter(image, size, origin=origin)
51 |
52 |
53 | def r_erosion(image, size, origin=0):
54 |     """Erosion with rectangular structuring element using minimum_filter"""
55 | return filters.minimum_filter(image, size, origin=origin)
56 |
57 |
58 | def rb_dilation(image, size, origin=0):
59 | """Binary dilation using linear filters."""
60 | output = np.zeros(image.shape, 'f')
61 | filters.uniform_filter(image, size, output=output, origin=origin,
62 | mode='constant', cval=0)
63 | return np.array(output > 0, 'i')
64 |
65 |
66 | def rb_erosion(image, size, origin=0):
67 | """Binary erosion using linear filters."""
68 | output = np.zeros(image.shape, 'f')
69 | filters.uniform_filter(image, size, output=output, origin=origin,
70 | mode='constant', cval=1)
71 | return np.array(output == 1, 'i')
72 |
73 |
74 | def rb_opening(image, size, origin=0):
75 | """Binary opening using linear filters."""
76 | image = rb_erosion(image, size, origin=origin)
77 | return rb_dilation(image, size, origin=origin)
78 |
79 |
80 | def spread_labels(labels, maxdist=9999999):
81 | """Spread the given labels to the background"""
82 | distances, features = morphology.distance_transform_edt(labels == 0,
83 | return_distances=1,
84 | return_indices=1)
85 | indexes = features[0] * labels.shape[1] + features[1]
86 | spread = labels.ravel()[indexes.ravel()].reshape(*labels.shape)
87 | spread *= (distances < maxdist)
88 | return spread
89 |
90 |
91 | def correspondences(labels1, labels2):
92 | """Given two labeled images, compute an array giving the correspondences
93 | between labels in the two images."""
94 | q = 100000
95 | combo = labels1 * q + labels2
96 | result = np.unique(combo)
97 | result = np.array([result // q, result % q])
98 | return result
99 |
100 |
101 | def propagate_labels(image, labels, conflict=0):
102 | """Given an image and a set of labels, apply the labels
103 | to all the regions in the image that overlap a label.
104 | Assign the value `conflict` to any labels that have a conflict."""
105 | rlabels, _ = label(image)
106 | cors = correspondences(rlabels, labels)
107 | outputs = np.zeros(np.amax(rlabels) + 1, 'i')
108 | oops = -(1 << 30)
109 | for o, i in cors.T:
110 | if outputs[o] != 0:
111 | outputs[o] = oops
112 | else:
113 | outputs[o] = i
114 | outputs[outputs == oops] = conflict
115 | outputs[0] = 0
116 | return outputs[rlabels]
117 |
118 |
119 | def select_regions(binary, f, min=0, nbest=100000):
120 | """Given a scoring function f over slice tuples (as returned by
121 | find_objects), keeps at most nbest regions whose scores is higher
122 | than min."""
123 | labels, n = label(binary)
124 | objects = find_objects(labels)
125 | scores = [f(o) for o in objects]
126 | best = np.argsort(scores)
127 | keep = np.zeros(len(objects) + 1, 'i')
128 | if nbest > 0:
129 | for i in best[-nbest:]:
130 | if scores[i] <= min:
131 | continue
132 | keep[i+1] = 1
133 | return keep[labels]
134 |
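# A minimal usage sketch for select_regions (the binary image and the
# bounding-box-area scoring function are hypothetical):
#
#   import numpy as np
#
#   binary = np.zeros((100, 100), 'i')
#   binary[10:30, 10:40] = 1
#   binary[60:65, 60:65] = 1
#
#   def area(slices):
#       return (slices[0].stop - slices[0].start) * (slices[1].stop - slices[1].start)
#
#   # keep at most the two best-scoring components whose area exceeds 30
#   kept = select_regions(binary, area, min=30, nbest=2)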
--------------------------------------------------------------------------------
/kraken/transcribe.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Copyright 2015 Benjamin Kiessling
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
14 | # or implied. See the License for the specific language governing
15 | # permissions and limitations under the License.
16 | """
17 | Utility functions for ground truth transcription.
18 | """
19 | from kraken.lib.exceptions import KrakenInputException
20 | from kraken.lib.util import get_im_str
21 |
22 | from typing import List
23 |
24 | from jinja2 import Environment, PackageLoader
25 | from io import BytesIO
26 |
27 | import uuid
28 | import base64
29 | import logging
30 |
31 | logger = logging.getLogger()
32 |
33 |
34 | class TranscriptionInterface(object):
35 |
36 | def __init__(self, font=None, font_style=None):
37 |         logger.info(u'Initializing transcription object.')
38 | logger.debug(u'Initializing jinja environment.')
39 | env = Environment(loader=PackageLoader('kraken', 'templates'), autoescape=True)
40 | logger.debug(u'Loading transcription template.')
41 | self.tmpl = env.get_template('layout.html')
42 | self.pages = [] # type: List[dict]
43 | self.font = {'font': font, 'style': font_style}
44 | self.text_direction = 'horizontal-tb'
45 | self.page_idx = 1
46 | self.line_idx = 1
47 | self.seg_idx = 1
48 |
49 | def add_page(self, im, segmentation=None, records=None):
50 | """
51 | Adds an image to the transcription interface, optionally filling in
52 | information from a list of ocr_record objects.
53 |
54 | Args:
55 | im (PIL.Image): Input image
56 | segmentation (dict): Output of the segment method.
57 | records (list): A list of ocr_record objects.
58 | """
59 | im_str = get_im_str(im)
60 |         logger.info(u'Adding page {} with {} lines'.format(im_str, len(records) if records else len(segmentation['boxes'])))
61 | page = {}
62 | fd = BytesIO()
63 | im.save(fd, format='png', optimize=True)
64 | page['index'] = self.page_idx
65 | self.page_idx += 1
66 | logger.debug(u'Base64 encoding image')
67 | page['img'] = 'data:image/png;base64,' + base64.b64encode(fd.getvalue()).decode('ascii')
68 | page['lines'] = []
69 | if records:
70 | logger.debug(u'Adding records.')
71 | self.text_direction = segmentation['text_direction']
72 | for record, bbox in zip(records, segmentation['boxes']):
73 | page['lines'].append({'index': self.line_idx, 'text': record.prediction,
74 | 'left': 100*int(bbox[0]) / im.size[0],
75 | 'top': 100*int(bbox[1]) / im.size[1],
76 | 'width': 100*(bbox[2] - bbox[0])/im.size[0],
77 | 'height': 100*(int(bbox[3]) - int(bbox[1]))/im.size[1],
78 | 'bbox': '{}, {}, {}, {}'.format(int(bbox[0]),
79 | int(bbox[1]),
80 | int(bbox[2]),
81 | int(bbox[3]))})
82 |
83 | self.line_idx += 1
84 | elif segmentation:
85 | logger.debug(u'Adding segmentations.')
86 | self.text_direction = segmentation['text_direction']
87 | for bbox in segmentation['boxes']:
88 | page['lines'].append({'index': self.line_idx,
89 | 'left': 100*int(bbox[0]) / im.size[0],
90 | 'top': 100*int(bbox[1]) / im.size[1],
91 | 'width': 100*(bbox[2] - bbox[0])/im.size[0],
92 | 'height': 100*(int(bbox[3]) - int(bbox[1]))/im.size[1],
93 | 'bbox': '{}, {}, {}, {}'.format(int(bbox[0]),
94 | int(bbox[1]),
95 | int(bbox[2]),
96 | int(bbox[3]))})
97 | self.line_idx += 1
98 | else:
99 | raise KrakenInputException('Neither segmentations nor records given')
100 | self.pages.append(page)
101 |
102 | def write(self, fd):
103 | """
104 | Writes the HTML file to a file descriptor.
105 |
106 | Args:
107 |             fd (File): File descriptor (mode='wb') to write to.
108 | """
109 | logger.info(u'Rendering and writing transcription.')
110 | fd.write(self.tmpl.render(uuid=str(uuid.uuid4()), pages=self.pages,
111 | font=self.font,
112 | text_direction=self.text_direction).encode('utf-8'))
113 |
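# A minimal usage sketch (the image path and the `seg` dictionary, i.e. the
# output of kraken.pageseg.segment, are placeholders):
#
#   from PIL import Image
#   from kraken.transcribe import TranscriptionInterface
#
#   ti = TranscriptionInterface()
#   ti.add_page(Image.open('bw.png'), segmentation=seg)
#   with open('transcribe.html', 'wb') as fp:
#       ti.write(fp)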
--------------------------------------------------------------------------------
/kraken/lib/models.py:
--------------------------------------------------------------------------------
1 | """
2 | kraken.lib.models
3 | ~~~~~~~~~~~~~~~~~
4 |
5 | Wrapper around TorchVGSLModel including a variety of forward pass helpers for
6 | sequence classification.
7 | """
8 | from os.path import expandvars, expanduser, abspath
9 |
10 | import torch
11 | import numpy as np
12 | import kraken.lib.lineest
13 | import kraken.lib.ctc_decoder
14 |
15 | from typing import List, Tuple
16 |
17 | from kraken.lib.vgsl import TorchVGSLModel
18 | from kraken.lib.exceptions import KrakenInvalidModelException, KrakenInputException
19 |
20 | __all__ = ['TorchSeqRecognizer', 'load_any']
21 |
22 | import logging
23 |
24 | logger = logging.getLogger(__name__)
25 |
26 |
27 | class TorchSeqRecognizer(object):
28 | """
29 | A class wrapping a TorchVGSLModel with a more comfortable recognition interface.
30 | """
31 | def __init__(self, nn, decoder=kraken.lib.ctc_decoder.greedy_decoder, train: bool = False, device: str = 'cpu') -> None:
32 | """
33 | Constructs a sequence recognizer from a VGSL model and a decoder.
34 |
35 | Args:
36 | nn (kraken.lib.vgsl.TorchVGSLModel): neural network used for recognition
37 | decoder (func): Decoder function used for mapping softmax
38 | activations to labels and positions
39 | train (bool): Enables or disables gradient calculation
40 | device (torch.Device): Device to run model on
41 | """
42 | self.nn = nn
43 | self.kind = ''
44 | if train:
45 | self.nn.train()
46 | else:
47 | self.nn.eval()
48 | self.codec = self.nn.codec
49 | self.decoder = decoder
50 | self.train = train
51 | self.device = device
52 | self.nn.to(device)
53 |
54 | def to(self, device):
55 | """
56 | Moves model to device and automatically loads input tensors onto it.
57 | """
58 | self.device = device
59 | self.nn.to(device)
60 |
61 | def forward(self, line: torch.Tensor) -> np.array:
62 | """
63 | Performs a forward pass on a torch tensor of a line with shape (C, H, W)
64 | and returns a numpy array (W, C).
65 | """
66 | # make CHW -> 1CHW
67 | line = line.to(self.device)
68 | line = line.unsqueeze(0)
69 | o = self.nn.nn(line)
70 | if o.size(2) != 1:
71 | raise KrakenInputException('Expected dimension 3 to be 1, actual {}'.format(o.size()))
72 | self.outputs = o.detach().squeeze().cpu().numpy()
73 | return self.outputs
74 |
75 | def predict(self, line: torch.Tensor) -> List[Tuple[str, int, int, float]]:
76 | """
77 | Performs a forward pass on a torch tensor of a line with shape (C, H, W)
78 | and returns the decoding as a list of tuples (string, start, end,
79 | confidence).
80 | """
81 | o = self.forward(line)
82 | locs = self.decoder(o)
83 | return self.codec.decode(locs)
84 |
85 | def predict_string(self, line: torch.Tensor) -> str:
86 | """
87 | Performs a forward pass on a torch tensor of a line with shape (C, H, W)
88 | and returns a string of the results.
89 | """
90 | o = self.forward(line)
91 | locs = self.decoder(o)
92 | decoding = self.codec.decode(locs)
93 | return ''.join(x[0] for x in decoding)
94 |
95 | def predict_labels(self, line: torch.tensor) -> List[Tuple[int, int, int, float]]:
96 | """
97 | Performs a forward pass on a torch tensor of a line with shape (C, H, W)
98 | and returns a list of tuples (class, start, end, max). Max is the
99 | maximum value of the softmax layer in the region.
100 | """
101 | o = self.forward(line)
102 | return self.decoder(o)
103 |
104 |
105 | def load_any(fname: str, train: bool = False, device: str = 'cpu') -> TorchSeqRecognizer:
106 | """
107 | Loads anything that was, is, and will be a valid ocropus model and
108 | instantiates a shiny new kraken.lib.lstm.SeqRecognizer from the RNN
109 | configuration in the file.
110 |
111 | Currently it recognizes the following kinds of models:
112 |
113 | * pyrnn models containing BIDILSTMs
114 | * protobuf models containing converted python BIDILSTMs
115 | * protobuf models containing CLSTM networks
116 |
117 |     Additionally an attribute 'kind' will be added to the TorchSeqRecognizer
118 | containing a string representation of the source kind. Current known values
119 | are:
120 |
121 | * pyrnn for pickled BIDILSTMs
122 | * clstm for protobuf models generated by clstm
123 |
124 | Args:
125 | fname (str): Path to the model
126 | train (bool): Enables gradient calculation and dropout layers in model.
127 | device (str): Target device
128 |
129 | Returns:
130 | A kraken.lib.models.TorchSeqRecognizer object.
131 | """
132 | nn = None
133 | kind = ''
134 | fname = abspath(expandvars(expanduser(fname)))
135 | logger.info(u'Loading model from {}'.format(fname))
136 | try:
137 | nn = TorchVGSLModel.load_model(str(fname))
138 | kind = 'vgsl'
139 | except Exception:
140 | try:
141 | nn = TorchVGSLModel.load_clstm_model(fname)
142 | kind = 'clstm'
143 | except Exception:
144 | nn = TorchVGSLModel.load_pronn_model(fname)
145 | kind = 'pronn'
146 | try:
147 | nn = TorchVGSLModel.load_pyrnn_model(fname)
148 | kind = 'pyrnn'
149 | except Exception:
150 | pass
151 | if not nn:
152 | raise KrakenInvalidModelException('File {} not loadable by any parser.'.format(fname))
153 | seq = TorchSeqRecognizer(nn, train=train, device=device)
154 | seq.kind = kind
155 | return seq
156 |
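# A minimal usage sketch (the model path is a placeholder; `line` stands for a
# (C, H, W) float tensor of a single text line):
#
#   from kraken.lib.models import load_any
#
#   net = load_any('en-default.mlmodel', device='cpu')
#   text = net.predict_string(line)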
--------------------------------------------------------------------------------
/kraken/repo.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Copyright 2015 Benjamin Kiessling
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
14 | # or implied. See the License for the specific language governing
15 | # permissions and limitations under the License.
16 |
17 | # -*- coding: utf-8 -*-
18 | """
19 | Access functions to the model repository on github.
20 | """
21 | from collections import defaultdict
22 | from typing import Callable, Any
23 | from contextlib import closing
24 |
25 | from kraken.lib.exceptions import KrakenRepoException
26 |
27 | import base64
28 | import requests
29 | import json
30 | import os
31 | import logging
32 |
33 | __all__ = ['get_model', 'get_description', 'get_listing']
34 |
35 | logger = logging.getLogger(__name__)
36 |
37 | MODEL_REPO = 'https://api.github.com/repos/mittagessen/kraken-models/'
38 |
39 |
40 | def get_model(model_id: str, path: str, callback: Callable[..., Any]) -> None:
41 | """
42 | Retrieves a model and saves it to a path.
43 |
44 | Args:
45 | model_id (str): Identifier of the model
46 | path (str): Destination to write model to.
47 | callback (func): Function called for every 1024 octet chunk received.
48 | """
49 | logger.info(u'Saving model {} to {}'.format(model_id, path))
50 | logger.debug(u'Retrieving head of model repository')
51 | r = requests.get('{}{}'.format(MODEL_REPO, 'git/refs/heads/master'))
52 | callback()
53 | resp = r.json()
54 | if 'object' not in resp:
55 | logger.error(u'No \'object\' field in repo head API response.')
56 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message']))
57 | head = resp['object']['sha']
58 | logger.debug(u'Retrieving tree of model repository')
59 | r = requests.get('{}{}{}'.format(MODEL_REPO, 'git/trees/', head), params={'recursive': 1})
60 | callback()
61 | resp = r.json()
62 | if 'tree' not in resp:
63 | logger.error(u'No \'tree\' field in repo API response.')
64 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message']))
65 | url = None
66 | for el in resp['tree']:
67 | components = el['path'].split('/')
68 | if len(components) > 2 and components[1] == model_id and components[2] == 'DESCRIPTION':
69 | logger.debug(u'Retrieving description for {}'.format(components[1]))
70 | raw = base64.b64decode(requests.get(el['url']).json()['content']).decode('utf-8')
71 | desc = json.loads(raw)
72 | spath = os.path.join(path, desc['name'])
73 | elif len(components) > 2 and components[1] == model_id:
74 | url = el['url']
75 | break
76 | if not url:
77 | logger.error(u'Model {} not in repository.'.format(model_id))
78 |         raise KrakenRepoException('Model {} not in repository'.format(model_id))
79 | with closing(requests.get(url, headers={'Accept': 'application/vnd.github.v3.raw'},
80 | stream=True)) as r:
81 | with open(spath, 'wb') as f:
82 | logger.debug(u'Downloading model')
83 | for chunk in r.iter_content(chunk_size=1024):
84 | callback()
85 | f.write(chunk)
86 | return
87 |
88 |
89 | def get_description(model_id: str) -> dict:
90 | logger.info('Retrieving metadata for {}'.format(model_id))
91 | logger.debug('Retrieving head of model repository')
92 | r = requests.get('{}{}'.format(MODEL_REPO, 'git/refs/heads/master'))
93 | resp = r.json()
94 | if 'object' not in resp:
95 | logger.error('No \'object\' field in repo head API response.')
96 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message']))
97 | head = resp['object']['sha']
98 | logger.debug('Retrieving tree of model repository')
99 | r = requests.get('{}{}{}'.format(MODEL_REPO, 'git/trees/', head), params={'recursive': 1})
100 | resp = r.json()
101 | if 'tree' not in resp:
102 | logger.error('No \'tree\' field in repo API response.')
103 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message']))
104 | for el in resp['tree']:
105 | components = el['path'].split('/')
106 | if len(components) > 2 and components[1] == model_id and components[2] == 'DESCRIPTION':
107 | logger.debug('Retrieving description for {}'.format(components[1]))
108 | raw = base64.b64decode(requests.get(el['url']).json()['content']).decode('utf-8')
109 | return defaultdict(str, json.loads(raw))
110 | raise KrakenRepoException('No description for {} found'.format(model_id))
111 |
112 |
113 | def get_listing(callback: Callable[..., Any]) -> dict:
114 | logger.info(u'Retrieving model list')
115 | r = requests.get('{}{}'.format(MODEL_REPO, 'git/refs/heads/master'))
116 | callback()
117 | resp = r.json()
118 | if 'object' not in resp:
119 | logger.error(u'No \'object\' field in repo head API response.')
120 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message']))
121 | head = resp['object']['sha']
122 | logger.debug(u'Retrieving tree of model repository')
123 | r = requests.get('{}{}{}'.format(MODEL_REPO, 'git/trees/', head), params={'recursive': 1})
124 | callback()
125 | resp = r.json()
126 | if 'tree' not in resp:
127 | logger.error(u'No \'tree\' field in repo API response.')
128 | raise KrakenRepoException('{}: {}'.format(r.status_code, resp['message']))
129 | models = {}
130 | for el in resp['tree']:
131 | components = el['path'].split('/')
132 | # new model
133 | if len(components) == 2:
134 | models[components[1]] = {'type': components[0]}
135 | if len(components) > 2 and components[2] == 'DESCRIPTION':
136 | logger.debug(u'Retrieving description for {}'.format(components[1]))
137 | r = requests.get(el['url'])
138 | if not r.ok:
139 | logger.error(u'Requests to \'{}\' failed with status {}'.format(el['url'], r.status_code))
140 | raise KrakenRepoException('{}: {}'.format(r.status_code, r.json()['message']))
141 | raw = base64.b64decode(requests.get(el['url']).json()['content']).decode('utf-8')
142 | callback()
143 | try:
144 | models[components[1]].update(json.loads(raw))
145 | except Exception:
146 | del models[components[1]]
147 | elif len(components) > 2 and components[1] in models:
148 | models[components[1]]['model'] = el['url']
149 | return models
150 |
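# A minimal usage sketch (the model identifier and target directory are
# placeholders; the callbacks simply ignore the progress ticks):
#
#   from kraken import repo
#
#   listing = repo.get_listing(callback=lambda *args: None)
#   meta = repo.get_description('arabic-alam-al-kutub')
#   repo.get_model('arabic-alam-al-kutub', '/tmp', callback=lambda *args: None)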
--------------------------------------------------------------------------------
/kraken/lib/ctc_decoder.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2017 Benjamin Kiessling
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
13 | # or implied. See the License for the specific language governing
14 | # permissions and limitations under the License.
15 |
16 | # -*- coding: utf-8 -*-
17 | """
18 | Decoders for softmax outputs of CTC trained networks.
19 | """
20 |
21 | import collections
22 | import numpy as np
23 |
24 | from typing import List, Tuple
25 | from scipy.special import logsumexp
26 | from scipy.ndimage import measurements
27 |
28 | from itertools import groupby
29 |
30 | __all__ = ['beam_decoder', 'greedy_decoder', 'blank_threshold_decoder']
31 |
32 |
33 | def beam_decoder(outputs: np.ndarray, beam_size: int = 3) -> List[Tuple[int, int, int, float]]:
34 | """
35 | Translates back the network output to a label sequence using
36 | same-prefix-merge beam search decoding as described in [0].
37 |
38 | [0] Hannun, Awni Y., et al. "First-pass large vocabulary continuous speech
39 | recognition using bi-directional recurrent DNNs." arXiv preprint
40 | arXiv:1408.2873 (2014).
41 |
42 | Args:
43 |         outputs (numpy.array): (C, W) shaped softmax output tensor
44 |
45 |     Returns:
46 |         A list with tuples (class, start, end, prob) where prob is the maximum
47 |         value of the softmax layer in the region.
48 | """
49 | c, w = outputs.shape
50 | probs = np.log(outputs)
51 | beam = [(tuple(), (0.0, float('-inf')))] # type: List[Tuple[Tuple, Tuple[float, float]]]
52 |
53 | # loop over each time step
54 | for t in range(w):
55 | next_beam = collections.defaultdict(lambda: 2*(float('-inf'),)) # type: dict
56 | # p_b -> prob for prefix ending in blank
57 | # p_nb -> prob for prefix not ending in blank
58 | for prefix, (p_b, p_nb) in beam:
59 | # only update ending-in-blank-prefix probability for blank
60 | n_p_b, n_p_nb = next_beam[prefix]
61 | n_p_b = logsumexp((n_p_b, p_b + probs[0, t], p_nb + probs[0, t]))
62 | next_beam[prefix] = (n_p_b, n_p_nb)
63 | # loop over non-blank classes
64 | for s in range(1, c):
65 | # only update the not-ending-in-blank-prefix probability for prefix+s
66 | l_end = prefix[-1][0] if prefix else None
67 | n_prefix = prefix + ((s, t, t),)
68 | n_p_b, n_p_nb = next_beam[n_prefix]
69 | if s == l_end:
70 | # substitute the previous non-blank-ending-prefix
71 | # probability for repeated labels
72 | n_p_nb = logsumexp((n_p_nb, p_b + probs[s, t]))
73 | else:
74 | n_p_nb = logsumexp((n_p_nb, p_b + probs[s, t], p_nb + probs[s, t]))
75 |
76 | next_beam[n_prefix] = (n_p_b, n_p_nb)
77 |
78 | # If s is repeated at the end we also update the unchanged
79 | # prefix. This is the merging case.
80 | if s == l_end:
81 | n_p_b, n_p_nb = next_beam[prefix]
82 | n_p_nb = logsumexp((n_p_nb, p_nb + probs[s, t]))
83 | # rewrite both new and old prefix positions
84 | next_beam[prefix[:-1] + ((prefix[-1][0], prefix[-1][1], t),)] = (n_p_b, n_p_nb)
85 | next_beam[n_prefix[:-1] + ((n_prefix[-1][0], n_prefix[-1][1], t),)] = next_beam.pop(n_prefix)
86 |
87 | # Sort and trim the beam before moving on to the
88 | # next time-step.
89 | beam = sorted(next_beam.items(),
90 | key=lambda x: logsumexp(x[1]),
91 | reverse=True)
92 | beam = beam[:beam_size]
93 | return [(c, start, end, max(outputs[c, start:end+1])) for (c, start, end) in beam[0][0]]
94 |
95 |
96 | def greedy_decoder(outputs: np.ndarray) -> List[Tuple[int, int, int, float]]:
97 | """
98 | Translates back the network output to a label sequence using greedy/best
99 | path decoding as described in [0].
100 |
101 | [0] Graves, Alex, et al. "Connectionist temporal classification: labelling
102 | unsegmented sequence data with recurrent neural networks." Proceedings of
103 | the 23rd international conference on Machine learning. ACM, 2006.
104 |
105 | Args:
106 |         outputs (numpy.array): (C, W) shaped softmax output tensor
107 |
108 | Returns:
109 | A list with tuples (class, start, end, max). max is the maximum value
110 | of the softmax layer in the region.
111 | """
112 | labels = np.argmax(outputs, 0)
113 | seq_len = outputs.shape[1]
114 | mask = np.eye(outputs.shape[0], dtype='bool')[labels].T
115 | classes = []
116 | for label, group in groupby(zip(np.arange(seq_len), labels, outputs[mask]), key=lambda x: x[1]):
117 | lgroup = list(group)
118 | if label != 0:
119 | classes.append((label, lgroup[0][0], lgroup[-1][0], max(x[2] for x in lgroup)))
120 | return classes
121 |
122 |
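# A worked example for greedy_decoder on a tiny, hypothetical softmax output
# with 3 classes (class 0 being the blank) over 4 time steps:
#
#   probs = np.array([[0.9, 0.1, 0.1, 0.8],
#                     [0.1, 0.8, 0.7, 0.1],
#                     [0.0, 0.1, 0.2, 0.1]])
#   greedy_decoder(probs)  # -> [(1, 1, 2, 0.8)]: class 1 spans steps 1-2, peak 0.8
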
123 | def blank_threshold_decoder(outputs: np.ndarray, threshold: float = 0.5) -> List[Tuple[int, int, int, float]]:
124 | """
125 | Translates back the network output to a label sequence as the original
126 | ocropy/clstm.
127 |
128 | Thresholds on class 0, then assigns the maximum (non-zero) class to each
129 | region.
130 |
131 | Args:
132 |         outputs (numpy.array): (C, W) shaped softmax output tensor
133 | threshold (float): Threshold for 0 class when determining possible
134 | label locations.
135 |
136 | Returns:
137 | A list with tuples (class, start, end, max). max is the maximum value
138 | of the softmax layer in the region.
139 | """
140 | outputs = outputs.T
141 | labels, n = measurements.label(outputs[:, 0] < threshold)
142 | mask = np.tile(labels.reshape(-1, 1), (1, outputs.shape[1]))
143 | maxima = measurements.maximum_position(outputs, mask, np.arange(1, np.amax(mask)+1))
144 | p = 0
145 | start = None
146 | x = []
147 | for idx, val in enumerate(labels):
148 | if val != 0 and start is None:
149 | start = idx
150 | p += 1
151 | if val == 0 and start is not None:
152 | if maxima[p-1][1] == 0:
153 | start = None
154 | else:
155 | x.append((maxima[p-1][1], start, idx, outputs[maxima[p-1]]))
156 | start = None
157 |     # append the last non-zero region to the list if no zero region occurs after it
158 | if start:
159 | x.append((maxima[p-1][1], start, len(outputs), outputs[maxima[p-1]]))
160 |     return [y for y in x if y[0] != 0]
161 |
--------------------------------------------------------------------------------
/tests/test_layers.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import unittest
3 |
4 | from nose.tools import raises
5 |
6 | import torch
7 | from kraken.lib import layers
8 |
9 |
10 | class TestLayers(unittest.TestCase):
11 |
12 | """
13 | Testing custom layer implementations.
14 | """
15 | def setUp(self):
16 | torch.set_grad_enabled(False)
17 |
18 | def test_maxpool(self):
19 | """
20 | Test maximum pooling layer.
21 | """
22 | mp = layers.MaxPool((3, 3), (2, 2))
23 | o = mp(torch.randn(1, 2, 32, 64))
24 | self.assertEqual(o.shape, (1, 2, 15, 31))
25 |
26 | def test_1d_dropout(self):
27 | """
28 | Test 1d dropout layer.
29 | """
30 | do = layers.Dropout(0.2, 1)
31 | o = do(torch.randn(1, 2, 32, 64))
32 | self.assertEqual(o.shape, (1, 2, 32, 64))
33 |
34 | def test_2d_dropout(self):
35 | """
36 | Test 2d dropout layer.
37 | """
38 | do = layers.Dropout(0.2, 2)
39 | o = do(torch.randn(1, 2, 32, 64))
40 | self.assertEqual(o.shape, (1, 2, 32, 64))
41 |
42 | def test_forward_rnn_layer_x(self):
43 | """
44 | Test unidirectional RNN layer in x-dimension.
45 | """
46 | rnn = layers.TransposedSummarizingRNN(10, 2, 'f', False, False)
47 | o = rnn(torch.randn(1, 10, 32, 64))
48 | self.assertEqual(o.shape, (1, 2, 32, 64))
49 |
50 | def test_forward_rnn_layer_y(self):
51 | """
52 | Test unidirectional RNN layer in y-dimension.
53 | """
54 | rnn = layers.TransposedSummarizingRNN(10, 2, 'f', True, False)
55 | o = rnn(torch.randn(1, 10, 32, 64))
56 | self.assertEqual(o.shape, (1, 2, 32, 64))
57 |
58 | def test_forward_rnn_layer_x_summarize(self):
59 | """
60 | Test unidirectional summarizing RNN layer in x-dimension.
61 | """
62 | rnn = layers.TransposedSummarizingRNN(10, 2, 'f', False, True)
63 | o = rnn(torch.randn(1, 10, 32, 64))
64 | self.assertEqual(o.shape, (1, 2, 32, 1))
65 |
66 | def test_forward_rnn_layer_y_summarize(self):
67 | """
68 | Test unidirectional summarizing RNN layer in y-dimension.
69 | """
70 | rnn = layers.TransposedSummarizingRNN(10, 2, 'f', True, True)
71 | o = rnn(torch.randn(1, 10, 32, 64))
72 | self.assertEqual(o.shape, (1, 2, 1, 64))
73 |
74 | def test_bidi_rnn_layer_x(self):
75 | """
76 | Test bidirectional RNN layer in x-dimension.
77 | """
78 | rnn = layers.TransposedSummarizingRNN(10, 2, 'b', False, False)
79 | o = rnn(torch.randn(1, 10, 32, 64))
80 | self.assertEqual(o.shape, (1, 4, 32, 64))
81 |
82 | def test_bidi_rnn_layer_y(self):
83 | """
84 | Test bidirectional RNN layer in y-dimension.
85 | """
86 | rnn = layers.TransposedSummarizingRNN(10, 2, 'b', True, False)
87 | o = rnn(torch.randn(1, 10, 32, 64))
88 | self.assertEqual(o.shape, (1, 4, 32, 64))
89 |
90 | def test_bidi_rnn_layer_x_summarize(self):
91 | """
92 | Test bidirectional summarizing RNN layer in x-dimension.
93 | """
94 | rnn = layers.TransposedSummarizingRNN(10, 2, 'b', False, True)
95 | o = rnn(torch.randn(1, 10, 32, 64))
96 | self.assertEqual(o.shape, (1, 4, 32, 1))
97 |
98 | def test_bidi_rnn_layer_y_summarize(self):
99 | """
100 | Test bidirectional summarizing RNN layer in y-dimension.
101 | """
102 | rnn = layers.TransposedSummarizingRNN(10, 2, 'b', True, True)
103 | o = rnn(torch.randn(1, 10, 32, 64))
104 | self.assertEqual(o.shape, (1, 4, 1, 64))
105 |
106 | def test_linsoftmax(self):
107 | """
108 | Test basic function of linear layer.
109 | """
110 | lin = layers.LinSoftmax(20, 10)
111 | o = lin(torch.randn(1, 20, 12, 24))
112 | self.assertEqual(o.shape, (1, 10, 12, 24))
113 |
114 | def test_linsoftmax_train(self):
115 | """
116 | Test function of linear layer in training mode (log_softmax)
117 | """
118 | lin = layers.LinSoftmax(20, 10).train()
119 | o = lin(torch.randn(1, 20, 12, 24))
120 | self.assertLess(o.max(), 0)
121 |
122 | def test_linsoftmax_test(self):
123 | """
124 | Test function of linear layer in eval mode (softmax)
125 | """
126 | lin = layers.LinSoftmax(20, 10).eval()
127 | o = lin(torch.randn(1, 20, 12, 24))
128 | self.assertGreaterEqual(o.min(), 0)
129 |
130 | def test_linsoftmax_aug(self):
131 | """
132 | Test basic function of linear layer with 1-augmentation.
133 | """
134 | lin = layers.LinSoftmax(20, 10, True)
135 | o = lin(torch.randn(1, 20, 12, 24))
136 | self.assertEqual(o.shape, (1, 10, 12, 24))
137 |
138 | def test_linsoftmax_aug_train(self):
139 | """
140 | Test function of linear layer in training mode (log_softmax) with 1-augmentation
141 | """
142 | lin = layers.LinSoftmax(20, 10, True).train()
143 | o = lin(torch.randn(1, 20, 12, 24))
144 | self.assertLess(o.max(), 0)
145 |
146 | def test_linsoftmax_aug_test(self):
147 | """
148 | Test function of linear layer in eval mode (softmax) with 1-augmentation
149 | """
150 | lin = layers.LinSoftmax(20, 10, True).eval()
151 | o = lin(torch.randn(1, 20, 12, 24))
152 | self.assertGreaterEqual(o.min(), 0)
153 |
154 | def test_actconv2d_lin(self):
155 | """
156 | Test convolutional layer without activation.
157 | """
158 | conv = layers.ActConv2D(5, 12, (3, 3), 'l')
159 | o = conv(torch.randn(1, 5, 24, 12))
160 | self.assertEqual(o.shape, (1, 12, 24, 12))
161 |
162 | def test_actconv2d_sigmoid(self):
163 | """
164 | Test convolutional layer with sigmoid activation.
165 | """
166 | conv = layers.ActConv2D(5, 12, (3, 3), 's')
167 | o = conv(torch.randn(1, 5, 24, 12))
168 | self.assertTrue(0 <= o.min() <= 1)
169 | self.assertTrue(0 <= o.max() <= 1)
170 |
171 | def test_actconv2d_tanh(self):
172 | """
173 | Test convolutional layer with tanh activation.
174 | """
175 | conv = layers.ActConv2D(5, 12, (3, 3), 't')
176 | o = conv(torch.randn(1, 5, 24, 12))
177 | self.assertTrue(-1 <= o.min() <= 1)
178 | self.assertTrue(-1 <= o.max() <= 1)
179 |
180 | def test_actconv2d_softmax(self):
181 | """
182 | Test convolutional layer with softmax activation.
183 | """
184 | conv = layers.ActConv2D(5, 12, (3, 3), 'm')
185 | o = conv(torch.randn(1, 5, 24, 12))
186 | self.assertTrue(0 <= o.min() <= 1)
187 | self.assertTrue(0 <= o.max() <= 1)
188 |
189 | def test_actconv2d_relu(self):
190 | """
191 | Test convolutional layer with relu activation.
192 | """
193 | conv = layers.ActConv2D(5, 12, (3, 3), 'r')
194 | o = conv(torch.randn(1, 5, 24, 12))
195 | self.assertLessEqual(0, o.min())
196 | self.assertLessEqual(0, o.max())
197 |
--------------------------------------------------------------------------------
/kraken/lib/train.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Copyright 2015 Benjamin Kiessling
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
14 | # or implied. See the License for the specific language governing
15 | # permissions and limitations under the License.
16 | """
17 | Training loop interception helpers
18 | """
19 | import abc
20 | import torch
21 | import numpy as np
22 |
23 | from itertools import cycle
24 | from torch.utils import data
25 | from functools import partial
26 | from typing import Tuple, Union, Optional, Callable, List, Dict, Any
27 | from collections.abc import Iterable
28 |
29 | class TrainStopper(Iterable):
30 |
31 | def __init__(self):
32 | self.best_loss = 0.0
33 | self.best_epoch = 0
34 |
35 | @abc.abstractmethod
36 | def update(self, val_loss: float) -> None:
37 | """
38 | Updates the internal state of the train stopper.
39 | """
40 | pass
41 |
42 |
43 | def annealing_const(start: float, end: float, pct: float) -> float:
44 | return start
45 |
46 | def annealing_linear(start: float, end: float, pct: float) -> float:
47 | return start + pct * (end-start)
48 |
49 | def annealing_cos(start: float, end: float, pct: float) -> float:
50 | co = np.cos(np.pi * pct) + 1
51 | return end + (start-end)/2 * co
52 |
53 |
54 | class TrainScheduler(object):
55 | """
56 | Implements learning rate scheduling.
57 | """
58 | def __init__(self, optimizer: torch.optim.Optimizer) -> None:
59 | self.steps: List[Dict[str, Any]] = []
60 | self.optimizer = optimizer
61 | self.cycle: Any = None
62 |
63 | def add_phase(self,
64 | iterations: int,
65 | lrate: Tuple[float, float] = (1e-4, 1e-4),
66 | momentum: Tuple[float, float] = (0.9, 0.9),
67 | wd: float = 0.0,
68 | annealing_fn: Callable[[float, float, float], float] = annealing_const) -> None:
69 | """
70 | Adds a new phase to the scheduler.
71 |
72 | Args:
73 |             iterations (int): Number of iterations in this phase
74 |             lrate (tuple): Learning rate at the start and at the end of the
75 |                 phase; intermediate values are computed by `annealing_fn`
76 |             momentum (tuple): Momentum at the start and at the end of the
77 |                 phase; intermediate values are computed by `annealing_fn`
78 |             wd (float): Weight decay applied throughout the phase
79 |             annealing_fn (Callable[[float, float, float], float]): Annealing
80 |                 function mapping (start, end, pct) to the current value. Can be
81 |                 one of `annealing_const` (keeping the start value),
82 |                 `annealing_linear` (linear change), and `annealing_cos` (cosine
83 |                 change).
84 | """
85 | self.steps.extend([{'lr': annealing_fn(*lrate, pct=x/iterations),
86 | 'momentum': annealing_fn(*momentum, pct=x/iterations),
87 | 'weight_decay': wd} for x in range(iterations)])
88 |
89 | def step(self) -> None:
90 | """
91 | Performs an optimization step.
92 | """
93 | if not self.cycle:
94 | self.cycle = cycle(self.steps)
95 | kwargs = next(self.cycle)
96 | for param_group in self.optimizer.param_groups:
97 | param_group.update(kwargs)
98 |
99 |
100 | def add_1cycle(sched: TrainScheduler, iterations: int,
101 | max_lr: float = 1e-4, div: float = 25.0,
102 | max_mom: float = 0.95, min_mom: float = 0.85, wd: float = 0.0):
103 | """
104 | Adds 1cycle policy [0] phases to a learning rate scheduler.
105 |
106 | [0] Smith, Leslie N. "A disciplined approach to neural network hyper-parameters: Part 1--learning rate, batch size, momentum, and weight decay." arXiv preprint arXiv:1803.09820 (2018).
107 |
108 | Args:
109 | sched (kraken.lib.train.Trainscheduler): TrainScheduler instance
110 | iterations (int): Number of iterations per cycle
111 | max_lr (float): Peak learning rate
112 | div (float): divisor to determine minimum learning rate (min_lr = max_lr / div)
113 |         max_mom (float): Maximum momentum
114 |         min_mom (float): Minimum momentum
115 | wd (float): Weight decay
116 | """
117 | sched.add_phase(iterations//2, (max_lr/div, max_lr), (max_mom, min_mom), wd, annealing_linear)
118 | sched.add_phase(iterations//2, (max_lr, max_lr/div), (min_mom, max_mom), wd, annealing_cos)
119 |
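# A minimal usage sketch for the scheduler (model, optimizer, and
# hyperparameters are placeholders):
#
#   model = torch.nn.Linear(10, 2)
#   optim = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
#   sched = TrainScheduler(optim)
#   add_1cycle(sched, iterations=1000, max_lr=1e-3)
#
#   # inside the training loop, update lr/momentum before each optimizer step:
#   #   sched.step()
#   #   optim.step()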
120 |
121 | class EarlyStopping(TrainStopper):
122 | """
123 | Early stopping to terminate training when validation loss doesn't improve
124 | over a certain time.
125 | """
126 | def __init__(self, it: data.DataLoader = None, min_delta: float = 0.002, lag: int = 5) -> None:
127 | """
128 | Args:
129 | it (torch.utils.data.DataLoader): training data loader
130 | min_delta (float): minimum change in validation loss to qualify as improvement.
131 | lag (int): Number of epochs to wait for improvement before
132 | terminating.
133 | """
134 | super().__init__()
135 | self.min_delta = min_delta
136 | self.lag = lag
137 | self.it = it
138 | self.wait = 0
139 | self.epoch = -1
140 |
141 | def __iter__(self):
142 | return self
143 |
144 | def __next__(self):
145 | if self.wait >= self.lag:
146 | raise StopIteration
147 | self.epoch += 1
148 | return self.it
149 |
150 | def update(self, val_loss: float) -> None:
151 | """
152 | Updates the internal validation loss state
153 | """
154 | if (val_loss - self.best_loss) < self.min_delta:
155 | self.wait += 1
156 | else:
157 | self.wait = 0
158 | self.best_loss = val_loss
159 | self.best_epoch = self.epoch
160 |
161 |
162 | class EpochStopping(TrainStopper):
163 | """
164 | Dumb stopping after a fixed number of epochs.
165 | """
166 | def __init__(self, it: data.DataLoader = None, epochs: int = 100) -> None:
167 | """
168 | Args:
169 | it (torch.utils.data.DataLoader): training data loader
170 | epochs (int): Number of epochs to train for
171 | """
172 | super().__init__()
173 | self.epochs = epochs
174 | self.epoch = -1
175 | self.it = it
176 |
177 | def __iter__(self):
178 | return self
179 |
180 | def __next__(self):
181 | if self.epoch < self.epochs - 1:
182 | self.epoch += 1
183 | return self.it
184 | else:
185 | raise StopIteration
186 |
187 | def update(self, val_loss: float) -> None:
188 | """
189 | Only update internal best epoch
190 | """
191 | if val_loss > self.best_loss:
192 | self.best_loss = val_loss
193 | self.best_epoch = self.epoch
194 |
--------------------------------------------------------------------------------
/docs/vgsl.rst:
--------------------------------------------------------------------------------
1 | .. _vgsl:
2 |
3 | VGSL network specification
4 | ==========================
5 |
6 | kraken implements a dialect of the Variable-size Graph Specification Language
7 | (VGSL), enabling the specification of different network architectures for image
8 | processing purposes using a short definition string.
9 |
10 | Basics
11 | ------
12 |
13 | A VGSL specification consists of an input block, one or more layers, and an
14 | output block. For example:
15 |
16 | .. code-block:: console
17 |
18 | [1,48,0,1 Cr3,3,32 Mp2,2 Cr3,3,64 Mp2,2 S1(1x12)1,3 Lbx100 Do O1c103]
19 |
20 | The first block defines the input in order of [batch, height, width, channels]
21 | with zero-valued dimensions being variable. Integer valued height or width
22 | input specifications will result in the input images being automatically scaled
23 | in either dimension.
24 |
25 | When channels is set to 1, grayscale or B/W inputs are expected; 3 expects RGB
26 | color images. Higher values in combination with a height of 1 result in the
27 | network being fed 1 pixel wide grayscale strips scaled to the size of the
28 | channel dimension.
29 |
30 | After the input, a number of layers are defined. Layers operate on the channel
31 | dimension; this is intuitive for convolutional layers but a recurrent layer
32 | doing sequence classification along the width axis on an image of a particular
33 | height requires the height dimension to be moved to the channel dimension,
34 | e.g.:
35 |
36 | .. code-block:: console
37 |
38 | [1,48,0,1 S1(1x48)1,3 Lbx100 O1c103]
39 |
40 | or using the alternative slightly faster formulation:
41 |
42 | .. code-block:: console
43 |
44 | [1,1,0,48 Lbx100 O1c103]
45 |
46 | Finally an output definition is appended. When training sequence classification
47 | networks with the provided tools the appropriate output definition is
48 | automatically appended to the network based on the alphabet of the training
49 | data.
50 |
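A specification string can also be instantiated programmatically through
``kraken.lib.vgsl.TorchVGSLModel``. The sketch below reuses a specification from
the examples that follow and assumes the constructor accepts a plain VGSL
string:

.. code-block:: python

    from kraken.lib.vgsl import TorchVGSLModel

    spec = '[1,48,0,1 Cr3,3,32 Do0.1,2 Mp2,2 Cr3,3,64 Do0.1,2 Mp2,2 S1(1x12)1,3 Lbx100 Do O1c59]'
    nn = TorchVGSLModel(spec)
    nn.eval()   # switch dropout layers to inference behaviour
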
51 | Examples
52 | --------
53 |
54 | .. code-block:: console
55 |
56 |     [1,1,0,48 Lbx100 Do O1c59]
57 |
58 | Creating new model [1,1,0,48 Lbx100 Do] with 59 outputs
59 | layer type params
60 | 0 rnn direction b transposed False summarize False out 100 legacy None
61 | 1 dropout probability 0.5 dims 1
62 | 2 linear augmented False out 59
63 |
64 | A simple recurrent recognition model with a single LSTM layer classifying lines
65 | normalized to 48 pixels in height.
66 |
67 | .. code-block:: console
68 |
69 |     [1,48,0,1 Cr3,3,32 Do0.1,2 Mp2,2 Cr3,3,64 Do0.1,2 Mp2,2 S1(1x12)1,3 Lbx100 Do O1c59]
70 |
71 | Creating new model [1,48,0,1 Cr3,3,32 Do0.1,2 Mp2,2 Cr3,3,64 Do0.1,2 Mp2,2 S1(1x12)1,3 Lbx100 Do] with 59 outputs
72 | layer type params
73 | 0 conv kernel 3 x 3 filters 32 activation r
74 | 1 dropout probability 0.1 dims 2
75 | 2 maxpool kernel 2 x 2 stride 2 x 2
76 | 3 conv kernel 3 x 3 filters 64 activation r
77 | 4 dropout probability 0.1 dims 2
78 | 5 maxpool kernel 2 x 2 stride 2 x 2
79 | 6 reshape from 1 1 x 12 to 1/3
80 | 7 rnn direction b transposed False summarize False out 100 legacy None
81 | 8 dropout probability 0.5 dims 1
82 | 9 linear augmented False out 59
83 |
84 | A model with a small convolutional stack before a recurrent LSTM layer. The
85 | extended dropout layer syntax is used to reduce drop probability on the depth
86 | dimension as the default is too high for convolutional layers. The remainder of
87 | the height dimension (`12`) is reshaped into the depth dimensions before
88 | applying the final recurrent and linear layers.
89 |
90 | .. code-block:: console
91 |
92 |     [1,0,0,3 Cr3,3,16 Mp3,3 Lfys64 Lbx128 Lbx256 Do O1c59]
93 |
94 | Creating new model [1,0,0,3 Cr3,3,16 Mp3,3 Lfys64 Lbx128 Lbx256 Do] with 59 outputs
95 | layer type params
96 | 0 conv kernel 3 x 3 filters 16 activation r
97 | 1 maxpool kernel 3 x 3 stride 3 x 3
98 | 2 rnn direction f transposed True summarize True out 64 legacy None
99 | 3 rnn direction b transposed False summarize False out 128 legacy None
100 | 4 rnn direction b transposed False summarize False out 256 legacy None
101 | 5 dropout probability 0.5 dims 1
102 | 6 linear augmented False out 59
103 |
104 | A model with arbitrary sized color image input, an initial summarizing
105 | recurrent layer to squash the height to 64, followed by 2 bi-directional
106 | recurrent layers and a linear projection.
107 |
108 | Convolutional Layers
109 | --------------------
110 |
111 | .. code-block:: console
112 |
113 |     C[{name}](s|t|r|l|m)<y>,<x>,<d>
114 | s = sigmoid
115 | t = tanh
116 | r = relu
117 | l = linear
118 | m = softmax
119 |
120 | Adds a 2D convolution with kernel size `(y, x)` and `d` output channels, applying
121 | the selected nonlinearity.
122 |
123 | Recurrent Layers
124 | ----------------
125 |
126 | .. code-block:: console
127 |
128 |     L[{name}](f|r|b)(x|y)[s]<n> LSTM cell with n outputs.
129 |     G[{name}](f|r|b)(x|y)[s]<n> GRU cell with n outputs.
130 | f runs the RNN forward only.
131 | r runs the RNN reversed only.
132 | b runs the RNN bidirectionally.
133 |     s (optional) summarizes the output in the requested dimension, returning only the last step.
134 |
135 | Adds either an LSTM or GRU recurrent layer to the network using either the `x`
136 | (width) or `y` (height) dimension as the time axis. Input features are the
137 | channel dimension and the non-time-axis dimension (height/width) is treated as
138 | another batch dimension. For example, a `Lfx25` layer on a `1, 16, 906, 32`
139 | input will execute 16 independent forward passes on `906x32` tensors, resulting
140 | in an output of shape `1, 16, 906, 25`. If this isn't desired, either run a
141 | summarizing layer in the other direction, e.g. `Lfys20` producing a `1, 1,
142 | 906, 20` output, or prepend a reshape layer `S1(1x16)1,3` combining the height
143 | and channel dimensions into a `1, 1, 906, 512` input to the recurrent layer.
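
The shape behaviour can be checked with a few lines of PyTorch. This is an
illustrative sketch of the `Lfx25` example above, not kraken's implementation:

.. code-block:: python

    # Sketch: an Lfx25-style pass over a (batch=1, height=16, width=906,
    # channels=32) input, treating the 16 rows as additional batch entries.
    import torch
    import torch.nn as nn

    x = torch.randn(1, 16, 906, 32)            # (batch, height, width, channels)
    rnn = nn.LSTM(input_size=32, hidden_size=25, batch_first=True)

    rows = x.reshape(1 * 16, 906, 32)          # 16 independent width sequences
    out, _ = rnn(rows)                         # (16, 906, 25)
    print(out.reshape(1, 16, 906, 25).shape)   # torch.Size([1, 16, 906, 25])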
144 |
145 | Helper and Plumbing Layers
146 | --------------------------
147 |
148 | Max Pool
149 | ^^^^^^^^
150 | .. code-block:: console
151 |
152 |     Mp[{name}]<y>,<x>[,<y_stride>,<x_stride>]
153 |
154 | Adds a max pooling layer with kernel size `(y, x)` and stride `(y_stride, x_stride)`.
155 |
156 | Reshape
157 | ^^^^^^^
158 |
159 | .. code-block:: console
160 |
161 |     S[{name}]<d>(<a>x<b>)<e>,<f> Splits one dimension, moves one part to another
162 |                                  dimension.
163 |
164 | The `S` layer reshapes a source dimension `d` to `a,b` and distributes `a` into
165 | dimension `e`, respectively `b` into `f`. Either `e` or `f` has to be equal to
166 | `d`. So `S1(1x48)1,3` on a `1, 48, 1020, 8` input will first reshape into
167 | `1, 1, 48, 1020, 8`, leave the `1` part in the height dimension, and distribute
168 | the `48` sized tensor into the channel dimension, resulting in a `1, 1, 1020,
169 | 48*8=384` sized output. `S` layers are mostly used to remove undesirable non-1
170 | height before a recurrent layer.
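
The effect of the reshape can be traced with plain tensor operations; the
following sketch mirrors the `S1(1x48)1,3` example above and is illustrative
only:

.. code-block:: python

    # Sketch of S1(1x48)1,3: the height of 48 is split into 1 x 48, the 1 stays
    # in the height dimension and the 48 is folded into the channel dimension
    # together with the existing 8 channels.
    import torch

    x = torch.randn(1, 48, 1020, 8)            # (batch, height, width, channels)
    y = x.permute(0, 2, 1, 3).reshape(1, 1, 1020, 48 * 8)
    print(y.shape)                             # torch.Size([1, 1, 1020, 384])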
171 |
172 | .. note::
173 |
174 |     This `S` layer is equivalent to the one in the TensorFlow implementation of
175 |     VGSL, i.e. it behaves differently from the one in Tesseract.
176 |
177 | Regularization Layers
178 | ---------------------
179 |
180 | .. code-block:: console
181 |
182 |     Do[{name}][<prob>],[<dim>] Insert a 1D or 2D dropout layer
183 |
184 | Adds a 1D or 2D dropout layer with a given probability. Defaults to `0.5` drop
185 | probability and 1D dropout. Set `dim` to `2` after convolutional layers.
186 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = _build
9 |
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 |
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
21 |
22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext
23 |
24 | help:
25 | 	@echo "Please use \`make <target>' where <target> is one of"
26 | @echo " html to make standalone HTML files"
27 | @echo " dirhtml to make HTML files named index.html in directories"
28 | @echo " singlehtml to make a single large HTML file"
29 | @echo " pickle to make pickle files"
30 | @echo " json to make JSON files"
31 | @echo " htmlhelp to make HTML files and a HTML help project"
32 | @echo " qthelp to make HTML files and a qthelp project"
33 | @echo " applehelp to make an Apple Help Book"
34 | @echo " devhelp to make HTML files and a Devhelp project"
35 | @echo " epub to make an epub"
36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
37 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
39 | @echo " text to make text files"
40 | @echo " man to make manual pages"
41 | @echo " texinfo to make Texinfo files"
42 | @echo " info to make Texinfo files and run them through makeinfo"
43 | @echo " gettext to make PO message catalogs"
44 | @echo " changes to make an overview of all changed/added/deprecated items"
45 | @echo " xml to make Docutils-native XML files"
46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes"
47 | @echo " linkcheck to check all external links for integrity"
48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
49 | @echo " coverage to run coverage check of the documentation (if enabled)"
50 |
51 | clean:
52 | rm -rf $(BUILDDIR)/*
53 |
54 | html:
55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
56 | @echo
57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
58 |
59 | dirhtml:
60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
61 | @echo
62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
63 |
64 | singlehtml:
65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
66 | @echo
67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
68 |
69 | pickle:
70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
71 | @echo
72 | @echo "Build finished; now you can process the pickle files."
73 |
74 | json:
75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
76 | @echo
77 | @echo "Build finished; now you can process the JSON files."
78 |
79 | htmlhelp:
80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
81 | @echo
82 | @echo "Build finished; now you can run HTML Help Workshop with the" \
83 | ".hhp project file in $(BUILDDIR)/htmlhelp."
84 |
85 | qthelp:
86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
87 | @echo
88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \
89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/kraken.qhcp"
91 | @echo "To view the help file:"
92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/kraken.qhc"
93 |
94 | applehelp:
95 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
96 | @echo
97 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
98 | @echo "N.B. You won't be able to view it unless you put it in" \
99 | "~/Library/Documentation/Help or install it in your application" \
100 | "bundle."
101 |
102 | devhelp:
103 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
104 | @echo
105 | @echo "Build finished."
106 | @echo "To view the help file:"
107 | @echo "# mkdir -p $$HOME/.local/share/devhelp/kraken"
108 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/kraken"
109 | @echo "# devhelp"
110 |
111 | epub:
112 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
113 | @echo
114 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
115 |
116 | latex:
117 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
118 | @echo
119 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
120 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
121 | "(use \`make latexpdf' here to do that automatically)."
122 |
123 | latexpdf:
124 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
125 | @echo "Running LaTeX files through pdflatex..."
126 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
127 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
128 |
129 | latexpdfja:
130 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
131 | @echo "Running LaTeX files through platex and dvipdfmx..."
132 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
133 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
134 |
135 | text:
136 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
137 | @echo
138 | @echo "Build finished. The text files are in $(BUILDDIR)/text."
139 |
140 | man:
141 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
142 | @echo
143 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
144 |
145 | texinfo:
146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
147 | @echo
148 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
149 | @echo "Run \`make' in that directory to run these through makeinfo" \
150 | "(use \`make info' here to do that automatically)."
151 |
152 | info:
153 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
154 | @echo "Running Texinfo files through makeinfo..."
155 | make -C $(BUILDDIR)/texinfo info
156 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
157 |
158 | gettext:
159 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
160 | @echo
161 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
162 |
163 | changes:
164 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
165 | @echo
166 | @echo "The overview file is in $(BUILDDIR)/changes."
167 |
168 | linkcheck:
169 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
170 | @echo
171 | @echo "Link check complete; look for any errors in the above output " \
172 | "or in $(BUILDDIR)/linkcheck/output.txt."
173 |
174 | doctest:
175 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
176 | @echo "Testing of doctests in the sources finished, look at the " \
177 | "results in $(BUILDDIR)/doctest/output.txt."
178 |
179 | coverage:
180 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
181 | @echo "Testing of coverage in the sources finished, look at the " \
182 | "results in $(BUILDDIR)/coverage/python.txt."
183 |
184 | xml:
185 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
186 | @echo
187 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
188 |
189 | pseudoxml:
190 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
191 | @echo
192 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
193 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | REM Command file for Sphinx documentation
4 |
5 | if "%SPHINXBUILD%" == "" (
6 | set SPHINXBUILD=sphinx-build
7 | )
8 | set BUILDDIR=_build
9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
11 | if NOT "%PAPER%" == "" (
12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
14 | )
15 |
16 | if "%1" == "" goto help
17 |
18 | if "%1" == "help" (
19 | :help
20 | 	echo.Please use `make ^<target^>` where ^<target^> is one of
21 | echo. html to make standalone HTML files
22 | echo. dirhtml to make HTML files named index.html in directories
23 | echo. singlehtml to make a single large HTML file
24 | echo. pickle to make pickle files
25 | echo. json to make JSON files
26 | echo. htmlhelp to make HTML files and a HTML help project
27 | echo. qthelp to make HTML files and a qthelp project
28 | echo. devhelp to make HTML files and a Devhelp project
29 | echo. epub to make an epub
30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
31 | echo. text to make text files
32 | echo. man to make manual pages
33 | echo. texinfo to make Texinfo files
34 | echo. gettext to make PO message catalogs
35 | echo. changes to make an overview over all changed/added/deprecated items
36 | echo. xml to make Docutils-native XML files
37 | echo. pseudoxml to make pseudoxml-XML files for display purposes
38 | echo. linkcheck to check all external links for integrity
39 | echo. doctest to run all doctests embedded in the documentation if enabled
40 | echo. coverage to run coverage check of the documentation if enabled
41 | goto end
42 | )
43 |
44 | if "%1" == "clean" (
45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
46 | del /q /s %BUILDDIR%\*
47 | goto end
48 | )
49 |
50 |
51 | REM Check if sphinx-build is available and fallback to Python version if any
52 | %SPHINXBUILD% 2> nul
53 | if errorlevel 9009 goto sphinx_python
54 | goto sphinx_ok
55 |
56 | :sphinx_python
57 |
58 | set SPHINXBUILD=python -m sphinx.__init__
59 | %SPHINXBUILD% 2> nul
60 | if errorlevel 9009 (
61 | echo.
62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
63 | echo.installed, then set the SPHINXBUILD environment variable to point
64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
65 | echo.may add the Sphinx directory to PATH.
66 | echo.
67 | echo.If you don't have Sphinx installed, grab it from
68 | echo.http://sphinx-doc.org/
69 | exit /b 1
70 | )
71 |
72 | :sphinx_ok
73 |
74 |
75 | if "%1" == "html" (
76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
77 | if errorlevel 1 exit /b 1
78 | echo.
79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html.
80 | goto end
81 | )
82 |
83 | if "%1" == "dirhtml" (
84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
85 | if errorlevel 1 exit /b 1
86 | echo.
87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
88 | goto end
89 | )
90 |
91 | if "%1" == "singlehtml" (
92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
93 | if errorlevel 1 exit /b 1
94 | echo.
95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
96 | goto end
97 | )
98 |
99 | if "%1" == "pickle" (
100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
101 | if errorlevel 1 exit /b 1
102 | echo.
103 | echo.Build finished; now you can process the pickle files.
104 | goto end
105 | )
106 |
107 | if "%1" == "json" (
108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
109 | if errorlevel 1 exit /b 1
110 | echo.
111 | echo.Build finished; now you can process the JSON files.
112 | goto end
113 | )
114 |
115 | if "%1" == "htmlhelp" (
116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
117 | if errorlevel 1 exit /b 1
118 | echo.
119 | echo.Build finished; now you can run HTML Help Workshop with the ^
120 | .hhp project file in %BUILDDIR%/htmlhelp.
121 | goto end
122 | )
123 |
124 | if "%1" == "qthelp" (
125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
126 | if errorlevel 1 exit /b 1
127 | echo.
128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^
129 | .qhcp project file in %BUILDDIR%/qthelp, like this:
130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\kraken.qhcp
131 | echo.To view the help file:
132 | 	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\kraken.qhc
133 | goto end
134 | )
135 |
136 | if "%1" == "devhelp" (
137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
138 | if errorlevel 1 exit /b 1
139 | echo.
140 | echo.Build finished.
141 | goto end
142 | )
143 |
144 | if "%1" == "epub" (
145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
146 | if errorlevel 1 exit /b 1
147 | echo.
148 | echo.Build finished. The epub file is in %BUILDDIR%/epub.
149 | goto end
150 | )
151 |
152 | if "%1" == "latex" (
153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
154 | if errorlevel 1 exit /b 1
155 | echo.
156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
157 | goto end
158 | )
159 |
160 | if "%1" == "latexpdf" (
161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
162 | cd %BUILDDIR%/latex
163 | make all-pdf
164 | cd %~dp0
165 | echo.
166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
167 | goto end
168 | )
169 |
170 | if "%1" == "latexpdfja" (
171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
172 | cd %BUILDDIR%/latex
173 | make all-pdf-ja
174 | cd %~dp0
175 | echo.
176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
177 | goto end
178 | )
179 |
180 | if "%1" == "text" (
181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
182 | if errorlevel 1 exit /b 1
183 | echo.
184 | echo.Build finished. The text files are in %BUILDDIR%/text.
185 | goto end
186 | )
187 |
188 | if "%1" == "man" (
189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
190 | if errorlevel 1 exit /b 1
191 | echo.
192 | echo.Build finished. The manual pages are in %BUILDDIR%/man.
193 | goto end
194 | )
195 |
196 | if "%1" == "texinfo" (
197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
198 | if errorlevel 1 exit /b 1
199 | echo.
200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
201 | goto end
202 | )
203 |
204 | if "%1" == "gettext" (
205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
206 | if errorlevel 1 exit /b 1
207 | echo.
208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
209 | goto end
210 | )
211 |
212 | if "%1" == "changes" (
213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
214 | if errorlevel 1 exit /b 1
215 | echo.
216 | echo.The overview file is in %BUILDDIR%/changes.
217 | goto end
218 | )
219 |
220 | if "%1" == "linkcheck" (
221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
222 | if errorlevel 1 exit /b 1
223 | echo.
224 | echo.Link check complete; look for any errors in the above output ^
225 | or in %BUILDDIR%/linkcheck/output.txt.
226 | goto end
227 | )
228 |
229 | if "%1" == "doctest" (
230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
231 | if errorlevel 1 exit /b 1
232 | echo.
233 | echo.Testing of doctests in the sources finished, look at the ^
234 | results in %BUILDDIR%/doctest/output.txt.
235 | goto end
236 | )
237 |
238 | if "%1" == "coverage" (
239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
240 | if errorlevel 1 exit /b 1
241 | echo.
242 | echo.Testing of coverage in the sources finished, look at the ^
243 | results in %BUILDDIR%/coverage/python.txt.
244 | goto end
245 | )
246 |
247 | if "%1" == "xml" (
248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
249 | if errorlevel 1 exit /b 1
250 | echo.
251 | echo.Build finished. The XML files are in %BUILDDIR%/xml.
252 | goto end
253 | )
254 |
255 | if "%1" == "pseudoxml" (
256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
257 | if errorlevel 1 exit /b 1
258 | echo.
259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
260 | goto end
261 | )
262 |
263 | :end
264 |
--------------------------------------------------------------------------------
/kraken/lib/codec.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Copyright 2017 Benjamin Kiessling
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
14 | # or implied. See the License for the specific language governing
15 | # permissions and limitations under the License.
16 |
17 | """
18 | pytorch compatible codec with many-to-many mapping between labels and
19 | graphemes.
20 | """
21 | import regex
22 | import numpy as np
23 |
24 | from typing import List, Tuple, Set, Union, Dict, Sequence
25 | from torch import IntTensor
26 | from kraken.lib.exceptions import KrakenEncodeException
27 |
28 | __all__ = ['PytorchCodec']
29 |
30 |
31 | class PytorchCodec(object):
32 | """
33 | Translates between labels and graphemes.
34 | """
35 | def __init__(self, charset: Union[Dict[str, Sequence[int]], Sequence[str], str]) -> None:
36 | """
37 | Builds a codec converting between graphemes/code points and integer
38 | label sequences.
39 |
40 | charset may either be a string, a list or a dict. In the first case
41 | each code point will be assigned a label, in the second case each
42 | string in the list will be assigned a label, and in the final case each
43 | key string will be mapped to the value sequence of integers. In the
44 | first two cases labels will be assigned automatically.
45 |
46 | As 0 is the blank label in a CTC output layer, output labels and input
47 | dictionaries are/should be 1-indexed.
48 |
49 | Args:
50 | charset (unicode, list, dict): Input character set.
51 | """
52 | if isinstance(charset, dict):
53 | self.c2l = charset
54 | else:
55 | self.c2l = {k: [v] for v, k in enumerate(sorted(charset), start=1)}
56 | # map integer labels to code points because regex only works with strings
57 | self.l2c = {} # type: Dict[str, str]
58 | for k, v in self.c2l.items():
59 | self.l2c[''.join(chr(c) for c in v)] = k
60 |
61 | # sort prefixes for c2l regex
62 | self.c2l_regex = regex.compile(r'|'.join(regex.escape(x) for x in sorted(self.c2l.keys(), key=len, reverse=True)))
63 | # sort prefixes for l2c regex
64 | self.l2c_regex = regex.compile(r'|'.join(regex.escape(x) for x in sorted(self.l2c.keys(), key=len, reverse=True)))
65 |
66 | def __len__(self) -> int:
67 | """
68 | Total number of input labels the codec can decode.
69 | """
70 | return len(self.l2c.keys())
71 |
72 | def max_label(self) -> int:
73 | """
74 | Returns the maximum label value.
75 | """
76 | return max(l for labels in self.c2l.values() for l in labels)
77 |
78 | def encode(self, s: str) -> IntTensor:
79 | """
80 | Encodes a string into a sequence of labels.
81 |
82 | Args:
83 | s (str): Input unicode string
84 |
85 | Returns:
86 | (torch.IntTensor) encoded label sequence
87 |
88 | Raises:
89 | KrakenEncodeException if encoding fails.
90 | """
91 | splits = self._greedy_split(s, self.c2l_regex)
92 | labels = [] # type: List[int]
93 | for c in splits:
94 | labels.extend(self.c2l[c])
95 | return IntTensor(labels)
96 |
97 | def decode(self, labels: Sequence[Tuple[int, int, int, float]]) -> List[Tuple[str, int, int, float]]:
98 | """
99 | Decodes a labelling.
100 |
101 | Given a labelling with cuts and confidences returns a string with the
102 | cuts and confidences aggregated across label-code point
103 | correspondences. When decoding multilabels to code points the resulting
104 | cuts are min/max, confidences are averaged.
105 |
106 | Args:
107 | labels (list): Input containing tuples (label, start, end,
108 | confidence).
109 |
110 | Returns:
111 | list: A list of tuples (code point, start, end, confidence)
112 | """
113 | # map into unicode space
114 | uni_labels = ''.join(chr(v) for v, _, _, _ in labels)
115 | start = [x for _, x, _, _ in labels]
116 | end = [x for _, _, x, _ in labels]
117 | con = [x for _, _, _, x in labels]
118 | splits = self._greedy_split(uni_labels, self.l2c_regex)
119 | decoded = []
120 | idx = 0
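        # expand each matched label group back into its code point sequence;
        # every emitted code point shares the group's first start cut, last
        # end cut, and the mean confidence over the group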
121 | for i in splits:
122 | decoded.extend([(c, s, e, u) for c, s, e, u in zip(self.l2c[i],
123 | len(self.l2c[i]) * [start[idx]],
124 | len(self.l2c[i]) * [end[idx + len(i) - 1]],
125 | len(self.l2c[i]) * [np.mean(con[idx:idx + len(i)])])])
126 | idx += len(i)
127 | return decoded
128 |
129 | def _greedy_split(self, input: str, re: regex.Regex) -> List[str]:
130 | """
131 | Splits an input string greedily from a list of prefixes. Stops when no
132 | more matches are found.
133 |
134 | Args:
135 | input (str): input string
136 | re (regex.Regex): Prefix match object
137 |
138 | Returns:
139 | (list) of prefixes
140 |
141 | Raises:
142 | (KrakenEncodeException) if no prefix match is found for some part
143 | of the string.
144 | """
145 | r = [] # type: List[str]
146 | idx = 0
147 | while True:
148 | mo = re.match(input, idx)
149 | if mo is None or idx == len(input):
150 | if len(input) > idx:
151 | raise KrakenEncodeException('No prefix matches for input after {}'.format(idx))
152 | return r
153 | r.append(mo.group())
154 | idx = mo.end()
155 |
156 | def merge(self, codec: 'PytorchCodec') -> Tuple['PytorchCodec', Set]:
157 | """
158 | Transforms this codec (c1) into another (c2) reusing as many labels as
159 | possible.
160 |
161 | The resulting codec is able to encode the same code point sequences
162 | while not necessarily having the same labels for them as c2.
163 | Retains matching character -> label mappings from both codecs, removes
164 |         mappings not in c2, and adds mappings not in c1. Compound labels in c2 for
165 | code point sequences not in c1 containing labels also in use in c1 are
166 | added as separate labels.
167 |
168 | Args:
169 | codec (kraken.lib.codec.PytorchCodec):
170 |
171 | Returns:
172 | A merged codec and a list of labels that were removed from the
173 | original codec.
174 | """
175 | # find character sequences not encodable (exact match) by new codec.
176 | # get labels for these sequences as deletion candidates
177 | rm_candidates = {cseq: enc for cseq, enc in self.c2l.items() if cseq not in codec.c2l}
178 | c2l_cand = self.c2l.copy()
179 | for x in rm_candidates.keys():
180 | c2l_cand.pop(x)
181 | # remove labels from candidate list that are in use for other decodings
182 | rm_labels = [label for v in rm_candidates.values() for label in v]
183 | for v in c2l_cand.values():
184 |             for l in list(rm_labels):  # iterate over a snapshot as labels are removed below
185 | if l in v:
186 | rm_labels.remove(l)
187 | # iteratively remove labels, decrementing subsequent labels to close
188 | # (new) holes in the codec.
189 | offset_rm_labels = [v-idx for idx, v in enumerate(sorted(set(rm_labels)))]
190 | for rlabel in offset_rm_labels:
191 | c2l_cand = {k: [l-1 if l > rlabel else l for l in v] for k, v in c2l_cand.items()}
192 | # add mappings not in original codec
193 | add_list = {cseq: enc for cseq, enc in codec.c2l.items() if cseq not in self.c2l}
194 | # renumber
195 | start_idx = max(label for v in c2l_cand.values() for label in v) + 1
196 | add_labels = {k: v for v, k in enumerate(sorted(set(label for v in add_list.values() for label in v)), start_idx)}
197 | for k, v in add_list.items():
198 | c2l_cand[k] = [add_labels[label] for label in v]
199 | return PytorchCodec(c2l_cand), set(rm_labels)
200 |
--------------------------------------------------------------------------------
/kraken/serialization.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Copyright 2015 Benjamin Kiessling
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
14 | # or implied. See the License for the specific language governing
15 | # permissions and limitations under the License.
16 | from jinja2 import Environment, PackageLoader
17 |
18 | import regex
19 | import logging
20 | import unicodedata
21 |
22 | from collections import Counter
23 |
24 | from kraken.rpred import ocr_record
25 | from kraken.lib.util import make_printable
26 |
27 | from typing import List, Tuple, Iterable, Optional, Sequence
28 |
29 | logger = logging.getLogger(__name__)
30 |
31 | __all__ = ['serialize']
32 |
33 |
34 | def _rescale(val: Sequence[float], low: float, high: float) -> List[float]:
35 | """
36 |     Rescales a list of confidence values between 0 and 1 to an interval [low,
37 | high].
38 |
39 | Args:
40 |         val (list): List of values in interval (0,1)
41 | low (float): Lower bound of rescaling interval
42 | high (float): Upper bound of rescaling interval
43 |
44 | Returns:
45 | Rescaled value (float).
46 | """
47 | return [(high - low) * x + low for x in val]
48 |
49 |
50 | def max_bbox(boxes: Iterable[Tuple[int, int, int, int]]) -> Tuple[int, int, int, int]:
51 | """
52 | Calculates the minimal bounding box containing all boxes contained in an
53 | iterator.
54 |
55 | Args:
56 | boxes (iterator): An iterator returning tuples of the format (x0, y0,
57 | x1, y1)
58 | Returns:
59 | A box covering all bounding boxes in the input argument
60 | """
61 | # XXX: fix type hinting
62 | sbox = list(map(sorted, list(zip(*boxes))))
63 | return (sbox[0][0], sbox[1][0], sbox[2][-1], sbox[3][-1]) # type: ignore
64 |
65 |
66 | def serialize(records: Sequence[ocr_record],
67 | image_name: str = None,
68 | image_size: Tuple[int, int] = (0, 0),
69 | writing_mode: str = 'horizontal-tb',
70 | scripts: Optional[Iterable[str]] = None,
71 | template: str = 'hocr') -> str:
72 | """
73 | Serializes a list of ocr_records into an output document.
74 |
75 | Serializes a list of predictions and their corresponding positions by doing
76 | some hOCR-specific preprocessing and then renders them through one of
77 | several jinja2 templates.
78 |
79 | Note: Empty records are ignored for serialization purposes.
80 |
81 | Args:
82 | records (iterable): List of kraken.rpred.ocr_record
83 | image_name (str): Name of the source image
84 | image_size (tuple): Dimensions of the source image
85 | writing_mode (str): Sets the principal layout of lines and the
86 | direction in which blocks progress. Valid values
87 | are horizontal-tb, vertical-rl, and
88 | vertical-lr.
89 | scripts (list): List of scripts contained in the OCR records
90 | template (str): Selector for the serialization format. May be
91 | 'hocr' or 'alto'.
92 |
93 | Returns:
94 | (str) rendered template.
95 | """
96 | logger.info('Serialize {} records from {} with template {}.'.format(len(records), image_name, template))
97 | page = {'lines': [], 'size': image_size, 'name': image_name, 'writing_mode': writing_mode, 'scripts': scripts} # type: dict
98 | seg_idx = 0
99 | char_idx = 0
100 | for idx, record in enumerate(records):
101 | # skip empty records
102 | if not record.prediction:
103 | logger.debug('Empty record. Skipping')
104 | continue
105 | line = {'index': idx,
106 | 'bbox': max_bbox(record.cuts),
107 | 'cuts': record.cuts,
108 | 'confidences': record.confidences,
109 | 'recognition': []
110 | }
111 | splits = regex.split(r'(\s+)', record.prediction)
112 | line_offset = 0
113 | logger.debug('Record contains {} segments'.format(len(splits)))
114 | for segment in splits:
115 | if len(segment) == 0:
116 | continue
117 | seg_bbox = max_bbox(record.cuts[line_offset:line_offset + len(segment)])
118 |
119 | line['recognition'].extend([{'bbox': seg_bbox,
120 | 'confidences': record.confidences[line_offset:line_offset + len(segment)],
121 | 'cuts': record.cuts[line_offset:line_offset + len(segment)],
122 | 'text': segment,
123 | 'recognition': [{'bbox': cut, 'confidence': conf, 'text': char, 'index': cid}
124 | for conf, cut, char, cid in
125 | zip(record.confidences[line_offset:line_offset + len(segment)],
126 | record.cuts[line_offset:line_offset + len(segment)],
127 | segment,
128 | range(char_idx, char_idx + len(segment)))],
129 | 'index': seg_idx}])
130 | char_idx += len(segment)
131 | seg_idx += 1
132 | line_offset += len(segment)
133 | page['lines'].append(line)
134 | logger.debug('Initializing jinja environment.')
135 | env = Environment(loader=PackageLoader('kraken', 'templates'),
136 | trim_blocks=True,
137 | lstrip_blocks=True,
138 | autoescape=True)
139 | env.tests['whitespace'] = str.isspace
140 | env.filters['rescale'] = _rescale
141 | logger.debug('Retrieving template.')
142 | tmpl = env.get_template(template)
143 | logger.debug('Rendering data.')
144 | return tmpl.render(page=page)
145 |
146 |
147 | def render_report(model: str,
148 | chars: int,
149 | errors: int,
150 | char_confusions: Counter,
151 | scripts: Counter,
152 | insertions: Counter,
153 | deletions: int,
154 | substitutions: Counter) -> str:
155 | """
156 | Renders an accuracy report.
157 |
158 | Args:
159 |         model (str): Model name.
160 |         chars (int): Number of characters (code points) in the test set.
161 |         errors (int): Number of errors on the test set.
161 | char_confusions (dict): Dictionary mapping a tuple (gt, pred) to a
162 | number of occurrences.
163 | scripts (dict): Dictionary counting character per script.
164 | insertions (dict): Dictionary counting insertion operations per Unicode
165 | script
166 | deletions (int): Number of deletions
167 | substitutions (dict): Dictionary counting substitution operations per
168 | Unicode script.
169 |
170 | Returns:
171 | A string containing the rendered report.
172 | """
173 | logger.info('Serializing report for {}'.format(model))
174 |
175 | report = {'model': model,
176 | 'chars': chars,
177 | 'errors': errors,
178 | 'accuracy': (chars-errors)/chars * 100,
179 | 'insertions': sum(insertions.values()),
180 | 'deletions': deletions,
181 | 'substitutions': sum(substitutions.values()),
182 | 'scripts': sorted([{'script': k,
183 | 'count': v,
184 | 'errors': insertions[k] + substitutions[k],
185 | 'accuracy': 100 * (v-(insertions[k] + substitutions[k]))/v} for k, v in scripts.items()],
186 | key=lambda x: x['accuracy'],
187 | reverse=True),
188 | 'counts': sorted([{'correct': make_printable(k[0]),
189 | 'generated': make_printable(k[1]),
190 | 'errors': v} for k, v in char_confusions.items() if k[0] != k[1]],
191 | key=lambda x: x['errors'],
192 | reverse=True)}
193 | logger.debug('Initializing jinja environment.')
194 | env = Environment(loader=PackageLoader('kraken', 'templates'),
195 | trim_blocks=True,
196 | lstrip_blocks=True,
197 | autoescape=True)
198 | logger.debug('Retrieving template.')
199 | tmpl = env.get_template('report')
200 | logger.debug('Rendering data.')
201 | return tmpl.render(report=report)
202 |
203 |
--------------------------------------------------------------------------------
/kraken/lib/clstm_pb2.py:
--------------------------------------------------------------------------------
1 | # Generated by the protocol buffer compiler. DO NOT EDIT!
2 | # source: clstm.proto
3 |
4 | import sys
5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
6 | from google.protobuf import descriptor as _descriptor
7 | from google.protobuf import message as _message
8 | from google.protobuf import reflection as _reflection
9 | from google.protobuf import symbol_database as _symbol_database
10 | from google.protobuf import descriptor_pb2
11 | # @@protoc_insertion_point(imports)
12 |
13 | _sym_db = _symbol_database.Default()
14 |
15 |
16 |
17 |
18 | DESCRIPTOR = _descriptor.FileDescriptor(
19 | name='clstm.proto',
20 | package='clstm',
21 | syntax='proto2',
22 | serialized_pb=_b('\n\x0b\x63lstm.proto\x12\x05\x63lstm\"&\n\x08KeyValue\x12\x0b\n\x03key\x18\x01 \x02(\t\x12\r\n\x05value\x18\x02 \x02(\t\"1\n\x05\x41rray\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0b\n\x03\x64im\x18\x02 \x03(\x05\x12\r\n\x05value\x18\x03 \x03(\x02\"\xcf\x01\n\x0cNetworkProto\x12\x0c\n\x04kind\x18\x01 \x02(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06ninput\x18\n \x02(\x05\x12\x0f\n\x07noutput\x18\x0b \x02(\x05\x12\x0e\n\x06icodec\x18\x0c \x03(\x05\x12\r\n\x05\x63odec\x18\r \x03(\x05\x12\"\n\tattribute\x18\x14 \x03(\x0b\x32\x0f.clstm.KeyValue\x12\x1d\n\x07weights\x18\x1e \x03(\x0b\x32\x0c.clstm.Array\x12 \n\x03sub\x18( \x03(\x0b\x32\x13.clstm.NetworkProto')
23 | )
24 |
25 |
26 |
27 |
28 | _KEYVALUE = _descriptor.Descriptor(
29 | name='KeyValue',
30 | full_name='clstm.KeyValue',
31 | filename=None,
32 | file=DESCRIPTOR,
33 | containing_type=None,
34 | fields=[
35 | _descriptor.FieldDescriptor(
36 | name='key', full_name='clstm.KeyValue.key', index=0,
37 | number=1, type=9, cpp_type=9, label=2,
38 | has_default_value=False, default_value=_b("").decode('utf-8'),
39 | message_type=None, enum_type=None, containing_type=None,
40 | is_extension=False, extension_scope=None,
41 | options=None),
42 | _descriptor.FieldDescriptor(
43 | name='value', full_name='clstm.KeyValue.value', index=1,
44 | number=2, type=9, cpp_type=9, label=2,
45 | has_default_value=False, default_value=_b("").decode('utf-8'),
46 | message_type=None, enum_type=None, containing_type=None,
47 | is_extension=False, extension_scope=None,
48 | options=None),
49 | ],
50 | extensions=[
51 | ],
52 | nested_types=[],
53 | enum_types=[
54 | ],
55 | options=None,
56 | is_extendable=False,
57 | syntax='proto2',
58 | extension_ranges=[],
59 | oneofs=[
60 | ],
61 | serialized_start=22,
62 | serialized_end=60,
63 | )
64 |
65 |
66 | _ARRAY = _descriptor.Descriptor(
67 | name='Array',
68 | full_name='clstm.Array',
69 | filename=None,
70 | file=DESCRIPTOR,
71 | containing_type=None,
72 | fields=[
73 | _descriptor.FieldDescriptor(
74 | name='name', full_name='clstm.Array.name', index=0,
75 | number=1, type=9, cpp_type=9, label=1,
76 | has_default_value=False, default_value=_b("").decode('utf-8'),
77 | message_type=None, enum_type=None, containing_type=None,
78 | is_extension=False, extension_scope=None,
79 | options=None),
80 | _descriptor.FieldDescriptor(
81 | name='dim', full_name='clstm.Array.dim', index=1,
82 | number=2, type=5, cpp_type=1, label=3,
83 | has_default_value=False, default_value=[],
84 | message_type=None, enum_type=None, containing_type=None,
85 | is_extension=False, extension_scope=None,
86 | options=None),
87 | _descriptor.FieldDescriptor(
88 | name='value', full_name='clstm.Array.value', index=2,
89 | number=3, type=2, cpp_type=6, label=3,
90 | has_default_value=False, default_value=[],
91 | message_type=None, enum_type=None, containing_type=None,
92 | is_extension=False, extension_scope=None,
93 | options=None),
94 | ],
95 | extensions=[
96 | ],
97 | nested_types=[],
98 | enum_types=[
99 | ],
100 | options=None,
101 | is_extendable=False,
102 | syntax='proto2',
103 | extension_ranges=[],
104 | oneofs=[
105 | ],
106 | serialized_start=62,
107 | serialized_end=111,
108 | )
109 |
110 |
111 | _NETWORKPROTO = _descriptor.Descriptor(
112 | name='NetworkProto',
113 | full_name='clstm.NetworkProto',
114 | filename=None,
115 | file=DESCRIPTOR,
116 | containing_type=None,
117 | fields=[
118 | _descriptor.FieldDescriptor(
119 | name='kind', full_name='clstm.NetworkProto.kind', index=0,
120 | number=1, type=9, cpp_type=9, label=2,
121 | has_default_value=False, default_value=_b("").decode('utf-8'),
122 | message_type=None, enum_type=None, containing_type=None,
123 | is_extension=False, extension_scope=None,
124 | options=None),
125 | _descriptor.FieldDescriptor(
126 | name='name', full_name='clstm.NetworkProto.name', index=1,
127 | number=2, type=9, cpp_type=9, label=1,
128 | has_default_value=False, default_value=_b("").decode('utf-8'),
129 | message_type=None, enum_type=None, containing_type=None,
130 | is_extension=False, extension_scope=None,
131 | options=None),
132 | _descriptor.FieldDescriptor(
133 | name='ninput', full_name='clstm.NetworkProto.ninput', index=2,
134 | number=10, type=5, cpp_type=1, label=2,
135 | has_default_value=False, default_value=0,
136 | message_type=None, enum_type=None, containing_type=None,
137 | is_extension=False, extension_scope=None,
138 | options=None),
139 | _descriptor.FieldDescriptor(
140 | name='noutput', full_name='clstm.NetworkProto.noutput', index=3,
141 | number=11, type=5, cpp_type=1, label=2,
142 | has_default_value=False, default_value=0,
143 | message_type=None, enum_type=None, containing_type=None,
144 | is_extension=False, extension_scope=None,
145 | options=None),
146 | _descriptor.FieldDescriptor(
147 | name='icodec', full_name='clstm.NetworkProto.icodec', index=4,
148 | number=12, type=5, cpp_type=1, label=3,
149 | has_default_value=False, default_value=[],
150 | message_type=None, enum_type=None, containing_type=None,
151 | is_extension=False, extension_scope=None,
152 | options=None),
153 | _descriptor.FieldDescriptor(
154 | name='codec', full_name='clstm.NetworkProto.codec', index=5,
155 | number=13, type=5, cpp_type=1, label=3,
156 | has_default_value=False, default_value=[],
157 | message_type=None, enum_type=None, containing_type=None,
158 | is_extension=False, extension_scope=None,
159 | options=None),
160 | _descriptor.FieldDescriptor(
161 | name='attribute', full_name='clstm.NetworkProto.attribute', index=6,
162 | number=20, type=11, cpp_type=10, label=3,
163 | has_default_value=False, default_value=[],
164 | message_type=None, enum_type=None, containing_type=None,
165 | is_extension=False, extension_scope=None,
166 | options=None),
167 | _descriptor.FieldDescriptor(
168 | name='weights', full_name='clstm.NetworkProto.weights', index=7,
169 | number=30, type=11, cpp_type=10, label=3,
170 | has_default_value=False, default_value=[],
171 | message_type=None, enum_type=None, containing_type=None,
172 | is_extension=False, extension_scope=None,
173 | options=None),
174 | _descriptor.FieldDescriptor(
175 | name='sub', full_name='clstm.NetworkProto.sub', index=8,
176 | number=40, type=11, cpp_type=10, label=3,
177 | has_default_value=False, default_value=[],
178 | message_type=None, enum_type=None, containing_type=None,
179 | is_extension=False, extension_scope=None,
180 | options=None),
181 | ],
182 | extensions=[
183 | ],
184 | nested_types=[],
185 | enum_types=[
186 | ],
187 | options=None,
188 | is_extendable=False,
189 | syntax='proto2',
190 | extension_ranges=[],
191 | oneofs=[
192 | ],
193 | serialized_start=114,
194 | serialized_end=321,
195 | )
196 |
197 | _NETWORKPROTO.fields_by_name['attribute'].message_type = _KEYVALUE
198 | _NETWORKPROTO.fields_by_name['weights'].message_type = _ARRAY
199 | _NETWORKPROTO.fields_by_name['sub'].message_type = _NETWORKPROTO
200 | DESCRIPTOR.message_types_by_name['KeyValue'] = _KEYVALUE
201 | DESCRIPTOR.message_types_by_name['Array'] = _ARRAY
202 | DESCRIPTOR.message_types_by_name['NetworkProto'] = _NETWORKPROTO
203 | _sym_db.RegisterFileDescriptor(DESCRIPTOR)
204 |
205 | KeyValue = _reflection.GeneratedProtocolMessageType('KeyValue', (_message.Message,), dict(
206 | DESCRIPTOR = _KEYVALUE,
207 | __module__ = 'clstm_pb2'
208 | # @@protoc_insertion_point(class_scope:clstm.KeyValue)
209 | ))
210 | _sym_db.RegisterMessage(KeyValue)
211 |
212 | Array = _reflection.GeneratedProtocolMessageType('Array', (_message.Message,), dict(
213 | DESCRIPTOR = _ARRAY,
214 | __module__ = 'clstm_pb2'
215 | # @@protoc_insertion_point(class_scope:clstm.Array)
216 | ))
217 | _sym_db.RegisterMessage(Array)
218 |
219 | NetworkProto = _reflection.GeneratedProtocolMessageType('NetworkProto', (_message.Message,), dict(
220 | DESCRIPTOR = _NETWORKPROTO,
221 | __module__ = 'clstm_pb2'
222 | # @@protoc_insertion_point(class_scope:clstm.NetworkProto)
223 | ))
224 | _sym_db.RegisterMessage(NetworkProto)
225 |
226 |
227 | # @@protoc_insertion_point(module_scope)
228 |
--------------------------------------------------------------------------------
/docs/advanced.rst:
--------------------------------------------------------------------------------
1 | .. _advanced:
2 |
3 | Advanced Usage
4 | ==============
5 |
6 | Optical character recognition is the serial execution of multiple steps, in the
7 | case of kraken: binarization (converting color and grayscale images into bitonal
8 | ones), layout analysis/page segmentation (extracting topological text lines
9 | from an image), recognition (feeding text line images into a classifier),
10 | and finally serialization of the results into an appropriate format such as hOCR
11 | or ALTO.
12 |
13 | Input Specification
14 | -------------------
15 |
16 | All kraken subcommands operating on input-output pairs, i.e. producing one
17 | output document for one input document, follow the basic syntax:
18 |
19 | .. code-block:: console
20 |
21 | $ kraken -i input_1 output_1 -i input_2 output_2 ... subcommand_1 subcommand_2 ... subcommand_n
22 |
23 | In particular subcommands may be chained.
24 |
25 | Binarization
26 | ------------
27 |
28 | The binarization subcommand accepts almost the same parameters as
29 | ``ocropus-nlbin``. Only options not related to binarization, e.g. skew
30 | detection, are missing. In addition, error checking (image sizes, inversion
31 | detection, grayscale enforcement) is always disabled and kraken will happily
32 | binarize any image that is thrown at it.
33 |
34 | Available parameters are:
35 |
36 | =========== ====
37 | option type
38 | =========== ====
39 | --threshold FLOAT
40 | --zoom FLOAT
41 | --escale FLOAT
42 | --border FLOAT
43 | --perc INTEGER RANGE
44 | --range INTEGER
45 | --low INTEGER RANGE
46 | --high INTEGER RANGE
47 | =========== ====
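
The same functionality is available from Python. A minimal sketch, assuming
the `nlbin` function in `kraken.binarization` follows the upstream kraken
interface; the file names are placeholders:

.. code-block:: python

    # Sketch: binarization through the Python API. Assumes
    # kraken.binarization.nlbin() as in upstream kraken; 'page.tif' is a
    # placeholder input image.
    from PIL import Image
    from kraken import binarization

    im = Image.open('page.tif')
    bw = binarization.nlbin(im, threshold=0.5)   # returns a bitonal PIL image
    bw.save('page_bw.png')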
48 |
49 | Page Segmentation and Script Detection
50 | --------------------------------------
51 |
52 | The `segment` subcommand provides two operations: page segmentation into lines
53 | and script detection of those lines.
54 |
55 | Page segmentation is mostly parameterless, although a switch to change the
56 | color of column separators has been retained. The segmentation is written as a
57 | JSON file containing bounding boxes in reading order and
58 | the general text direction (horizontal, i.e. LTR or RTL text in top-to-bottom
59 | reading order or vertical-ltr/rtl for vertical lines read from left-to-right or
60 | right-to-left).
61 |
62 | The script detection splits the lines extracted by the segmenter into strips
63 | sharing a particular script that can then be recognized by supplying
64 | appropriate models for each detected script to the `ocr` subcommand.
65 |
66 | Combined output from both consists of lists in the `boxes` field corresponding
67 | to a topographical line and containing one or more bounding boxes of a
68 | particular script. Identifiers are ISO 15924 4 character codes.
70 |
71 | .. code-block:: console
72 |
73 |     $ kraken -i 14.tif lines.json segment
74 | $ cat lines.json
75 | {
76 | "boxes" : [
77 | [
78 | ["Grek", [561, 216, 1626,309]]
79 | ],
80 | [
81 | ["Latn", [2172, 197, 2424, 244]]
82 | ],
83 | [
84 | ["Grek", [1678, 221, 2236, 320]],
85 | ["Arab", [2241, 221, 2302, 320]]
86 | ],
87 |             [
88 | ["Grek", [412, 318, 2215, 416]],
89 | ["Latn", [2208, 318, 2424, 416]]
90 | ],
91 | ...
92 | ],
93 | "text_direction" : "horizontal-tb"
94 | }
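
The segmenter is also callable from Python. A minimal sketch, assuming the
`segment` function in `kraken.pageseg` follows the upstream kraken interface;
note that the plain `segment` call returns line bounding boxes only, while the
script annotation shown above is produced by the separate script detection
step:

.. code-block:: python

    # Sketch: page segmentation through the Python API. Assumes
    # kraken.pageseg.segment() as in upstream kraken; 'bw.png' is a placeholder
    # for an already binarized page image.
    from PIL import Image
    from kraken import pageseg

    im = Image.open('bw.png')
    res = pageseg.segment(im, text_direction='horizontal-lr')
    print(res['text_direction'])
    for box in res['boxes']:                     # (x0, y0, x1, y1) per line
        print(box)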
95 |
96 | Script detection is enabled by default; when it is explicitly disabled, the
97 | `boxes` field will contain only a list of line bounding boxes:
98 |
99 | .. code-block:: console
100 |
101 | [546, 216, 1626, 309],
102 | [2169, 197, 2423, 244],
103 | [1676, 221, 2293, 320],
104 | ...
105 | [503, 2641, 848, 2681]
106 |
107 | Available page segmentation parameters are:
108 |
109 | =============================================== ======
110 | option action
111 | =============================================== ======
112 | -d, --text-direction Sets principal text direction. Valid values are `horizontal-lr`, `horizontal-rl`, `vertical-lr`, and `vertical-rl`.
113 | --scale FLOAT Estimate of the average line height on the page
114 | -m, --maxcolseps Maximum number of columns in the input document. Set to `0` for uni-column layouts.
115 | -b, --black-colseps / -w, --white-colseps Switch to black column separators.
116 | -r, --remove-hlines / -l, --hlines Disables prefiltering of small horizontal lines. Improves segmenter output on some Arabic texts.
117 | =============================================== ======
118 |
119 | The parameters specific to the script identification are:
120 |
121 | =============================================== ======
122 | option action
123 | =============================================== ======
124 | -s/-n Enables/disables script detection
125 | -a, --allowed-script Whitelists specific scripts for detection output. Other detected script runs are merged with their adjacent scripts, after a heuristic pre-merging step.
126 | =============================================== ======
127 |
128 | Model Repository
129 | ----------------
130 |
131 | There is a semi-curated repository of freely licensed recognition
133 | models that can be accessed from the command line using a few subcommands. For
134 | evaluating a series of models it is also possible to just clone the repository
135 | using the normal git client.
136 |
137 | The ``list`` subcommand retrieves a list of all models available and prints
138 | them including some additional information (identifier, type, and a short
139 | description):
140 |
141 | .. code-block:: console
142 |
143 | $ kraken list
144 | Retrieving model list ✓
145 | default (pyrnn) - A converted version of en-default.pyrnn.gz
146 | toy (clstm) - A toy model trained on 400 lines of the UW3 data set.
147 | ...
148 |
149 | To access more detailed information the ``show`` subcommand may be used:
150 |
151 | .. code-block:: console
152 |
153 | $ kraken show toy
154 | name: toy.clstm
155 |
156 | A toy model trained on 400 lines of the UW3 data set.
157 |
158 | author: Benjamin Kiessling (mittagessen@l.unchti.me)
159 | http://kraken.re
160 |
161 | If a suitable model has been decided upon it can be retrieved using the ``get``
162 | subcommand:
163 |
164 | .. code-block:: console
165 |
166 | $ kraken get toy
167 | Retrieving model ✓
168 |
169 | Models will be placed in $XDG_BASE_DIR and can be accessed using their name as
170 | shown by the ``show`` command, e.g.:
171 |
172 | .. code-block:: console
173 |
174 | $ kraken -i ... ... ocr -m toy
175 |
176 | Additions and updates to existing models are always welcome! Just open a pull
177 | request or write an email.
178 |
179 | Recognition
180 | -----------
181 |
182 | Recognition requires a grey-scale or binarized image, a page segmentation for
183 | that image, and a model file. In particular there is no requirement to use the
184 | page segmentation algorithm contained in the ``segment`` subcommand or the
185 | binarization provided by kraken.
186 |
187 | Multi-script recognition is possible by supplying a script-annotated
188 | segmentation and a mapping between scripts and models:
189 |
190 | .. code-block:: console
191 |
192 | $ kraken -i ... ... ocr -m Grek:porson.clstm -m Latn:antiqua.clstm
193 |
194 | All polytonic Greek text portions will be recognized using the `porson.clstm`
195 | model while Latin text will be fed into the `antiqua.clstm` model. It is
196 | possible to define a fallback model that other text will be fed to:
197 |
198 | .. code-block:: console
199 |
200 | $ kraken -i ... ... ocr -m ... -m ... -m default:porson.clstm
201 |
202 | It is also possible to disable recognition on a particular script by mapping to
203 | the special model keyword `ignore`. Ignored lines will still be serialized but
204 | will not contain any recognition results.
205 |
206 | The ``ocr`` subcommand is able to serialize the recognition results either as
207 | plain text (default), as hOCR, into ALTO, or as abbyyXML containing additional
208 | metadata such as bounding boxes and confidences:
210 |
211 | .. code-block:: console
212 |
213 | $ kraken -i ... ... ocr -t # text output
214 | $ kraken -i ... ... ocr -h # hOCR output
215 | $ kraken -i ... ... ocr -a # ALTO output
216 | $ kraken -i ... ... ocr -y # abbyyXML output
217 |
218 | hOCR output is slightly different from hOCR files produced by ocropus. Each
219 | ``ocr_line`` span contains not only the bounding box of the line but also
220 | character boxes (``x_bboxes`` attribute) indicating the coordinates of each
221 | character. In each line alternating sequences of alphanumeric and
222 | non-alphanumeric (in the Unicode sense) characters are put into ``ocrx_word``
223 | spans. Both have bounding boxes as attributes and the recognition confidence
224 | for each character in the ``x_conf`` attribute.
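
The serializers can also be driven from Python by combining recognition with
the `serialize` function in `kraken.serialization`. A minimal sketch, assuming
the `load_any` and `rpred` helpers follow the upstream kraken interfaces
(`kraken.lib.models`, `kraken.rpred`); the file names are placeholders:

.. code-block:: python

    # Sketch: recognition plus hOCR serialization through the Python API.
    # Assumes kraken.lib.models.load_any() and kraken.rpred.rpred() as in
    # upstream kraken; 'bw.png' and 'model.clstm' are placeholders.
    from PIL import Image
    from kraken import pageseg, rpred, serialization
    from kraken.lib import models

    im = Image.open('bw.png')
    bounds = pageseg.segment(im)
    net = models.load_any('model.clstm')

    records = list(rpred.rpred(net, im, bounds))
    hocr = serialization.serialize(records, image_name='bw.png',
                                   image_size=im.size, template='hocr')
    with open('bw.html', 'w') as fp:
        fp.write(hocr)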
225 |
226 | Paragraph detection has been removed as it was deemed to be unduly dependent on
227 | certain typographic features which may not be valid for your input.
228 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # kraken documentation build configuration file, created by
4 | # sphinx-quickstart on Fri May 22 16:51:45 2015.
5 | #
6 | # This file is execfile()d with the current directory set to its
7 | # containing dir.
8 | #
9 | # Note that not all possible configuration values are present in this
10 | # autogenerated file.
11 | #
12 | # All configuration values have a default; values that are commented out
13 | # serve to show the default.
14 |
15 | from __future__ import absolute_import
16 |
17 | import sys
18 | import os
19 | import shlex
20 |
21 | from subprocess import Popen, PIPE
22 | # If extensions (or modules to document with autodoc) are in another directory,
23 | # add these directories to sys.path here. If the directory is relative to the
24 | # documentation root, use os.path.abspath to make it absolute, like shown here.
25 | #sys.path.insert(0, os.path.abspath('../kraken'))
26 |
27 | # -- General configuration ------------------------------------------------
28 |
29 | # If your documentation needs a minimal Sphinx version, state it here.
30 | #needs_sphinx = '1.0'
31 |
32 | # Add any Sphinx extension module names here, as strings. They can be
33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
34 | # ones.
35 | extensions = [
36 | 'sphinx.ext.autodoc',
37 | 'sphinx.ext.napoleon',
38 | ]
39 |
40 | # Add any paths that contain templates here, relative to this directory.
41 | templates_path = ['_templates']
42 |
43 | # The suffix(es) of source filenames.
44 | # You can specify multiple suffix as a list of string:
45 | # source_suffix = ['.rst', '.md']
46 | source_suffix = '.rst'
47 |
48 | # The encoding of source files.
49 | #source_encoding = 'utf-8-sig'
50 |
51 | # The master toctree document.
52 | master_doc = 'index'
53 |
54 | # General information about the project.
55 | project = u'kraken'
56 | copyright = u'2015, mittagessen'
57 | author = u'mittagessen'
58 |
59 | # The version info for the project you're documenting, acts as replacement for
60 | # |version| and |release|, also used in various other places throughout the
61 | # built documents.
62 | #
63 | # The short X.Y version.
64 | pipe = Popen('git describe --tags --always master', stdout=PIPE, shell=True)
65 | version = pipe.stdout.read().decode('utf-8')
66 | release = version
67 |
68 | # The language for content autogenerated by Sphinx. Refer to documentation
69 | # for a list of supported languages.
70 | #
71 | # This is also used if you do content translation via gettext catalogs.
72 | # Usually you set "language" from the command line for these cases.
73 | language = None
74 |
75 | # There are two options for replacing |today|: either, you set today to some
76 | # non-false value, then it is used:
77 | #today = ''
78 | # Else, today_fmt is used as the format for a strftime call.
79 | #today_fmt = '%B %d, %Y'
80 |
81 | # List of patterns, relative to source directory, that match files and
82 | # directories to ignore when looking for source files.
83 | exclude_patterns = ['_build']
84 |
85 | # The reST default role (used for this markup: `text`) to use for all
86 | # documents.
87 | #default_role = None
88 |
89 | # If true, '()' will be appended to :func: etc. cross-reference text.
90 | #add_function_parentheses = True
91 |
92 | # If true, the current module name will be prepended to all description
93 | # unit titles (such as .. function::).
94 | #add_module_names = True
95 |
96 | # If true, sectionauthor and moduleauthor directives will be shown in the
97 | # output. They are ignored by default.
98 | #show_authors = False
99 |
100 | # The name of the Pygments (syntax highlighting) style to use.
101 | pygments_style = 'sphinx'
102 |
103 | # A list of ignored prefixes for module index sorting.
104 | #modindex_common_prefix = []
105 |
106 | # If true, keep warnings as "system message" paragraphs in the built documents.
107 | #keep_warnings = False
108 |
109 | # If true, `todo` and `todoList` produce output, else they produce nothing.
110 | todo_include_todos = False
111 |
112 |
113 | # -- Options for HTML output ----------------------------------------------
114 |
115 | # The theme to use for HTML and HTML Help pages. See the documentation for
116 | # a list of builtin themes.
117 | html_theme = 'alabaster'
118 |
119 | # Theme options are theme-specific and customize the look and feel of a theme
120 | # further. For a list of options available for each theme, see the
121 | # documentation.
122 | html_theme_options = {
123 | 'github_user': 'mittagessen',
124 | 'github_repo': 'kraken',
125 | 'travis_button': 'true',
126 | }
127 |
128 | # Add any paths that contain custom themes here, relative to this directory.
129 | #html_theme_path = []
130 |
131 | # The name for this set of Sphinx documents. If None, it defaults to
132 | # "<project> v<release> documentation".
133 | #html_title = None
134 |
135 | # A shorter title for the navigation bar. Default is the same as html_title.
136 | #html_short_title = None
137 |
138 | # The name of an image file (relative to this directory) to place at the top
139 | # of the sidebar.
140 | html_logo = '_static/kraken.png'
141 |
142 | # The name of an image file (within the static path) to use as favicon of the
143 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
144 | # pixels large.
145 | #html_favicon = None
146 |
147 | # Add any paths that contain custom static files (such as style sheets) here,
148 | # relative to this directory. They are copied after the builtin static files,
149 | # so a file named "default.css" will overwrite the builtin "default.css".
150 | html_static_path = ['_static']
151 |
152 | # Add any extra paths that contain custom files (such as robots.txt or
153 | # .htaccess) here, relative to this directory. These files are copied
154 | # directly to the root of the documentation.
155 | #html_extra_path = []
156 |
157 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
158 | # using the given strftime format.
159 | #html_last_updated_fmt = '%b %d, %Y'
160 |
161 | # If true, SmartyPants will be used to convert quotes and dashes to
162 | # typographically correct entities.
163 | #html_use_smartypants = True
164 |
165 | # Custom sidebar templates, maps document names to template names.
166 | html_sidebars = {
167 | 'index': ['sidebarintro.html', 'navigation.html', 'searchbox.html', 'versions.html'],
168 | '**': ['localtoc.html', 'relations.html', 'searchbox.html']
169 | }
170 |
171 | # Additional templates that should be rendered to pages, maps page names to
172 | # template names.
173 | #html_additional_pages = {}
174 |
175 | # If false, no module index is generated.
176 | #html_domain_indices = True
177 |
178 | # If false, no index is generated.
179 | #html_use_index = True
180 |
181 | # If true, the index is split into individual pages for each letter.
182 | #html_split_index = False
183 |
184 | # If true, links to the reST sources are added to the pages.
185 | #html_show_sourcelink = True
186 |
187 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
188 | #html_show_sphinx = True
189 |
190 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
191 | #html_show_copyright = True
192 |
193 | # If true, an OpenSearch description file will be output, and all pages will
194 | # contain a <link> tag referring to it. The value of this option must be the
195 | # base URL from which the finished HTML is served.
196 | #html_use_opensearch = ''
197 |
198 | # This is the file name suffix for HTML files (e.g. ".xhtml").
199 | #html_file_suffix = None
200 |
201 | # Language to be used for generating the HTML full-text search index.
202 | # Sphinx supports the following languages:
203 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
204 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
205 | #html_search_language = 'en'
206 |
207 | # A dictionary with options for the search language support, empty by default.
208 | # Now only 'ja' uses this config value
209 | #html_search_options = {'type': 'default'}
210 |
211 | # The name of a javascript file (relative to the configuration directory) that
212 | # implements a search results scorer. If empty, the default will be used.
213 | #html_search_scorer = 'scorer.js'
214 |
215 | # Output file base name for HTML help builder.
216 | htmlhelp_basename = 'krakendoc'
217 |
218 | # -- Options for LaTeX output ---------------------------------------------
219 |
220 | latex_elements = {
221 | # The paper size ('letterpaper' or 'a4paper').
222 | #'papersize': 'letterpaper',
223 |
224 | # The font size ('10pt', '11pt' or '12pt').
225 | #'pointsize': '10pt',
226 |
227 | # Additional stuff for the LaTeX preamble.
228 | #'preamble': '',
229 |
230 | # Latex figure (float) alignment
231 | #'figure_align': 'htbp',
232 | }
233 |
234 | # Grouping the document tree into LaTeX files. List of tuples
235 | # (source start file, target name, title,
236 | # author, documentclass [howto, manual, or own class]).
237 | latex_documents = [
238 | (master_doc, 'kraken.tex', u'kraken Documentation',
239 | u'mittagessen', 'manual'),
240 | ]
241 |
242 | # The name of an image file (relative to this directory) to place at the top of
243 | # the title page.
244 | #latex_logo = None
245 |
246 | # For "manual" documents, if this is true, then toplevel headings are parts,
247 | # not chapters.
248 | #latex_use_parts = False
249 |
250 | # If true, show page references after internal links.
251 | #latex_show_pagerefs = False
252 |
253 | # If true, show URL addresses after external links.
254 | #latex_show_urls = False
255 |
256 | # Documents to append as an appendix to all manuals.
257 | #latex_appendices = []
258 |
259 | # If false, no module index is generated.
260 | #latex_domain_indices = True
261 |
262 |
263 | # -- Options for manual page output ---------------------------------------
264 |
265 | # One entry per manual page. List of tuples
266 | # (source start file, name, description, authors, manual section).
267 | man_pages = [
268 | (master_doc, 'kraken', u'kraken Documentation',
269 | [author], 1)
270 | ]
271 |
272 | # If true, show URL addresses after external links.
273 | #man_show_urls = False
274 |
275 |
276 | # -- Options for Texinfo output -------------------------------------------
277 |
278 | # Grouping the document tree into Texinfo files. List of tuples
279 | # (source start file, target name, title, author,
280 | # dir menu entry, description, category)
281 | texinfo_documents = [
282 | (master_doc, 'kraken', u'kraken Documentation',
283 | author, 'kraken', 'Turn-key OCR system.',
284 | 'Miscellaneous'),
285 | ]
286 |
287 | # Documents to append as an appendix to all manuals.
288 | #texinfo_appendices = []
289 |
290 | # If false, no module index is generated.
291 | #texinfo_domain_indices = True
292 |
293 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
294 | #texinfo_show_urls = 'footnote'
295 |
296 | # If true, do not generate a @detailmenu in the "Top" node's menu.
297 | #texinfo_no_detailmenu = False
298 |
299 | # -- Options for sphinxcontrib-versioning ----------------------------------
300 |
301 | import re
302 |
303 | # Only build documentation for the master branch and for release tags of
304 | # the form X.Y.0.
305 | scv_whitelist_branches = ('master',)
306 | scv_whitelist_tags = (re.compile(r'^\d+\.\d+\.0$'),)
307 |
308 | # Use the highest release tag as the root version and show a banner on
309 | # pages built from any other ref, pointing back to that tag.
310 | scv_greatest_tag = True
311 | scv_show_banner = True
312 | scv_banner_greatest_tag = True
313 |
--------------------------------------------------------------------------------
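Note: lines 64-66 of conf.py above derive |version| and |release| by shelling out to `git describe`. The following is a minimal sketch, not part of the repository, of the same idea written with subprocess.check_output and a fallback for builds outside a git checkout; the helper name detect_version and the 'unknown' default are illustrative assumptions, not kraken API.

import subprocess

def detect_version(fallback='unknown'):
    """Return the output of `git describe --tags --always master`, or a fallback."""
    try:
        out = subprocess.check_output(['git', 'describe', '--tags', '--always', 'master'])
    except (subprocess.CalledProcessError, OSError):
        # not a git checkout, or git is missing
        return fallback
    return out.decode('utf-8').strip()

version = detect_version()
release = version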
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/kraken/lib/pyrnn_pb2.py:
--------------------------------------------------------------------------------
1 | # Generated by the protocol buffer compiler. DO NOT EDIT!
2 | # source: proto/pyrnn.proto
3 |
4 | from google.protobuf import descriptor as _descriptor
5 | from google.protobuf import message as _message
6 | from google.protobuf import reflection as _reflection
7 | from google.protobuf import symbol_database as _symbol_database
8 | from google.protobuf import descriptor_pb2
9 | # @@protoc_insertion_point(imports)
10 |
11 | _sym_db = _symbol_database.Default()
12 |
13 |
14 |
15 |
16 | DESCRIPTOR = _descriptor.FileDescriptor(
17 | name='proto/pyrnn.proto',
18 | package='kraken',
19 | syntax='proto2',
20 | serialized_pb=b'\n\x11proto/pyrnn.proto\x12\x06kraken\"\'\n\x05\x61rray\x12\x0b\n\x03\x64im\x18\x01 \x03(\r\x12\x11\n\x05value\x18\x02 \x03(\x02\x42\x02\x10\x01\"\xca\x01\n\x04lstm\x12\x1a\n\x03wgi\x18\x01 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wgf\x18\x02 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wgo\x18\x03 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wci\x18\x04 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wip\x18\x05 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wfp\x18\x06 \x02(\x0b\x32\r.kraken.array\x12\x1a\n\x03wop\x18\x07 \x02(\x0b\x32\r.kraken.array\"$\n\x07softmax\x12\x19\n\x02w2\x18\x01 \x02(\x0b\x32\r.kraken.array\"\xb1\x01\n\x05pyrnn\x12\x0c\n\x04kind\x18\x01 \x02(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06ninput\x18\n \x02(\r\x12\x0f\n\x07noutput\x18\x0b \x02(\r\x12\r\n\x05\x63odec\x18\x0c \x03(\t\x12\x1c\n\x06\x66wdnet\x18\r \x02(\x0b\x32\x0c.kraken.lstm\x12\x1c\n\x06revnet\x18\x0e \x02(\x0b\x32\x0c.kraken.lstm\x12 \n\x07softmax\x18\x0f \x02(\x0b\x32\x0f.kraken.softmax'
21 | )
22 | _sym_db.RegisterFileDescriptor(DESCRIPTOR)
23 |
24 |
25 |
26 |
27 | _ARRAY = _descriptor.Descriptor(
28 | name='array',
29 | full_name='kraken.array',
30 | filename=None,
31 | file=DESCRIPTOR,
32 | containing_type=None,
33 | fields=[
34 | _descriptor.FieldDescriptor(
35 | name='dim', full_name='kraken.array.dim', index=0,
36 | number=1, type=13, cpp_type=3, label=3,
37 | has_default_value=False, default_value=[],
38 | message_type=None, enum_type=None, containing_type=None,
39 | is_extension=False, extension_scope=None,
40 | options=None),
41 | _descriptor.FieldDescriptor(
42 | name='value', full_name='kraken.array.value', index=1,
43 | number=2, type=2, cpp_type=6, label=3,
44 | has_default_value=False, default_value=[],
45 | message_type=None, enum_type=None, containing_type=None,
46 | is_extension=False, extension_scope=None,
47 | options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), b'\020\001')),
48 | ],
49 | extensions=[
50 | ],
51 | nested_types=[],
52 | enum_types=[
53 | ],
54 | options=None,
55 | is_extendable=False,
56 | syntax='proto2',
57 | extension_ranges=[],
58 | oneofs=[
59 | ],
60 | serialized_start=29,
61 | serialized_end=68,
62 | )
63 |
64 |
65 | _LSTM = _descriptor.Descriptor(
66 | name='lstm',
67 | full_name='kraken.lstm',
68 | filename=None,
69 | file=DESCRIPTOR,
70 | containing_type=None,
71 | fields=[
72 | _descriptor.FieldDescriptor(
73 | name='wgi', full_name='kraken.lstm.wgi', index=0,
74 | number=1, type=11, cpp_type=10, label=2,
75 | has_default_value=False, default_value=None,
76 | message_type=None, enum_type=None, containing_type=None,
77 | is_extension=False, extension_scope=None,
78 | options=None),
79 | _descriptor.FieldDescriptor(
80 | name='wgf', full_name='kraken.lstm.wgf', index=1,
81 | number=2, type=11, cpp_type=10, label=2,
82 | has_default_value=False, default_value=None,
83 | message_type=None, enum_type=None, containing_type=None,
84 | is_extension=False, extension_scope=None,
85 | options=None),
86 | _descriptor.FieldDescriptor(
87 | name='wgo', full_name='kraken.lstm.wgo', index=2,
88 | number=3, type=11, cpp_type=10, label=2,
89 | has_default_value=False, default_value=None,
90 | message_type=None, enum_type=None, containing_type=None,
91 | is_extension=False, extension_scope=None,
92 | options=None),
93 | _descriptor.FieldDescriptor(
94 | name='wci', full_name='kraken.lstm.wci', index=3,
95 | number=4, type=11, cpp_type=10, label=2,
96 | has_default_value=False, default_value=None,
97 | message_type=None, enum_type=None, containing_type=None,
98 | is_extension=False, extension_scope=None,
99 | options=None),
100 | _descriptor.FieldDescriptor(
101 | name='wip', full_name='kraken.lstm.wip', index=4,
102 | number=5, type=11, cpp_type=10, label=2,
103 | has_default_value=False, default_value=None,
104 | message_type=None, enum_type=None, containing_type=None,
105 | is_extension=False, extension_scope=None,
106 | options=None),
107 | _descriptor.FieldDescriptor(
108 | name='wfp', full_name='kraken.lstm.wfp', index=5,
109 | number=6, type=11, cpp_type=10, label=2,
110 | has_default_value=False, default_value=None,
111 | message_type=None, enum_type=None, containing_type=None,
112 | is_extension=False, extension_scope=None,
113 | options=None),
114 | _descriptor.FieldDescriptor(
115 | name='wop', full_name='kraken.lstm.wop', index=6,
116 | number=7, type=11, cpp_type=10, label=2,
117 | has_default_value=False, default_value=None,
118 | message_type=None, enum_type=None, containing_type=None,
119 | is_extension=False, extension_scope=None,
120 | options=None),
121 | ],
122 | extensions=[
123 | ],
124 | nested_types=[],
125 | enum_types=[
126 | ],
127 | options=None,
128 | is_extendable=False,
129 | syntax='proto2',
130 | extension_ranges=[],
131 | oneofs=[
132 | ],
133 | serialized_start=71,
134 | serialized_end=273,
135 | )
136 |
137 |
138 | _SOFTMAX = _descriptor.Descriptor(
139 | name='softmax',
140 | full_name='kraken.softmax',
141 | filename=None,
142 | file=DESCRIPTOR,
143 | containing_type=None,
144 | fields=[
145 | _descriptor.FieldDescriptor(
146 | name='w2', full_name='kraken.softmax.w2', index=0,
147 | number=1, type=11, cpp_type=10, label=2,
148 | has_default_value=False, default_value=None,
149 | message_type=None, enum_type=None, containing_type=None,
150 | is_extension=False, extension_scope=None,
151 | options=None),
152 | ],
153 | extensions=[
154 | ],
155 | nested_types=[],
156 | enum_types=[
157 | ],
158 | options=None,
159 | is_extendable=False,
160 | syntax='proto2',
161 | extension_ranges=[],
162 | oneofs=[
163 | ],
164 | serialized_start=275,
165 | serialized_end=311,
166 | )
167 |
168 |
169 | _PYRNN = _descriptor.Descriptor(
170 | name='pyrnn',
171 | full_name='kraken.pyrnn',
172 | filename=None,
173 | file=DESCRIPTOR,
174 | containing_type=None,
175 | fields=[
176 | _descriptor.FieldDescriptor(
177 | name='kind', full_name='kraken.pyrnn.kind', index=0,
178 | number=1, type=9, cpp_type=9, label=2,
179 | has_default_value=False, default_value=b"".decode('utf-8'),
180 | message_type=None, enum_type=None, containing_type=None,
181 | is_extension=False, extension_scope=None,
182 | options=None),
183 | _descriptor.FieldDescriptor(
184 | name='name', full_name='kraken.pyrnn.name', index=1,
185 | number=2, type=9, cpp_type=9, label=1,
186 | has_default_value=False, default_value=b"".decode('utf-8'),
187 | message_type=None, enum_type=None, containing_type=None,
188 | is_extension=False, extension_scope=None,
189 | options=None),
190 | _descriptor.FieldDescriptor(
191 | name='ninput', full_name='kraken.pyrnn.ninput', index=2,
192 | number=10, type=13, cpp_type=3, label=2,
193 | has_default_value=False, default_value=0,
194 | message_type=None, enum_type=None, containing_type=None,
195 | is_extension=False, extension_scope=None,
196 | options=None),
197 | _descriptor.FieldDescriptor(
198 | name='noutput', full_name='kraken.pyrnn.noutput', index=3,
199 | number=11, type=13, cpp_type=3, label=2,
200 | has_default_value=False, default_value=0,
201 | message_type=None, enum_type=None, containing_type=None,
202 | is_extension=False, extension_scope=None,
203 | options=None),
204 | _descriptor.FieldDescriptor(
205 | name='codec', full_name='kraken.pyrnn.codec', index=4,
206 | number=12, type=9, cpp_type=9, label=3,
207 | has_default_value=False, default_value=[],
208 | message_type=None, enum_type=None, containing_type=None,
209 | is_extension=False, extension_scope=None,
210 | options=None),
211 | _descriptor.FieldDescriptor(
212 | name='fwdnet', full_name='kraken.pyrnn.fwdnet', index=5,
213 | number=13, type=11, cpp_type=10, label=2,
214 | has_default_value=False, default_value=None,
215 | message_type=None, enum_type=None, containing_type=None,
216 | is_extension=False, extension_scope=None,
217 | options=None),
218 | _descriptor.FieldDescriptor(
219 | name='revnet', full_name='kraken.pyrnn.revnet', index=6,
220 | number=14, type=11, cpp_type=10, label=2,
221 | has_default_value=False, default_value=None,
222 | message_type=None, enum_type=None, containing_type=None,
223 | is_extension=False, extension_scope=None,
224 | options=None),
225 | _descriptor.FieldDescriptor(
226 | name='softmax', full_name='kraken.pyrnn.softmax', index=7,
227 | number=15, type=11, cpp_type=10, label=2,
228 | has_default_value=False, default_value=None,
229 | message_type=None, enum_type=None, containing_type=None,
230 | is_extension=False, extension_scope=None,
231 | options=None),
232 | ],
233 | extensions=[
234 | ],
235 | nested_types=[],
236 | enum_types=[
237 | ],
238 | options=None,
239 | is_extendable=False,
240 | syntax='proto2',
241 | extension_ranges=[],
242 | oneofs=[
243 | ],
244 | serialized_start=314,
245 | serialized_end=491,
246 | )
247 |
248 | _LSTM.fields_by_name['wgi'].message_type = _ARRAY
249 | _LSTM.fields_by_name['wgf'].message_type = _ARRAY
250 | _LSTM.fields_by_name['wgo'].message_type = _ARRAY
251 | _LSTM.fields_by_name['wci'].message_type = _ARRAY
252 | _LSTM.fields_by_name['wip'].message_type = _ARRAY
253 | _LSTM.fields_by_name['wfp'].message_type = _ARRAY
254 | _LSTM.fields_by_name['wop'].message_type = _ARRAY
255 | _SOFTMAX.fields_by_name['w2'].message_type = _ARRAY
256 | _PYRNN.fields_by_name['fwdnet'].message_type = _LSTM
257 | _PYRNN.fields_by_name['revnet'].message_type = _LSTM
258 | _PYRNN.fields_by_name['softmax'].message_type = _SOFTMAX
259 | DESCRIPTOR.message_types_by_name['array'] = _ARRAY
260 | DESCRIPTOR.message_types_by_name['lstm'] = _LSTM
261 | DESCRIPTOR.message_types_by_name['softmax'] = _SOFTMAX
262 | DESCRIPTOR.message_types_by_name['pyrnn'] = _PYRNN
263 |
264 | array = _reflection.GeneratedProtocolMessageType('array', (_message.Message,), dict(
265 | DESCRIPTOR = _ARRAY,
266 | __module__ = 'proto.pyrnn_pb2'
267 | # @@protoc_insertion_point(class_scope:kraken.array)
268 | ))
269 | _sym_db.RegisterMessage(array)
270 |
271 | lstm = _reflection.GeneratedProtocolMessageType('lstm', (_message.Message,), dict(
272 | DESCRIPTOR = _LSTM,
273 | __module__ = 'proto.pyrnn_pb2'
274 | # @@protoc_insertion_point(class_scope:kraken.lstm)
275 | ))
276 | _sym_db.RegisterMessage(lstm)
277 |
278 | softmax = _reflection.GeneratedProtocolMessageType('softmax', (_message.Message,), dict(
279 | DESCRIPTOR = _SOFTMAX,
280 | __module__ = 'proto.pyrnn_pb2'
281 | # @@protoc_insertion_point(class_scope:kraken.softmax)
282 | ))
283 | _sym_db.RegisterMessage(softmax)
284 |
285 | pyrnn = _reflection.GeneratedProtocolMessageType('pyrnn', (_message.Message,), dict(
286 | DESCRIPTOR = _PYRNN,
287 | __module__ = 'proto.pyrnn_pb2'
288 | # @@protoc_insertion_point(class_scope:kraken.pyrnn)
289 | ))
290 | _sym_db.RegisterMessage(pyrnn)
291 |
292 |
293 | _ARRAY.fields_by_name['value'].has_options = True
294 | _ARRAY.fields_by_name['value']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), b'\020\001')
295 | # @@protoc_insertion_point(module_scope)
296 |
--------------------------------------------------------------------------------
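Note: the generated module above defines the wire format for legacy pyrnn models (array, lstm, softmax and pyrnn messages). Model loading in kraken normally goes through kraken/lib/models.py; the sketch below, which is not part of the repository, only illustrates how the serialized format could be inspected with the standard protobuf message API. The fixture path comes from tests/resources; the printed fields follow the descriptors above, and the exact values are illustrative.

from kraken.lib import pyrnn_pb2

net = pyrnn_pb2.pyrnn()
with open('tests/resources/model.pronn', 'rb') as fp:
    net.ParseFromString(fp.read())

print(net.kind, net.ninput, net.noutput)  # model type and input/output sizes
print(len(net.codec))                     # number of code points in the codec
print(list(net.fwdnet.wgi.dim))           # shape of the forward input-gate weights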
/tests/test_codec.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import unittest
3 | import os
4 |
5 | from future.utils import PY2
6 | from nose.tools import raises
7 |
8 | from torch import IntTensor
9 |
10 | from kraken.lib import codec
11 | from kraken.lib.exceptions import KrakenEncodeException
12 |
13 | class TestCodec(unittest.TestCase):
14 |
15 | """
16 | Testing codec mapping routines
17 | """
18 |
19 | def setUp(self):
20 | # codec mapping one code point to one label
21 | self.o2o_codec = codec.PytorchCodec('ab')
22 | # codec mapping many code points to one label
23 | self.m2o_codec = codec.PytorchCodec(['aaa', 'aa', 'a', 'b'])
24 | # codec mapping one code point to many labels
25 | self.o2m_codec = codec.PytorchCodec({'a': [10, 11, 12], 'b': [12, 45, 80]})
26 | # codec mapping many code points to many labels
27 | self.m2m_codec = codec.PytorchCodec({'aaa': [10, 11, 12], 'aa': [10, 10], 'a': [10], 'bb': [15], 'b': [12]})
28 |
29 | self.invalid_c_sequence = 'aaababbcaaa'
30 | self.valid_c_sequence = 'aaababbaaabbbb'
31 |
32 | self.invalid_l_sequence = [(45, 78, 778, 0.3793492615638364),
33 | (10, 203, 859, 0.9485075253700872),
34 | (11, 70, 601, 0.7885297329523855),
35 | (12, 251, 831, 0.7216817042926938),
36 | (900, 72, 950, 0.27609823017048707)]
37 |
38 | def test_o2o_encode(self):
39 | """
40 | Test correct encoding of one-to-one code point sequence
41 | """
42 | self.assertTrue(self.o2o_codec.encode(self.valid_c_sequence).eq(
43 | IntTensor([1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2])).all())
44 |
45 | def test_m2o_encode(self):
46 | """
47 | Test correct encoding of many-to-one code point sequence
48 | """
49 | self.assertTrue(self.m2o_codec.encode(self.valid_c_sequence).eq(
50 | IntTensor([3, 4, 1, 4, 4, 3, 4, 4, 4, 4])).all())
51 |
52 | def test_o2m_encode(self):
53 | """
54 | Test correct encoding of one-to-many code point sequence
55 | """
56 | self.assertTrue(self.o2m_codec.encode(self.valid_c_sequence).eq(
57 | IntTensor([10, 11, 12, 10, 11, 12, 10, 11, 12,
58 | 12, 45, 80, 10, 11, 12, 12, 45, 80, 12, 45,
59 | 80, 10, 11, 12, 10, 11, 12, 10, 11, 12, 12,
60 | 45, 80, 12, 45, 80, 12, 45, 80, 12, 45,
61 | 80])).all())
62 |
63 | def test_m2m_encode(self):
64 | """
65 | Test correct encoding of many-to-many code point sequence
66 | """
67 | self.assertTrue(self.m2m_codec.encode(self.valid_c_sequence).eq(
68 | IntTensor([10, 11, 12, 12, 10, 15, 10, 11, 12,
69 | 15, 15])).all())
70 |
71 | def test_o2o_decode(self):
72 | """
73 | Test correct decoding of one-to-one label sequence
74 | """
75 | self.assertEqual(''.join(x[0] for x in self.o2o_codec.decode([(1, 288, 652, 0.8537325587315542),
76 | (1, 120, 861, 0.4968470297302481),
77 | (1, 372, 629, 0.008650773294205938),
78 | (2, 406, 831, 0.15637985875540783),
79 | (1, 3, 824, 0.26475146828232776),
80 | (2, 228, 959, 0.3062689368044844),
81 | (2, 472, 679, 0.8677848554329698),
82 | (1, 482, 771, 0.6055591197109657),
83 | (1, 452, 606, 0.40744265053745055),
84 | (1, 166, 879, 0.7509269177978337),
85 | (2, 92, 729, 0.34554103785480306),
86 | (2, 227, 959, 0.3006394689033981),
87 | (2, 341, 699, 0.07798704843315862),
88 | (2, 142, 513, 0.9933850573241767)])),
89 | 'aaababbaaabbbb')
90 |
91 | def test_m2o_decode(self):
92 | """
93 | Test correct decoding of many-to-one label sequence
94 | """
95 | self.assertEqual(''.join(x[0] for x in self.m2o_codec.decode([(3, 28, 967, 0.07761440833942468),
96 | (4, 282, 565, 0.4946281412618093),
97 | (1, 411, 853, 0.7767301050586806),
98 | (4, 409, 501, 0.47915609540996495),
99 | (4, 299, 637, 0.7755889399450564),
100 | (3, 340, 834, 0.726656062406549),
101 | (4, 296, 846, 0.2274859668684881),
102 | (4, 238, 695, 0.32982930128257815),
103 | (4, 187, 970, 0.43354272748701805),
104 | (4, 376, 863, 0.24483897879550764)])),
105 | 'aaababbaaabbbb')
106 |
107 | def test_o2m_decode(self):
108 | """
109 | Test correct decoding of one-to-many label sequence
110 | """
111 | self.assertEqual(''.join(x[0] for x in self.o2m_codec.decode([(10, 35, 959, 0.43819571289990644),
112 | (11, 361, 904, 0.1801115018592916),
113 | (12, 15, 616, 0.5987506334315549),
114 | (10, 226, 577, 0.6178248939780698),
115 | (11, 227, 814, 0.31531097360327787),
116 | (12, 390, 826, 0.7706594984014595),
117 | (10, 251, 579, 0.9442530315305507),
118 | (11, 269, 870, 0.4475979925584944),
119 | (12, 456, 609, 0.9396137478409995),
120 | (12, 60, 757, 0.06416607235266458),
121 | (45, 318, 918, 0.8129458423341515),
122 | (80, 15, 914, 0.49773432435726517),
123 | (10, 211, 648, 0.7919220961861382),
124 | (11, 326, 804, 0.7852387442556333),
125 | (12, 93, 978, 0.9376801123379804),
126 | (12, 23, 698, 0.915543635886972),
127 | (45, 71, 599, 0.8137750423628737),
128 | (80, 167, 980, 0.6501035181890226),
129 | (12, 259, 823, 0.3122860659712233),
130 | (45, 312, 948, 0.20582589628806058),
131 | (80, 430, 694, 0.3528792552966924),
132 | (10, 470, 866, 0.0685524032330419),
133 | (11, 459, 826, 0.39354887700146846),
134 | (12, 392, 926, 0.4102018609185847),
135 | (10, 271, 592, 0.1877915301623876),
136 | (11, 206, 995, 0.21614062190981576),
137 | (12, 466, 648, 0.3106914763314057),
138 | (10, 368, 848, 0.28715379701274113),
139 | (11, 252, 962, 0.5535299604896257),
140 | (12, 387, 709, 0.844810014550603),
141 | (12, 156, 916, 0.9803695305965802),
142 | (45, 150, 555, 0.5969071330809561),
143 | (80, 381, 922, 0.5608300913697513),
144 | (12, 35, 762, 0.5227506455088722),
145 | (45, 364, 931, 0.7205481732247938),
146 | (80, 341, 580, 0.536934566913969),
147 | (12, 79, 919, 0.5136066153481802),
148 | (45, 377, 773, 0.6507467790760987),
149 | (80, 497, 931, 0.7635100185309783),
150 | (12, 76, 580, 0.9542477438586341),
151 | (45, 37, 904, 0.4299813924853797),
152 | (80, 425, 638, 0.6825047210425983)])),
153 | 'aaababbaaabbbb')
154 |
155 | def test_m2m_decode(self):
156 | """
157 | Test correct decoding of many-to-many label sequence
158 | """
159 | self.assertEqual(''.join(x[0] for x in self.m2m_codec.decode([(10, 313, 788, 0.9379917930525369),
160 | (11, 117, 793, 0.9974374577004185),
161 | (12, 50, 707, 0.020074164253385374),
162 | (12, 382, 669, 0.525910770170754),
163 | (10, 458, 833, 0.4292373233167248),
164 | (15, 45, 831, 0.5759709886686226),
165 | (10, 465, 729, 0.8492104897235935),
166 | (11, 78, 800, 0.24733538459309445),
167 | (12, 375, 872, 0.26908722769105353),
168 | (15, 296, 889, 0.44251812620463726),
169 | (15, 237, 930, 0.5456105208117391)])),
170 | 'aaababbaaabbbb')
171 |
172 | @raises(KrakenEncodeException)
173 | def test_o2o_decode_invalid(self):
174 | """
175 | Test correct handling of undecodable sequences (one-to-one decoder)
176 | """
177 | self.o2o_codec.decode(self.invalid_l_sequence)
178 |
179 | @raises(KrakenEncodeException)
180 | def test_m2o_decode_invalid(self):
181 | """
182 | Test correct handling of undecodable sequences (many-to-one decoder)
183 | """
184 | self.m2o_codec.decode(self.invalid_l_sequence)
185 |
186 | @raises(KrakenEncodeException)
187 | def test_o2m_decode_invalid(self):
188 | """
189 | Test correct handling of undecodable sequences (one-to-many decoder)
190 | """
191 | self.o2m_codec.decode(self.invalid_l_sequence)
192 |
193 | @raises(KrakenEncodeException)
194 | def test_m2m_decode_invalid(self):
195 | """
196 | Test correct handling of undecodable sequences (many-to-many decoder)
197 | """
198 | self.m2m_codec.decode(self.invalid_l_sequence)
199 |
200 | @raises(KrakenEncodeException)
201 | def test_o2o_encode_invalid(self):
202 | """
203 | Test correct handling of unencodable sequences (one-to-one encoder)
204 | """
205 | self.o2o_codec.encode(self.invalid_c_sequence)
206 |
207 | @raises(KrakenEncodeException)
208 | def test_m2o_encode_invalid(self):
209 | """
210 | Test correct handling of unencodable sequences (many-to-one encoder)
211 | """
212 | self.m2o_codec.encode(self.invalid_c_sequence)
213 |
214 | @raises(KrakenEncodeException)
215 | def test_o2m_encode_invalid(self):
216 | """
217 | Test correct handling of unencodable sequences (one-to-many encoder)
218 | """
219 | self.o2m_codec.encode(self.invalid_c_sequence)
220 |
221 | @raises(KrakenEncodeException)
222 | def test_m2m_encode_invalid(self):
223 | """
224 | Test correct handling of unencodable sequences (many-to-many encoder)
225 | """
226 | self.m2m_codec.encode(self.invalid_c_sequence)
227 |
--------------------------------------------------------------------------------
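Note: the tests above exercise kraken.lib.codec.PytorchCodec, which maps code points to integer labels and back. The short sketch below is not part of the test suite; it mirrors the one-to-one fixture ('a' -> 1, 'b' -> 2) and the (label, start, end, confidence) tuple layout the tests feed to decode(), with made-up positions and confidences.

from kraken.lib import codec

c = codec.PytorchCodec('ab')        # one-to-one codec: 'a' -> 1, 'b' -> 2
labels = c.encode('abba')           # IntTensor([1, 2, 2, 1])
decoded = c.decode([(1, 0, 10, 0.9),
                    (2, 10, 20, 0.8),
                    (2, 20, 30, 0.7),
                    (1, 30, 40, 0.6)])
print(''.join(x[0] for x in decoded))  # 'abba'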