├── tests
    ├── __init__.py
    ├── test_opencv_utils.py
    ├── test_ocr.py
    ├── test_files.py
    └── test_grounding.py
├── MANIFEST.in
├── simpleocr
    ├── data
    │   ├── digits1.png
    │   ├── digits2.png
    │   ├── unicode1.png
    │   ├── unicode1.box
    │   ├── digits1.box
    │   └── digits2.box
    ├── pillow_utils.py
    ├── tesseract_utils.py
    ├── feature_extraction.py
    ├── numpy_utils.py
    ├── __init__.py
    ├── improver.py
    ├── classification.py
    ├── segmentation_filters.py
    ├── segmentation.py
    ├── ocr.py
    ├── opencv_utils.py
    ├── grounding.py
    ├── files.py
    ├── segmentation_aux.py
    └── processor.py
├── .travis.yml
├── example_grounding.py
├── examples
    ├── Readme.md
    └── OCRTraining.py
├── example.py
├── setup.py
├── .gitignore
├── README.md
└── LICENSE


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE
3 | include AUTHORS
4 | recursive-include simpleocr data *
5 | 


--------------------------------------------------------------------------------
/simpleocr/data/digits1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gitanat/simple-ocr-opencv/HEAD/simpleocr/data/digits1.png


--------------------------------------------------------------------------------
/simpleocr/data/digits2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gitanat/simple-ocr-opencv/HEAD/simpleocr/data/digits2.png


--------------------------------------------------------------------------------
/simpleocr/data/unicode1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gitanat/simple-ocr-opencv/HEAD/simpleocr/data/unicode1.png


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | required: sudo
 3 | python:
 4 |     "2.7"
 5 | virtualenv:
 6 |     system_site_packages: true
 7 | install:
 8 |   - sudo apt-get install python-opencv
 9 | script:
10 |   - python -m pip install .
11 |   - rm -R simpleocr
12 |   - python -m nose
13 | after_success:
14 |   - coverage run nosetests
15 |   - coveralls
16 | 


--------------------------------------------------------------------------------
/example_grounding.py:
--------------------------------------------------------------------------------
 1 | from simpleocr.files import open_image
 2 | from simpleocr.grounding import UserGrounder
 3 | from simpleocr.segmentation import ContourSegmenter
 4 | 
# Segment the bundled 'digits1' sample image into candidate character regions.
segmenter = ContourSegmenter(blur_y=5, blur_x=5, block_size=11, c=10)
new_image = open_image('digits1')
segments = segmenter.process(new_image.image)

# Let the user label each segment, then write the resulting ground data.
grounder = UserGrounder()
grounder.ground(new_image, segments)
new_image.ground.write()
12 | 


--------------------------------------------------------------------------------
/examples/Readme.md:
--------------------------------------------------------------------------------
 1 | To begin this project, the first thing to do is install the modules needed.
 2 | The code imports numpy, cv2 and os. If you work in an IDE like PyCharm,
 3 | you can install them in Settings, under Python Interpreter, or you can do the installation by
 4 | executing:
 5 | python -m pip install numpy
 6 | pip install opencv-python
 7 | For now we can use these modules just to detect and read images, but to create the neural network we also need to
 8 | install Keras: pip install keras
 9 | and Matplotlib: pip install matplotlib
10 | The easy way is to use an IDE for Python, where you can find all the modules and their new versions.
11 | You can also just write the code and run it; it will import your data and use it to recognize the images.


--------------------------------------------------------------------------------
/simpleocr/pillow_utils.py:
--------------------------------------------------------------------------------
 1 | from .files import Image
 2 | from PIL import Image
 3 | import numpy
 4 | import cv2
 5 | 
 6 | 
 7 | def image_to_pil(imagefile):
 8 |     """Convert an ImageFile or ImageBuffer object to a Pillow Image object
 9 |     :param imagefile: ImageFile object
10 |     :return: Image object
11 |     """
12 |     pillow = cv2.cvtColor(imagefile.image, cv2.COLOR_BGR2RGB)
13 |     return Image.fromarray(pillow)
14 | 
15 | 
def pil_to_image(pillow):
    """Convert a Pillow Image object to an ImageBuffer object

    NOTE(review): this module imports ``Image`` from ``.files`` and then from
    ``PIL``; the second import wins, so ``Image.fromarray`` below is Pillow's
    and this function returns a Pillow Image built from the channel-reversed
    array rather than an object from ``.files``. Presumably the ``.files``
    class was intended - confirm against ``files.py`` and alias one of the
    imports.

    :param pillow: Pillow Image object
    """
    return Image.fromarray(pil_to_cv_array(pillow))
19 | 
20 | 
def pil_to_cv_array(pillow):
    """Return the pixels of a Pillow Image as a cv compatible array.

    The image is turned into a numpy array and its third axis (the colour
    channels) is reversed; the result is an independent copy.

    :param pillow: Pillow Image object
    :return: numpy array with the channel order reversed
    """
    pixels = numpy.array(pillow)
    channels_reversed = pixels[:, :, ::-1]
    return channels_reversed.copy()
25 | 


--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
 1 | from simpleocr.files import open_image
 2 | from simpleocr.segmentation import ContourSegmenter
 3 | from simpleocr.feature_extraction import SimpleFeatureExtractor
 4 | from simpleocr.classification import KNNClassifier
 5 | from simpleocr.ocr import OCR, accuracy, show_differences
 6 | 
# Assemble the OCR pipeline from a segmenter, a feature extractor and a classifier.
segmenter = ContourSegmenter(blur_y=5, blur_x=5, block_size=11, c=10)
extractor = SimpleFeatureExtractor(feature_size=10, stretch=False)
classifier = KNNClassifier()
ocr = OCR(segmenter, extractor, classifier)

# Train on the grounded 'digits1' sample image.
ocr.train(open_image('digits1'))

# Run OCR on 'digits2' and compare the result with that image's ground classes.
test_image = open_image('digits2')
test_chars, test_classes, test_segments = ocr.ocr(test_image, show_steps=True)

print("accuracy:", accuracy(test_image.ground.classes, test_classes))
print("OCRed text:\n", test_chars)
19 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | setup(
 4 |     name="simpleocr",
 5 |     packages=["simpleocr"],
 6 |     version="0.1.0",
 7 |     description="A library for simple OCR in Python using OpenCV",
 8 |     author="The simple-ocr-opencv authors",
 9 |     url="https://www.github.com/goncalopp/simple-ocr-opencv",
10 |     download_url="https://www.github.com/goncalopp/simple-ocr-opencv/releases",
11 |     keywords=["OCR", "OpenCV"],
12 |     license="AGPL",
13 |     classifiers=["Programming Language :: Python :: 2.7",
14 |                  "Programming Language :: Python :: 3",
15 |                  "License :: OSI Approved :: GNU Affero General Public License v2 or later (AGPLv2+)"],
16 |     include_package_data=True,
17 |     package_data={"": ["simpleocr/data/*.box", "simpleocr/data/*.png"]},
18 |     install_requires=["six", "pillow", "numpy"]
19 | )
20 | 


--------------------------------------------------------------------------------
/simpleocr/tesseract_utils.py:
--------------------------------------------------------------------------------
 1 | from .classification import classes_from_numpy, classes_to_numpy
 2 | from .segmentation import segments_from_numpy, segments_to_numpy
 3 | import io
 4 | 
 5 | 
 6 | def read_boxfile(path):
 7 |     classes = []
 8 |     segments = []
 9 |     with io.open(path, encoding="utf-8") as f:
10 |         for line in f:
11 |             s = line.split(" ")
12 |             assert len(s) == 6
13 |             assert s[5] == '0\n'
14 |             classes.append(s[0])
15 |             segments.append(list(map(int, s[1:5])))
16 |     return classes_to_numpy(classes), segments_to_numpy(segments)
17 | 
18 | 
def write_boxfile(path, classes, segments):
    """Write classes and segments to a box file, one segment per line.

    Each line is ``<class> <segment values separated by spaces> 0``. The file
    is written as UTF-8 so that it matches what ``read_boxfile`` expects,
    independently of the platform's default encoding.

    :param path: path of the box file to (over)write
    :param classes: classes in numpy form, converted with ``classes_from_numpy``
    :param segments: segments in numpy form, converted with ``segments_from_numpy``
    """
    classes, segments = classes_from_numpy(classes), segments_from_numpy(segments)
    with io.open(path, 'w', encoding="utf-8") as f:
        for c, s in zip(classes, segments):
            f.write(c + ' ' + ' '.join(map(str, s)) + " 0\n")
24 | 


--------------------------------------------------------------------------------
/simpleocr/data/unicode1.box:
--------------------------------------------------------------------------------
 1 | ᚠ 10 11 11 27 0
 2 | ᛇ 26 11 13 27 0
 3 | ð 42 12 16 23 0
 4 | þ 60 11 16 31 0
 5 | η 78 18 15 24 0
 6 | γ 94 17 17 25 0
 7 | λ 111 11 17 25 0
 8 | σ 129 18 17 17 0
 9 | α 147 18 18 17 0
10 | д 165 18 19 22 0
11 | л 183 18 16 18 0
12 | ь 203 17 14 18 0
13 | г 9 50 13 19 0
14 | я 23 51 15 18 0
15 | ვ 39 50 14 25 0
16 | ე 53 50 14 25 0
17 | პ 67 42 14 27 0
18 | ი 81 50 14 19 0
19 | ს 96 42 14 27 0
20 | ய 110 53 20 16 0
21 | ç 132 51 14 23 0
22 | æ 147 51 26 17 0
23 | ɐ 176 51 15 17 0
24 | ɜ 192 51 13 17 0
25 | Կ 207 45 19 24 0
26 | ր 23 77 19 25 0
27 | ն 71 77 19 25 0
28 | ա 124 78 19 23 0
29 | մ 8 83 16 24 0
30 | ر 44 83 25 19 0
31 | ى 89 87 20 18 0
32 | එ 107 91 16 17 0
33 | ය 143 83 20 18 0
34 | න 164 84 25 18 0
35 | ດ 191 84 16 18 0
36 | ຍ 209 83 18 18 0
37 | 我 7 112 28 28 0
38 | 下 35 113 28 27 0
39 | 而 63 113 28 27 0
40 | 身 91 112 28 28 0
41 | ᕆ 147 114 15 24 0
42 | ᔭ 208 115 19 22 0
43 | ə 121 122 24 16 0
44 | œ 164 120 16 17 0
45 | € 181 120 27 17 0
46 | 


--------------------------------------------------------------------------------
/tests/test_opencv_utils.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from simpleocr import opencv_utils
 3 | from simpleocr.files import open_image
 4 | 
 5 | 
 6 | class TestOpenCVUtils(unittest.TestCase):
 7 |     def test_opencv_brightness_raise(self):
 8 |         image = open_image('digits1')
 9 |         processor = opencv_utils.BrightnessProcessor(brightness=2.0)
10 |         self.assertRaises(AssertionError, lambda: processor._process(image.image))
11 | 
12 |     def test_opencv_brightness(self):
13 |         image = open_image('digits1')
14 |         processor = opencv_utils.BrightnessProcessor(brightness=0.5)
15 |         processor._process(image.image)
16 |         # TODO: Add checking and try display() function
17 |         # TODO: Verify the result
18 | 
19 |     # TODO: Check other ImageProcessors
20 | 
21 |     def test_opencv_imageprocesser(self):
22 |         processor = opencv_utils.ImageProcessor()
23 |         self.assertRaises(NotImplementedError, lambda: processor._image_processing(object))
24 | 


--------------------------------------------------------------------------------
/tests/test_ocr.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from simpleocr.segmentation import ContourSegmenter
 3 | from simpleocr.feature_extraction import SimpleFeatureExtractor
 4 | from simpleocr.files import open_image
 5 | from simpleocr.classification import KNNClassifier
 6 | from simpleocr.ocr import OCR, reconstruct_chars
 7 | 
 8 | 
 9 | class TestOCR(unittest.TestCase):
10 |     def _test_ocr(self, train_file, test_file):
11 |         # get data from images
12 |         ground_truth = test_file.ground.classes
13 |         test_file.remove_ground()
14 |         # create OCR
15 |         segmenter = ContourSegmenter(blur_y=5, blur_x=5)
16 |         extractor = SimpleFeatureExtractor()
17 |         classifier = KNNClassifier()
18 |         ocr = OCR(segmenter, extractor, classifier)
19 |         # train and test
20 |         ocr.train(train_file)
21 |         chars, classes, _ = ocr.ocr(test_file, show_steps=False)
22 |         print(chars)
23 |         print(reconstruct_chars(ground_truth))
24 |         self.assertEqual(chars, reconstruct_chars(ground_truth))
25 |         self.assertEqual(list(classes), list(ground_truth))
26 | 
27 |     def test_ocr_digits(self):
28 |         self._test_ocr(open_image('digits1'), open_image('digits2'))
29 | 
30 |     def test_ocr_unicode(self):
31 |         self._test_ocr(open_image('unicode1'), open_image('unicode1'))
32 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 
103 | #PyCharm
104 | /.idea
105 | 


--------------------------------------------------------------------------------
/simpleocr/feature_extraction.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import cv2
 3 | from .segmentation import region_from_segment
 4 | from .opencv_utils import background_color
 5 | 
 6 | FEATURE_DATATYPE = numpy.float32
 7 | # FEATURE_SIZE is defined on the specific feature extractor instance
 8 | FEATURE_DIRECTION = 1  # horizontal - a COLUMN feature vector
 9 | FEATURES_DIRECTION = 0  # vertical - ROWS of feature vectors
10 | 
11 | 
class FeatureExtractor(object):
    """Base class: given a list of segments, returns a list of feature vectors."""

    def extract(self, image, segments):
        """Must be overridden by subclasses; the base implementation always raises.

        :param image: image the segments refer to
        :param segments: segments to extract features for
        :raises NotImplementedError: always
        """
        raise NotImplementedError
16 | 
17 | 
class SimpleFeatureExtractor(FeatureExtractor):
    """Feature extractor that uses the resized grayscale pixels of each segment as its features.

    Every segment is scaled into a ``feature_size`` x ``feature_size`` square, which is then
    flattened into one row of the returned feature matrix.
    """

    def __init__(self, feature_size=10, stretch=False):
        """
        :param feature_size: side length of the square every segment is scaled into
        :param stretch: if True the segment is stretched to fill the square; otherwise its
            proportions are kept and the rest of the square is filled with the background color
        """
        self.feature_size = feature_size
        self.stretch = stretch

    def extract(self, image, segments):
        """Compute one feature vector per segment.

        :param image: image accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``
        :param segments: sequence of segments, each unpacking as ``x, y, w, h``
        :return: array of shape ``(len(segments), feature_size ** 2)`` and dtype FEATURE_DATATYPE
        """
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        fs = self.feature_size
        bg = background_color(image)

        # Allocate the result once instead of growing it with numpy.append for every segment.
        regions = numpy.empty((len(segments), fs * fs), dtype=FEATURE_DATATYPE)
        for row, segment in enumerate(segments):
            region = region_from_segment(image, segment)
            if self.stretch:
                region = cv2.resize(region, (fs, fs))
            else:
                x, y, w, h = segment
                proportion = float(min(h, w)) / max(w, h)
                # A very elongated segment would round to 0 pixels, which cv2.resize rejects.
                short_side = max(1, int(fs * proportion))
                # cv2.resize takes (width, height); the longer side always becomes fs.
                new_size = (fs, short_side) if min(w, h) == h else (short_side, fs)
                resized = cv2.resize(region, new_size)
                rows, cols = resized.shape
                # Place the resized segment in the top-left corner of a background-colored square.
                region = numpy.empty((fs, fs), dtype=resized.dtype)
                region[:, :] = bg
                region[:rows, :cols] = resized
            regions[row] = region.reshape(-1)
        return regions
46 | 


--------------------------------------------------------------------------------
/tests/test_files.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import unittest
 3 | from PIL import Image as PillowImage
 4 | import simpleocr.files
 5 | from simpleocr.files import open_image
 6 | 
 7 | TEST_FILE = 'digits1'
 8 | TEST_FILE_EXT = 'digits1.png'
 9 | UNICODE_TEST_FILE = 'unicode1'
10 | 
11 | 
class TestImageFile(unittest.TestCase):
    """Tests for opening images and for setting and removing their ground data."""

    def test_open_image(self):
        """open_image accepts bare names as well as absolute and relative paths."""
        # in data dir, without and with extension
        for name in (TEST_FILE, TEST_FILE_EXT):
            open_image(name)
        # absolute path, without and with extension
        data_dir = simpleocr.files.DATA_DIRECTORY
        for name in (TEST_FILE, TEST_FILE_EXT):
            open_image(os.path.join(data_dir, name))
        # relative path, without and with extension, seen from one above data_dir
        data_dir_name = os.path.basename(data_dir)
        previous_cwd = os.getcwd()
        os.chdir(os.path.dirname(data_dir))
        try:
            for name in (TEST_FILE, TEST_FILE_EXT):
                open_image(os.path.join(data_dir_name, name))
        finally:
            os.chdir(previous_cwd)

    def test_open_image_nonexistent(self):
        """Opening an unknown image raises IOError."""
        with self.assertRaises(IOError):
            open_image("inexistent")

    def _check_ground_cycle(self, image_name):
        """Open ``image_name``, re-set its own ground in memory, then remove it again."""
        imgf = open_image(image_name)
        self.assertEqual(imgf.is_grounded, True)
        imgf.set_ground(imgf.ground.segments, imgf.ground.classes, write_file=False)
        self.assertEqual(imgf.is_grounded, True)
        imgf.remove_ground(remove_file=False)
        self.assertEqual(imgf.is_grounded, False)

    def test_ground(self):
        """Ground handling on the digits sample image."""
        self._check_ground_cycle(TEST_FILE)

    def test_ground_unicode(self):
        """Ground handling on the unicode sample image."""
        self._check_ground_cycle(UNICODE_TEST_FILE)
55 | 


--------------------------------------------------------------------------------
/tests/test_grounding.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import mock
 3 | from simpleocr.files import open_image
 4 | from simpleocr.grounding import TextGrounder, TerminalGrounder, UserGrounder
 5 | from simpleocr.segmentation import ContourSegmenter
 6 | from simpleocr.ocr import reconstruct_chars
 7 | 
 8 | 
 9 | class TestGrounding(unittest.TestCase):
10 |     def setUp(self):
11 |         self.img = open_image('digits1')
12 |         self.img.remove_ground()
13 |         self.assertFalse(self.img.is_grounded)
14 |         self.segments = ContourSegmenter().process(self.img.image)
15 | 
16 |     def test_textgrounder(self):
17 |         grounder = TextGrounder()
18 |         characters = "0" * len(self.segments)
19 |         grounder.ground(self.img, self.segments, characters)
20 |         self.assertTrue(self.img.is_grounded)
21 |         self.assertEqual(reconstruct_chars(self.img.ground.classes), characters)
22 | 
23 |     def test_textgrounder_wrong_len(self):
24 |         grounder = TextGrounder()
25 |         characters = "0" * len(self.segments)
26 |         with self.assertRaises(ValueError):
27 |             grounder.ground(self.img, self.segments, characters[:-4])
28 |         self.assertFalse(self.img.is_grounded)
29 | 
30 |     def test_usergrounder(self):
31 |         ESC_KEY = 27
32 |         ZERO_KEY = 48
33 |         keys = [ZERO_KEY] * len(self.segments) + [ESC_KEY]
34 |         mock_generator = iter(keys)
35 | 
36 |         def mock_input(*args):
37 |             return next(mock_generator)
38 | 
39 |         grounder = UserGrounder()
40 |         with mock.patch('cv2.waitKey', mock_input):
41 |             with mock.patch('cv2.imshow'):
42 |                 grounder.ground(self.img, self.segments)
43 |         self.assertTrue(self.img.is_grounded)
44 |         self.assertEqual(reconstruct_chars(self.img.ground.classes), "0" * len(self.segments))
45 | 
46 |     def test_terminal_grounder(self):
47 |         terminal = TerminalGrounder()
48 |         characters = "0" * len(self.segments)
49 |         mock_input_gen = iter(characters)
50 | 
51 |         def mock_input(prompt):
52 |             return next(mock_input_gen)
53 | 
54 |         with mock.patch('six.moves.input', mock_input):
55 |             terminal.ground(self.img, self.segments)
56 | 
57 |         self.assertTrue(self.img.is_grounded)
58 |         self.assertEqual(reconstruct_chars(self.img.ground.classes), "0" * len(self.segments))
59 | 


--------------------------------------------------------------------------------
/simpleocr/numpy_utils.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | 
 3 | 
 4 | class OverflowPreventer(object):
 5 |     """
 6 |     A context manager that exposes a numpy array preventing simple operations from overflowing
 7 |     Example:
 8 |     array= numpy.array( [255], dtype=numpy.uint8 )
 9 |     with OverflowPreventer( array ) as prevented:
10 |         prevented+=1
11 |     print array
12 |     """
13 | 
14 |     inverse_operator = {'__iadd__': '__sub__', '__isub__': '__add__', '__imul__': '__div__', '__idiv__': '__mul__'}
15 |     bypass_operators = ['__str__', '__repr__', '__getitem__']
16 | 
17 |     def __init__(self, matrix):
18 |         class CustomWrapper(object):
19 |             def __init__(self, matrix):
20 |                 assert matrix.dtype == numpy.uint8
21 |                 self.overflow_matrix = matrix
22 |                 self.overflow_lower_range = float(0)
23 |                 self.overflow_upper_range = float(2 ** 8 - 1)
24 |                 for op in OverflowPreventer.bypass_operators:
25 |                     setattr(CustomWrapper, op, getattr(self.overflow_matrix, op))
26 | 
27 |             def _overflow_operator(self, b, forward_operator):
28 |                 m, lr, ur = self.overflow_matrix, self.overflow_lower_range, self.overflow_upper_range
29 |                 assert type(b) in (int, float)
30 |                 reverse_operator = OverflowPreventer.inverse_operator[forward_operator]
31 |                 uro = getattr(ur, reverse_operator)
32 |                 lro = getattr(lr, reverse_operator)
33 |                 afo = getattr(m, forward_operator)
34 |                 overflows = m > uro(b)
35 |                 underflows = m < lro(b)
36 |                 afo(b)
37 |                 m[overflows] = ur
38 |                 m[underflows] = lr
39 |                 return self
40 | 
41 |             def __getattr__(self, attr):
42 |                 if hasattr(self.wrapped, attr):
43 |                     return getattr(self.wrapped, attr)
44 |                 else:
45 |                     raise AttributeError
46 | 
47 |         self.wrapper = CustomWrapper(matrix)
48 |         import functools
49 |         for op in OverflowPreventer.inverse_operator.keys():
50 |             setattr(CustomWrapper, op, functools.partial(self.wrapper._overflow_operator, forward_operator=op))
51 | 
52 |     def __enter__(self):
53 |         return self.wrapper
54 | 
55 |     def __exit__(self, type, value, tb):
56 |         pass
57 | 


--------------------------------------------------------------------------------
/simpleocr/__init__.py:
--------------------------------------------------------------------------------
 1 | try:
 2 |     import cv2
 3 | except ImportError as e:
 4 |     import sys
 5 | 
 6 | 
 7 |     def is_python_3():
 8 |         return sys.version_info[0] == 3
 9 | 
10 |     # Valid values for sys.platform on Linux include "linux" and "linux2"
11 |     if "linux" in sys.platform:
12 |         if is_python_3():
13 |             print(
14 |                 "OpenCV-Python could not be imported. As your are running Linux and Python 3, you have the following "
15 |                 "options to install it:"
16 |                 "\n- Compile OpenCV-Python yourself"
17 |                 "\n- Install the unofficial \"opencv-python\" package from PyPI using pip"
18 |             )
19 |         else:
20 |             print(
21 |                 "OpenCV-Python could not be imported. As you are running Linux and Python 2, you have the following "
22 |                 "options to install it:"
23 |                 "\n- Compile OpenCV-Python yourself"
24 |                 "\n- Install the \"python-opencv\" package with your distro's package manager if it is available"
25 |                 "\n- Install the unofficial \"opencv-python\" package from PyPI using pip"
26 |             )
27 |     # The only valid value for Windows is "win32"
28 |     elif sys.platform == "win32":
29 |         print(
30 |             "OpenCV-Python could not be imported. As you are running Windows, you have the following options to "
31 |             "install it:"
32 |             "\n- Compile OpenCV-Python yourself"
33 |             "\n- Install the unofficial \"opencv-python\" package from PyPI using pip"
34 |         )
35 |     else:
36 |         print(
37 |             "OpenCV-Python could not be imported, but there are no installation instructions available for your OS."
38 |         )
39 |     raise
40 | 
41 | 
42 | # Classifiers
43 | from simpleocr.classification import KNNClassifier
44 | # Files
45 | from simpleocr.files import open_image, Image, ImageFile
46 | # Grounders
47 | from simpleocr.grounding import TerminalGrounder, TextGrounder, UserGrounder
48 | # Improver functions
49 | from simpleocr.improver import enhance_image, crop_image, image_to_pil
50 | # OCR functions
51 | from simpleocr.ocr import reconstruct_chars, show_differences, OCR
52 | # Segmenters
53 | from simpleocr.segmentation import RawContourSegmenter, ContourSegmenter
54 | # Extraction
55 | from simpleocr.feature_extraction import FeatureExtractor, SimpleFeatureExtractor
56 | # Pillow functions
57 | from simpleocr.pillow_utils import pil_to_image
58 | 


--------------------------------------------------------------------------------
/simpleocr/improver.py:
--------------------------------------------------------------------------------
 1 | from PIL import ImageEnhance, ImageOps
 2 | from .pillow_utils import image_to_pil, pil_to_cv_array
 3 | 
 4 | """
 5 | These functions are not suitable for use on images to be grounded and then trained, as the file on disk is not actually
 6 | modified. These functions are only to be used on ImageFile objects that are meant to be performed OCR on, nothing else.
 7 | These functions offer various improvement options to make the segmentation and classification of the segments in the
 8 | image easier. However, they are no miracle workers, images still need to be of decent quality and provide clear
 9 | characters to classify.
10 | """
11 | 
12 | 
def enhance_image(imagefile, color=None, brightness=None, contrast=None, sharpness=None, invert=False):
    """
    Apply optional Pillow enhancements to the image held by an ImageFile object.
    The enhancements are applied in the order color, brightness, contrast, sharpness and then
    inversion; an enhancement whose argument is None is skipped.
    :param imagefile: ImageFile object, its ``image`` attribute is replaced in place
    :param color: factor passed to Pillow's Color enhancer, float
    :param brightness: factor passed to Pillow's Brightness enhancer, float
    :param contrast: factor passed to Pillow's Contrast enhancer, float
    :param sharpness: factor passed to Pillow's Sharpness enhancer, float
    :param invert: Invert the colors of the image, bool
    :return: the same ImageFile object, with no changes written to the actual file
    """
    pil_image = image_to_pil(imagefile)
    adjustments = (
        (ImageEnhance.Color, color),
        (ImageEnhance.Brightness, brightness),
        (ImageEnhance.Contrast, contrast),
        (ImageEnhance.Sharpness, sharpness),
    )
    for enhancer_class, factor in adjustments:
        if factor is None:
            continue
        pil_image = enhancer_class(pil_image).enhance(factor)
    if invert:
        pil_image = ImageOps.invert(pil_image)
    imagefile.image = pil_to_cv_array(pil_image)
    return imagefile
37 | 
38 | 
def crop_image(imagefile, box):
    """
    Crop an ImageFile object image to the box coordinates. This function is not suitable for use on images to be
    grounded and then trained, as the file on disk is not actually modified.
    :param imagefile: ImageFile object, its ``image`` attribute is replaced in place
    :param box: 4-tuple passed to Pillow's ``Image.crop``, i.e. (left, upper, right, lower)
    :return: modified ImageFile object
    :raises ValueError: if box is not a tuple of length 4
    """
    if not isinstance(box, tuple):
        raise ValueError("The box parameter is not a tuple")
    if not len(box) == 4:
        raise ValueError("The box parameter does not have length 4")
    image = image_to_pil(imagefile)
    # Image.crop returns a new image and leaves the original untouched, so keep its result.
    image = image.crop(box)
    imagefile.image = pil_to_cv_array(image)
    return imagefile
55 | 


--------------------------------------------------------------------------------
/simpleocr/data/digits1.box:
--------------------------------------------------------------------------------
  1 | 9 8 11 21 32 0
  2 | 8 32 11 21 32 0
  3 | 2 54 11 20 31 0
  4 | 1 80 10 13 32 0
  5 | 4 100 11 24 31 0
  6 | 8 125 11 21 32 0
  7 | 0 148 11 22 32 0
  8 | 8 172 11 21 32 0
  9 | 6 196 10 21 33 0
 10 | 5 219 11 20 32 0
 11 | 1 244 10 13 32 0
 12 | 3 265 11 20 32 0
 13 | 2 288 11 21 31 0
 14 | 8 313 11 21 32 0
 15 | 2 335 11 21 31 0
 16 | 3 359 11 20 32 0
 17 | 0 383 11 22 32 0
 18 | 6 407 10 21 33 0
 19 | 6 431 10 20 33 0
 20 | 4 452 11 23 31 0
 21 | 7 478 11 21 31 0
 22 | 0 500 11 22 32 0
 23 | 9 524 11 21 32 0
 24 | 3 546 11 20 32 0
 25 | 8 571 11 21 32 0
 26 | 4 7 62 23 31 0
 27 | 4 30 62 23 31 0
 28 | 6 56 61 20 33 0
 29 | 0 78 62 22 32 0
 30 | 9 102 62 21 32 0
 31 | 5 125 62 20 32 0
 32 | 5 148 62 20 32 0
 33 | 0 172 62 22 32 0
 34 | 5 195 62 20 32 0
 35 | 8 219 62 21 32 0
 36 | 2 241 62 21 31 0
 37 | 2 265 62 20 31 0
 38 | 3 288 62 20 32 0
 39 | 1 314 61 13 32 0
 40 | 7 337 62 21 31 0
 41 | 2 358 62 21 31 0
 42 | 5 383 62 20 32 0
 43 | 3 406 62 20 32 0
 44 | 5 430 62 20 32 0
 45 | 9 453 62 21 32 0
 46 | 4 476 62 23 31 0
 47 | 0 500 62 22 32 0
 48 | 8 524 62 21 32 0
 49 | 1 549 61 12 32 0
 50 | 2 569 62 21 31 0
 51 | 8 8 113 21 32 0
 52 | 4 30 113 23 31 0
 53 | 8 55 113 21 32 0
 54 | 1 80 112 13 32 0
 55 | 1 103 112 13 32 0
 56 | 1 127 112 13 32 0
 57 | 7 149 113 21 31 0
 58 | 4 171 113 23 31 0
 59 | 5 195 113 20 32 0
 60 | 0 219 112 22 33 0
 61 | 2 241 113 21 31 0
 62 | 8 266 113 21 32 0
 63 | 4 288 113 23 31 0
 64 | 1 314 112 13 32 0
 65 | 0 336 112 22 33 0
 66 | 2 358 113 21 31 0
 67 | 7 384 113 21 31 0
 68 | 0 406 113 22 32 0
 69 | 1 431 112 13 32 0
 70 | 9 453 113 21 32 0
 71 | 3 476 113 20 32 0
 72 | 8 500 113 22 32 0
 73 | 5 523 113 20 32 0
 74 | 2 546 113 21 31 0
 75 | 1 572 112 13 32 0
 76 | 1 10 163 12 32 0
 77 | 0 31 163 22 33 0
 78 | 5 55 164 20 32 0
 79 | 5 78 164 20 32 0
 80 | 5 101 164 20 32 0
 81 | 9 125 163 21 33 0
 82 | 6 149 163 21 33 0
 83 | 4 171 164 23 31 0
 84 | 4 194 164 23 31 0
 85 | 6 220 163 20 33 0
 86 | 2 241 164 21 31 0
 87 | 2 265 164 20 31 0
 88 | 9 289 163 21 33 0
 89 | 4 311 164 24 31 0
 90 | 8 336 164 21 32 0
 91 | 9 360 163 21 33 0
 92 | 5 383 164 20 32 0
 93 | 4 405 164 23 31 0
 94 | 9 430 163 21 33 0
 95 | 3 452 163 20 33 0
 96 | 0 476 163 23 33 0
 97 | 3 499 163 20 33 0
 98 | 8 524 163 21 33 0
 99 | 1 549 163 12 32 0
100 | 9 571 163 20 33 0
101 | 6 9 214 20 33 0
102 | 4 30 215 23 31 0
103 | 4 54 215 23 31 0
104 | 2 77 214 21 32 0
105 | 8 102 214 21 33 0
106 | 8 125 214 22 33 0
107 | 1 150 214 13 32 0
108 | 0 172 214 22 33 0
109 | 9 196 214 20 33 0
110 | 7 220 215 21 31 0
111 | 5 242 215 20 32 0
112 | 6 267 214 20 33 0
113 | 6 290 214 21 33 0
114 | 5 312 215 20 32 0
115 | 9 336 214 21 33 0
116 | 3 359 214 20 33 0
117 | 3 382 214 20 33 0
118 | 4 405 215 23 31 0
119 | 4 429 215 23 31 0
120 | 6 454 214 21 33 0
121 | 1 478 214 13 32 0
122 | 2 499 214 21 32 0
123 | 8 524 214 21 33 0
124 | 4 546 215 23 31 0
125 | 7 571 215 21 31 0
126 | 


--------------------------------------------------------------------------------
/simpleocr/classification.py:
--------------------------------------------------------------------------------
 1 | from .feature_extraction import FEATURE_DATATYPE
 2 | import numpy
 3 | import cv2
 4 | from .opencv_utils import get_opencv_version
 5 | from six import unichr
 6 | 
# numpy dtype of the arrays produced by classes_to_numpy (one code point per class)
CLASS_DATATYPE = numpy.uint16
CLASS_SIZE = 1
# 0 makes classes_to_numpy return a column (one class per row); 1 would return a row
CLASSES_DIRECTION = 0  # vertical - a classes COLUMN

# unichr(35) is "#"; Classifier._filter_unclassified drops samples carrying this class
BLANK_CLASS = unichr(35)  # marks unclassified elements
12 | 
13 | 
def classes_to_numpy(classes):
    """given a list of unicode chars, transforms it into a numpy array

    :param classes: iterable of one-character strings (a plain string also works)
    :return: 2-d array of dtype CLASS_DATATYPE holding one code point per class;
        a column when CLASSES_DIRECTION is 0, a row otherwise
    :raises ValueError: if an element is not exactly one character long
    """
    code_points = []
    for char in classes:
        if len(char) != 1:
            raise ValueError("each class must be a single character, got %r" % (char,))
        # ord() gives the code point directly, independent of the host byte order
        code_points.append(ord(char))
    # build as uint32 first, then cast: the cast to CLASS_DATATYPE is done by
    # astype() so out-of-range code points are truncated instead of raising
    classes = numpy.array(code_points, dtype=numpy.uint32, ndmin=2).astype(CLASS_DATATYPE)
    # ndmin=2 yields a single row; transpose it when a column is wanted
    classes = classes if CLASSES_DIRECTION == 1 else numpy.transpose(classes)
    return classes
26 | 
27 | 
def classes_from_numpy(classes):
    """reverses classes_to_numpy

    :param classes: numpy array of code points, as a column or as a row
    :return: list of one-character strings, one per class
    """
    # Flattening makes the orientation (column or row) irrelevant and yields
    # scalars instead of 1-element rows; int() hands unichr() a real integer.
    flat = numpy.asarray(classes).reshape(-1)
    return [unichr(int(code)) for code in flat]
33 | 
34 | 
class Classifier(object):
    """Interface implemented by the classifiers of this module."""

    def train(self, features, classes):
        """trains the classifier with the classified feature vectors"""
        raise NotImplementedError()

    @staticmethod
    def _filter_unclassified(features, classes):
        """Return (features, classes) without the samples whose class is BLANK_CLASS."""
        blank = classes_to_numpy(BLANK_CLASS)
        # flatten the comparison result so it can be used as a row mask
        keep = (classes != blank).reshape(-1)
        return features[keep], classes[keep]

    def classify(self, features):
        """returns the classes of the feature vectors"""
        raise NotImplementedError
48 | 
49 | 
class KNNClassifier(Classifier):
    """k-nearest-neighbours classifier built on OpenCV's KNearest model."""

    def __init__(self, k=1, debug=False):
        """
        :param k: number of neighbours consulted by classify()
        :param debug: stored on the instance; not read by this class
        """
        # the KNearest factory moved to cv2.ml in OpenCV 3
        if get_opencv_version() >= 3:
            self.knn = cv2.ml.KNearest_create()
        else:
            self.knn = cv2.KNearest()
        self.k = k
        self.debug = debug

    def train(self, features, classes):
        """Train the model, ignoring samples whose class is BLANK_CLASS.

        :param features: one feature vector per row
        :param classes: one class per row, matching features
        """
        # both inputs are converted to float32 before being handed to OpenCV
        if FEATURE_DATATYPE != numpy.float32:
            features = numpy.asarray(features, dtype=numpy.float32)
        if CLASS_DATATYPE != numpy.float32:
            classes = numpy.asarray(classes, dtype=numpy.float32)
        features, classes = Classifier._filter_unclassified(features, classes)
        if get_opencv_version() >= 3:
            self.knn.train(features, cv2.ml.ROW_SAMPLE, classes)
        else:
            self.knn.train(features, classes)

    def classify(self, features):
        """Return the class array OpenCV predicts for the given feature vectors.

        The number of neighbours is the k passed to the constructor.
        """
        if FEATURE_DATATYPE != numpy.float32:
            features = numpy.asarray(features, dtype=numpy.float32)
        if get_opencv_version() >= 3:
            retval, result_classes, neigh_resp, dists = self.knn.findNearest(features, k=self.k)
        else:
            retval, result_classes, neigh_resp, dists = self.knn.find_nearest(features, k=self.k)
        return result_classes
78 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Simple Python OCR
 2 | [![Build Status](https://travis-ci.org/goncalopp/simple-ocr-opencv.svg?branch=master)](https://travis-ci.org/goncalopp/simple-ocr-opencv)
 3 | 
 4 | A simple pythonic OCR engine using opencv and numpy.
 5 | 
 6 | Originally inspired by [this stackoverflow question](http://stackoverflow.com/questions/9413216/simple-digit-recognition-ocr-in-opencv-python)
 7 | 
 8 | ### Essential Concepts
 9 | 
10 | #### Segmentation
11 | 
 12 | In order for OCR to be performed on an image, several steps must be 
13 | performed on the source image. Segmentation is the process of 
14 | identifying the regions of the image that represent characters. 
15 | 
16 | This project uses rectangles to model segments. 
17 | 
18 | #### Supervised learning with a classification problem
19 | 
 20 | The [classification problem][] consists of identifying which class an 
 21 | observation belongs to (i.e.: which particular character is contained 
22 | in a segment).
23 | 
24 | [Supervised learning][] is a way of "teaching" a machine. Basically, an 
25 | algorithm is *trained* through *examples* (i.e.: this particular 
26 | segment contains the character `f`). After training, the machine 
27 | should be able to apply its acquired knowledge to new data.
28 | 
29 | The [k-NN algorithm], used in this project, is one of the simplest  
 30 | classification algorithms.
31 | 
32 | #### Grounding
33 | 
 34 | Creating an example image with already classified characters, for 
35 | training purposes.
36 | See [ground truth][].
37 | 
38 | [classification problem]: https://en.wikipedia.org/wiki/Statistical_classification
39 | [Supervised learning]: https://en.wikipedia.org/wiki/Supervised_learning
40 | [k-NN algorithm]: https://en.wikipedia.org/wiki/K-nearest_neighbors_classification
41 | [ground truth]: https://en.wikipedia.org/wiki/Ground_truth
42 | 
43 | #### How to understand this project
44 | 
45 | Unfortunately, documentation is a bit sparse at the moment (I 
46 | gladly accept contributions).
47 | The project is well-structured, and most classes and functions have 
48 | docstrings, so that's probably a good way to start.
49 | 
50 | If you need any help, don't hesitate to contact me. You can find my 
51 | email on my github profile.
52 | 
53 | 
54 | #### How to use
55 | 
56 | Please check `example.py` for basic usage with the existing pre-grounded images.
57 | 
 58 | You can use your own images by placing them in the `data` directory. 
59 | Grounding images interactively can be accomplished by using `grounding.UserGrounder`.
60 | For more details check `example_grounding.py`
61 | 
62 | #### Copyright and notices
63 | 
64 | This project is available under the [GNU AGPLv3 License](https://www.gnu.org/licenses/agpl-3.0.txt), a copy
65 | should be available in LICENSE. If not, check out the link to learn more.
66 |  
67 |     Copyright (C) 2012-2017 by the simple-ocr-opencv authors
68 |     All authors are the copyright owners of their respective additions
69 | 
70 |     This program is free software: you can redistribute it and/or modify
71 |     it under the terms of the GNU AGPLv3 License, as found in LICENSE.
72 | 
73 |     This program is distributed in the hope that it will be useful,
74 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
75 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
76 |     GNU General Public License for more details.
77 | 
78 |     You should have received a copy of the GNU Affero General Public License
79 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.    
80 | 


--------------------------------------------------------------------------------
/simpleocr/segmentation_filters.py:
--------------------------------------------------------------------------------
 1 | from .opencv_utils import show_image_and_wait_for_key, BrightnessProcessor, draw_segments
 2 | from .segmentation_aux import contained_segments_matrix, LineFinder, guess_segments_lines
 3 | from .processor import DisplayingProcessor, create_broadcast
 4 | import numpy
 5 | 
 6 | 
 7 | def create_default_filter_stack():
 8 |     stack = [LargeFilter(), SmallFilter(), LargeAreaFilter(), ContainedFilter(), LineFinder(), NearLineFilter()]
 9 |     stack[4].add_poshook(create_broadcast("lines_topmiddlebottoms", stack[5]))
10 |     return stack
11 | 
12 | 
class Filter(DisplayingProcessor):
    """A filter processes given segments, returning only the desirable ones"""

    PARAMETERS = DisplayingProcessor.PARAMETERS

    def display(self, display_before=False):
        """Show the last filtering result: kept segments in (0, 255, 0), rejected in (0, 0, 255).

        Requires the ``image`` attribute to be set beforehand.
        """
        try:
            canvas = self.image.copy()
        except AttributeError:
            raise Exception("You need to set the Filter.image attribute for displaying")
        canvas = BrightnessProcessor(brightness=0.6).process(canvas)
        candidates = self._input
        kept = self.good_segments_indexes
        rejected = True ^ kept  # element-wise negation of the boolean mask
        draw_segments(canvas, candidates[kept], (0, 255, 0))
        draw_segments(canvas, candidates[rejected], (0, 0, 255))
        show_image_and_wait_for_key(canvas, "segments filtered by " + self.__class__.__name__)

    def _good_segments(self, segments):
        """Return the index (mask) selecting the desirable segments; implemented by subclasses."""
        raise NotImplementedError

    def _process(self, segments):
        """Apply the filter, remember the selection and return the surviving segments.

        Raises Exception when no segment survives.
        """
        selection = self._good_segments(segments)
        self.good_segments_indexes = selection
        survivors = segments[selection]
        if len(survivors) == 0:
            raise Exception("0 segments after filter " + self.__class__.__name__)
        return survivors
40 | 
41 | 
class LargeFilter(Filter):
    """Keeps the segments that are at least min_width wide and min_height tall."""
    PARAMETERS = Filter.PARAMETERS + {"min_width": 4, "min_height": 8}

    def _good_segments(self, segments):
        """Return a mask over segments; columns 2 and 3 hold width and height."""
        widths, heights = segments[:, 2], segments[:, 3]
        wide_enough = widths >= self.min_width
        tall_enough = heights >= self.min_height
        # multiplying the two boolean masks combines them with AND
        return wide_enough * tall_enough
50 | 
51 | 
class SmallFilter(Filter):
    """Keeps the segments that are at most max_width wide and max_height tall."""
    PARAMETERS = Filter.PARAMETERS + {"max_width": 30, "max_height": 50}

    def _good_segments(self, segments):
        """Return a mask over segments; columns 2 and 3 hold width and height."""
        widths, heights = segments[:, 2], segments[:, 3]
        narrow_enough = widths <= self.max_width
        short_enough = heights <= self.max_height
        # multiplying the two boolean masks combines them with AND
        return narrow_enough * short_enough
60 | 
61 | 
class LargeAreaFilter(Filter):
    """desirable segments' area is at least min_area"""
    PARAMETERS = Filter.PARAMETERS + {"min_area": 45}

    def _good_segments(self, segments):
        """Return a mask selecting the segments whose width * height >= min_area."""
        # Widen before multiplying: in a narrow integer dtype (segments are
        # created as uint16 by segments_to_numpy) the product of a large
        # width and height would wrap around and look like a tiny area.
        widths = segments[:, 2].astype(numpy.int64)
        heights = segments[:, 3].astype(numpy.int64)
        return (widths * heights) >= self.min_area
68 | 
69 | 
class ContainedFilter(Filter):
    """desirable segments are not contained by any other"""

    def _good_segments(self, segments):
        """Return a mask that is False for rows with a truthy entry in the containment matrix."""
        containment = contained_segments_matrix(segments)
        # row-wise maximum: truthy when the row has at least one truthy entry
        is_contained = numpy.max(containment, axis=1)
        return True ^ is_contained
76 | 
77 | 
class NearLineFilter(Filter):
    """desirable segments have their y near a line"""
    # the docstring must be the first statement of the class body, otherwise
    # it is a no-op string expression and __doc__ stays None
    PARAMETERS = Filter.PARAMETERS + {"nearline_tolerance": 5.0}  # percentage distance stddev

    def _good_segments(self, segments):
        """Return a mask selecting the segments guess_segments_lines assigns to a line.

        Reads self.lines_topmiddlebottoms, which must have been set before the
        filter runs; a result of -1 marks a segment as not matching any line.
        """
        lines = guess_segments_lines(segments, self.lines_topmiddlebottoms, nearline_tolerance=self.nearline_tolerance)
        good = lines != -1
        return good
86 | 


--------------------------------------------------------------------------------
/simpleocr/segmentation.py:
--------------------------------------------------------------------------------
 1 | from .opencv_utils import show_image_and_wait_for_key, draw_segments, BlurProcessor, get_opencv_version
 2 | from .processor import DisplayingProcessor, DisplayingProcessorStack, create_broadcast
 3 | from .segmentation_aux import SegmentOrderer
 4 | from .segmentation_filters import create_default_filter_stack
 5 | import numpy
 6 | import cv2
 7 | 
# numpy dtype of the arrays produced by segments_to_numpy
SEGMENT_DATATYPE = numpy.uint16
# a segment is a rectangle described by 4 numbers: x, y, width, height
SEGMENT_SIZE = 4
# 0 keeps one segment per row; any other value makes segments_to_numpy transpose
SEGMENTS_DIRECTION = 0  # vertical axis in numpy
11 | 
12 | 
def segments_from_numpy(segments):
    """reverses segments_to_numpy

    :param segments: numpy array of segments as produced by segments_to_numpy
    :return: list with one list of plain ints per segment
    """
    if SEGMENTS_DIRECTION != 0:
        segments = segments.transpose()
    # build real lists: on Python 3 map() would only give lazy iterators
    return [[int(value) for value in segment] for segment in segments]
18 | 
19 | 
def segments_to_numpy(segments):
    """Convert a list of 4-element tuples into a numpy array of dtype SEGMENT_DATATYPE.

    The array has at least two dimensions, with one segment per row when
    SEGMENTS_DIRECTION is 0 and the transposed layout otherwise.
    """
    as_rows = numpy.array(segments, dtype=SEGMENT_DATATYPE, ndmin=2)
    if SEGMENTS_DIRECTION == 0:
        return as_rows
    return numpy.transpose(as_rows)
25 | 
26 | 
def region_from_segment(image, segment):
    """Return the sub-image of image covered by segment.

    :param image: 2-d indexable image, indexed as image[rows, columns]
    :param segment: rectangle given as (x, y, width, height)
    """
    left, top, width, height = segment
    rows = slice(top, top + height)
    columns = slice(left, left + width)
    return image[rows, columns]
31 | 
32 | 
class RawSegmenter(DisplayingProcessor):
    """A image segmenter. input is image, output is segments"""

    def _segment(self, image):
        """Segment an opencv image for OCR; subclasses return (x, y, width, height) rectangles."""
        raise NotImplementedError()

    def _process(self, image):
        """Run _segment on image, remember both the image and the result, and return the result."""
        found = self._segment(image)
        self.image = image
        self.segments = found
        return found
45 | 
46 | 
class FullSegmenter(DisplayingProcessorStack):
    """A DisplayingProcessorStack used as a segmenter; adds no behaviour of its own."""
49 | 
50 | 
class RawContourSegmenter(RawSegmenter):
    """Segments an image by thresholding it and boxing every contour found."""
    PARAMETERS = RawSegmenter.PARAMETERS + {"block_size": 11, "c": 10}

    def _segment(self, image):
        """Return the bounding rectangles of all contours of the thresholded image.

        The contours and their hierarchy are kept on the instance as well.
        """
        self.image = image
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        binary = cv2.adaptiveThreshold(
            gray,
            maxValue=255,
            adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            thresholdType=cv2.THRESH_BINARY,
            blockSize=self.block_size,
            C=self.c,
        )
        # OpenCV 3 returns an extra leading value from findContours
        if get_opencv_version() == 3:
            _, contours, hierarchy = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        else:
            contours, hierarchy = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        boxes = [cv2.boundingRect(contour) for contour in contours]
        segments = segments_to_numpy(boxes)
        self.contours, self.hierarchy = contours, hierarchy  # store, may be needed for debugging
        return segments

    def display(self, display_before=False):
        """Show the contours on a white canvas, then the segments drawn over the image."""
        canvas = self.image.copy()
        if display_before:
            show_image_and_wait_for_key(canvas, "image before segmentation")
        canvas.fill(255)
        cv2.drawContours(canvas, self.contours, contourIdx=-1, color=(0, 0, 0))
        show_image_and_wait_for_key(canvas, "ContourSegmenter contours")
        overlay = self.image.copy()
        draw_segments(overlay, self.segments)
        show_image_and_wait_for_key(overlay, "image after segmentation by " + self.__class__.__name__)
77 | 
78 | 
class ContourSegmenter(FullSegmenter):
    """Full pipeline: blur, raw contour segmentation, the default filters, then ordering."""

    def __init__(self, **args):
        """Assemble the processor stack; keyword arguments go to FullSegmenter.__init__."""
        filters = create_default_filter_stack()
        pipeline = [BlurProcessor(), RawContourSegmenter()]
        pipeline = pipeline + filters + [SegmentOrderer()]
        FullSegmenter.__init__(self, pipeline, **args)
        # pre-hook on the first processor, built by create_broadcast from
        # "_input", the filters and "image" (Filter.display needs Filter.image)
        pipeline[0].add_prehook(create_broadcast("_input", filters, "image"))
85 | 


--------------------------------------------------------------------------------
/simpleocr/ocr.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | from .opencv_utils import show_image_and_wait_for_key, draw_segments
 3 | from . import segmentation as segmenters
 4 | from . import classification as classifiers
 5 | from . import feature_extraction as extractors
 6 | from . import grounding as grounders
 7 | from .files import open_image, Image
 8 | from six import unichr
 9 | 
# Registries mapping a short name to a class; OCR.__init__ passes them to
# get_instance_from together with the name of the default entry.
SEGMENTERS = {
    "contour": segmenters.ContourSegmenter,
    "raw": segmenters.RawSegmenter,
    "rawcontour": segmenters.RawContourSegmenter,
}
EXTRACTORS = {"simple": extractors.SimpleFeatureExtractor}
CLASSIFIERS = {"knn": classifiers.KNNClassifier}
GROUNDERS = {"user": grounders.UserGrounder, "text": grounders.TextGrounder}
18 | 
19 | 
def show_differences(image, segments, ground_classes, result_classes):
    """Show a copy of image with matching segments in (0, 255, 0) and mismatches in (0, 0, 255)."""
    canvas = image.copy()
    matches = (ground_classes == result_classes)
    matches.shape = (len(matches),)  # transform nx1 matrix into vector
    mismatches = numpy.logical_not(matches)
    draw_segments(canvas, segments[matches, :], (0, 255, 0))
    draw_segments(canvas, segments[mismatches, :], (0, 0, 255))
    show_image_and_wait_for_key(canvas, "differences")
27 | 
28 | 
def reconstruct_chars(classes):
    """Join the characters whose code points are given in classes into one string.

    :param classes: iterable or numpy array (any shape) of code points; float
        values such as those returned by KNNClassifier.classify are accepted
    :return: the reconstructed string
    """
    if not isinstance(classes, numpy.ndarray):
        classes = list(classes)  # also accept generators and other iterables
    # flatten so each element is a scalar, then hand unichr() a real integer
    codes = numpy.asarray(classes).reshape(-1)
    return "".join(unichr(int(code)) for code in codes)
32 | 
33 | 
def accuracy(expected, result):
    """Return the fraction of elements of result that equal expected.

    :param expected: numpy array of the correct classes
    :param result: numpy array of the same shape holding the predicted classes
    :return: float between 0.0 and 1.0; 0.0 when the arrays are empty
    :raises ValueError: if the two arrays have different shapes
    """
    if expected.shape != result.shape:
        raise ValueError("expected " + str(expected.shape) + ", got " + str(result.shape))
    correct = expected == result
    # divide by the number of compared elements, not just the number of rows
    total = correct.size
    if total == 0:
        return 0.0
    return float(numpy.count_nonzero(correct)) / total
39 | 
40 | 
def get_instance_from(x, class_dict, default_key):
    """Gets a instance of a class, given a class dict and x.

    :param x: an instance (returned unchanged), a key of class_dict (the mapped
        class is instantiated without arguments), or None
    :param class_dict: dict mapping keys to classes
    :param default_key: key used when x is None
    :return: the resulting instance; x itself when it is not a key of class_dict
    """
    # only None selects the default, so an instance that happens to be falsy
    # (e.g. an empty container) is not silently replaced
    key = default_key if x is None else x
    try:
        cls = class_dict.get(key)
    except TypeError:
        # unhashable objects cannot be keys, so x must already be an instance
        cls = None
    return cls() if cls is not None else x
49 | 
50 | 
class OCR(object):
    """Bundles a segmenter, a feature extractor, a classifier and a grounder."""

    def __init__(self, segmenter=None, extractor=None, classifier=None, grounder=None):
        """Resolve each component through get_instance_from and its registry.

        The defaults are "contour", "simple", "knn" and "text" respectively.
        """
        self.segmenter = get_instance_from(segmenter, SEGMENTERS, "contour")
        self.extractor = get_instance_from(extractor, EXTRACTORS, "simple")
        self.classifier = get_instance_from(classifier, CLASSIFIERS, "knn")
        self.grounder = get_instance_from(grounder, GROUNDERS, "text")

    def train(self, image_file):
        """Train the classifier from a grounded image.

        :param image_file: an Image, or anything open_image accepts
        :raises Exception: if the image is not grounded
        """
        grounded = image_file if isinstance(image_file, Image) else open_image(image_file)
        if not grounded.is_grounded:
            raise Exception("The provided file is not grounded")
        features = self.extractor.extract(grounded.image, grounded.ground.segments)
        self.classifier.train(features, grounded.ground.classes)

    def ocr(self, image_file, show_steps=False):
        """Run OCR with the trained classifier.

        :param image_file: an Image, or anything open_image accepts
        :param show_steps: when true, call the segmenter's display() after segmenting
        :return: tuple (chars, classes, segments)
        """
        target = image_file if isinstance(image_file, Image) else open_image(image_file)
        segments = self.segmenter.process(target.image)
        if show_steps:
            self.segmenter.display()
        features = self.extractor.extract(target.image, segments)
        classes = self.classifier.classify(features)
        return reconstruct_chars(classes), classes, segments

    def ground(self, image_file, text=None):
        """
        Ground an image file for use in the OCR object.
        :param image_file: The name of the image file or an ImageFile object
        :param text: The text, if self.grounder is a TextGrounder (defaults to None)
        :return:
        """
        target = image_file if isinstance(image_file, Image) else open_image(image_file)
        segments = self.segmenter.process(target.image)
        if not isinstance(self.grounder, grounders.TextGrounder):
            self.grounder.ground(target, segments)
        elif text:
            self.grounder.ground(target, segments, text)
        else:
            raise ValueError("Trying to ground file with TextGrounder without specifying text argument.")
        target.ground.write()  # save to file
96 | 


--------------------------------------------------------------------------------
/simpleocr/opencv_utils.py:
--------------------------------------------------------------------------------
  1 | from .numpy_utils import OverflowPreventer
  2 | from .processor import DisplayingProcessor
  3 | import numpy
  4 | import cv2
  5 | 
  6 | 
  7 | class ImageProcessor(DisplayingProcessor):
  8 |     def display(self, display_before=True):
  9 |         if display_before:
 10 |             show_image_and_wait_for_key(self._input, "before " + self.__class__.__name__)
 11 |         show_image_and_wait_for_key(self._output, "after " + self.__class__.__name__)
 12 | 
 13 |     def _process(self, image):
 14 |         return self._image_processing(image)
 15 | 
 16 |     def _image_processing(self, image):
 17 |         raise NotImplementedError(str(self.__class__))
 18 | 
 19 | 
 20 | class BrightnessProcessor(ImageProcessor):
 21 |     """
 22 |     changes image brightness.
 23 |     A brightness of -1 will make the image all black;
 24 |     one of 1 will make the image all white
 25 |     """
 26 | 
 27 |     PARAMETERS = ImageProcessor.PARAMETERS + {"brightness": 0.0}
 28 | 
 29 |     def _image_processing(self, image):
 30 |         b = self.brightness
 31 |         assert image.dtype == numpy.uint8
 32 |         assert -1 <= b <= 1
 33 |         image = image.copy()
 34 |         with OverflowPreventer(image) as img:
 35 |             img += int(b * 256)
 36 |         return image
 37 | 
 38 | 
class ContrastProcessor(ImageProcessor):
    """changes image contrast. a scale of 1 will make no changes

    Parameters: ``scale`` is the contrast factor and ``center`` (a fraction,
    multiplied by 256 below) is the value around which the scaling happens.
    """
    PARAMETERS = ImageProcessor.PARAMETERS + {"scale": 1.0, "center": 0.5}

    def _image_processing(self, image):
        """Return a contrast-adjusted copy of the uint8 image."""
        assert image.dtype == numpy.uint8
        image = image.copy()
        s, c = self.scale, self.center
        c = int(c * 256)  # center expressed on the pixel value scale
        # Both branches are algebraically c + s * (img - c); only the order of
        # the two in-place steps differs.
        # NOTE(review): presumably the order is chosen to keep intermediate
        # values small inside OverflowPreventer - confirm against numpy_utils.
        with OverflowPreventer(image) as img:
            if s <= 1:
                img *= s
                img += int(c * (1 - s))
            else:
                # NOTE(review): unlike the branch above, this offset is not
                # wrapped in int() - confirm the in-place subtraction accepts it
                img -= c * (1 - 1 / s)
                img *= s
        return image
 56 | 
 57 | 
 58 | class BlurProcessor(ImageProcessor):
 59 |     """changes image contrast. a scale of 1 will make no changes"""
 60 |     PARAMETERS = ImageProcessor.PARAMETERS + {"blur_x": 0, "blur_y": 0}
 61 | 
 62 |     def _image_processing(self, image):
 63 |         assert image.dtype == numpy.uint8
 64 |         image = image.copy()
 65 |         x, y = self.blur_x, self.blur_y
 66 |         if x or y:
 67 |             x += (x + 1) % 2  # opencv needs a
 68 |             y += (y + 1) % 2  # odd number...
 69 |             image = cv2.GaussianBlur(image, (x, y), 0)
 70 |         return image
 71 | 
 72 | 
 73 | def ask_for_key(return_arrow_keys=True):
 74 |     key = 128
 75 |     while key > 127:
 76 |         key = cv2.waitKey(0)
 77 |         if return_arrow_keys:
 78 |             if key in (65362, 65364, 65361, 65363):  # up, down, left, right
 79 |                 return key
 80 |         key %= 256
 81 |     return key
 82 | 
 83 | 
 84 | def background_color(image, numpy_result=True):
 85 |     result = numpy.median(numpy.median(image, 0), 0).astype(numpy.int)
 86 |     if not numpy_result:
 87 |         try:
 88 |             result = tuple(map(int, result))
 89 |         except TypeError:
 90 |             result = (int(result),)
 91 |     return result
 92 | 
 93 | 
 94 | def show_image_and_wait_for_key(image, name="Image"):
 95 |     """
 96 |     Shows an image, outputting name. keygroups is a dictionary of keycodes to functions;
 97 |     they are executed when the corresponding keycode is pressed
 98 |     """
 99 | 
100 |     print("showing", name, "(waiting for input)")
101 |     cv2.imshow('norm', image)
102 |     return ask_for_key()
103 | 
104 | 
def draw_segments(image, segments, color=(255, 0, 0), line_width=1):
    """draws segments on image

    :param image: image to draw on (modified in place)
    :param segments: iterable of (x, y, width, height) rectangles
    :param color: rectangle color
    :param line_width: thickness of the rectangle outline
    """
    for segment in segments:
        # Convert to plain ints first, as draw_lines does: the sums below then
        # cannot wrap around in a narrow numpy dtype, and OpenCV receives
        # ordinary Python integers.
        x, y, w, h = (int(value) for value in segment)
        cv2.rectangle(image, (x, y), (x + w, y + h), color, line_width)
110 | 
111 | 
def draw_lines(image, ys, color=(255, 0, 0), line_width=1):
    """Draw one horizontal line across the whole image width for every y in ys."""
    for y in ys:
        row = int(y)
        start, end = (0, row), (image.shape[1], row)
        cv2.line(image, start, end, color, line_width)
116 | 
117 | 
def draw_classes(image, segments, classes):
    """Write each class at the (x, y) position of its segment.

    :param image: image to draw on (modified in place)
    :param segments: iterable of (x, y, width, height) rectangles
    :param classes: the text to write for each segment, in the same order
    """
    assert len(segments) == len(classes)
    for segment, label in zip(segments, classes):
        # plain ints for OpenCV, as done by draw_lines
        x, y = int(segment[0]), int(segment[1])
        cv2.putText(image, label, (x, y), 0, 0.5, (128, 128, 128))
123 | 
124 | 
def get_opencv_version():
    """
    Return the major OpenCV version, parsed from cv2.__version__
    :return: int
    """
    major = cv2.__version__.split(".", 1)[0]
    return int(major)
131 | 
132 | 


--------------------------------------------------------------------------------
/simpleocr/grounding.py:
--------------------------------------------------------------------------------
  1 | """various classes for establishing ground truth"""
  2 | 
  3 | from .classification import classes_to_numpy, classes_from_numpy, BLANK_CLASS
  4 | from .opencv_utils import show_image_and_wait_for_key, draw_segments, draw_classes
  5 | import numpy
  6 | import string
  7 | from six import text_type, unichr, moves
  8 | 
# class given to segments the user marks as "not a character"; unichr(10) is the newline
NOT_A_SEGMENT = unichr(10)
 10 | 
 11 | 
 12 | class Grounder(object):
 13 |     def ground(self, imagefile, segments, external_data):
 14 |         """given an ImageFile, grounds it, through arbitrary data (better defined in subclasses)"""
 15 |         raise NotImplementedError()
 16 | 
 17 | 
 18 | class TerminalGrounder(Grounder):
 19 |     """
 20 |     Labels by using raw_input() to capture a character each line
 21 |     """
 22 | 
 23 |     def ground(self, imagefile, segments, _=None):
 24 |         classes = []
 25 |         character = ""
 26 |         print("Found %s segments to ground." % len(segments))
 27 |         print("Type 'exit' to stop grounding the file.")
 28 |         print("Type ' ' for anything that is not a character.")
 29 |         print("Grounding will exit automatically after all segments.")
 30 |         print("Going back to a previous segment is not possible at this time.")
 31 |         for num in range(len(segments)):
 32 |             while len(character) != 1:
 33 |                 character = moves.input("Please enter the value for segment #%s:  " % (num+1))
 34 |                 if character == "exit":
 35 |                     break
 36 |                 if len(character) != 1:
 37 |                     print("That is not a single character. Please try again.")
 38 |             if character == " ":
 39 |                 classes.append(NOT_A_SEGMENT)
 40 |             else:
 41 |                 classes.append(character)
 42 |             character = ""
 43 |         classes = classes_to_numpy(classes)
 44 |         imagefile.set_ground(segments, classes)
 45 | 
 46 | 
 47 | class TextGrounder(Grounder):
 48 |     """labels from a string"""
 49 | 
 50 |     def ground(self, imagefile, segments, text):
 51 |         """tries to grounds from a simple string"""
 52 |         text = text_type(text)
 53 |         text = [c for c in text if c in string.ascii_letters + string.digits]
 54 |         if len(segments) != len(text):
 55 |             raise ValueError("segments/text length mismatch")
 56 |         classes = classes_to_numpy(text)
 57 |         imagefile.set_ground(segments, classes)
 58 | 
 59 | 
class UserGrounder(Grounder):
    """labels by interactively asking the user"""

    def ground(self, imagefile, segments, _=None):
        """Ask the user to label each segment through key presses in the image window.

        A digit, ASCII letter or punctuation key labels the current segment,
        space marks it as not a character, backspace resets it to BLANK_CLASS,
        the arrow keys move between segments and ESC finishes. If imagefile is
        already grounded, its stored segments and classes are edited instead
        of the segments argument. The result is stored with
        imagefile.set_ground, without the segments marked as not a character.
        """
        print("For each shown segment, please write the character that it represents, or spacebar if it's not a "
              "character. To undo a classification, press backspace. Press ESC when completed, arrow keys to move")
        i = 0  # index of the segment currently being labelled
        if imagefile.is_grounded:
            classes = classes_from_numpy(imagefile.ground.classes)
            segments = imagefile.ground.segments
        else:
            classes = [BLANK_CLASS] * len(segments)
        done = False  # never set to True: the loop is only left through the ESC break
        allowed_chars = list(map(ord, string.digits + string.ascii_letters + string.punctuation))
        while not done:
            image = imagefile.image.copy()
            draw_segments(image, [segments[i]])
            draw_classes(image, segments, classes)
            key = show_image_and_wait_for_key(image, "segment " + str(i))
            if key == 27:  # ESC
                break
            elif key == 8:  # backspace
                classes[i] = BLANK_CLASS
                i += 1
            elif key == 32:  # space
                classes[i] = NOT_A_SEGMENT
                i += 1
            # NOTE(review): 81 and 83 are also ord("Q") and ord("S"), so those two
            # letters move the selection instead of labelling - confirm intended
            elif key in (81, 65361):  # <-
                i -= 1
            elif key in (83, 65363):  # ->
                i += 1
            elif key in allowed_chars:
                classes[i] = unichr(key)
                i += 1
            # wrap around at both ends of the segment list
            if i >= len(classes):
                i = 0
            if i < 0:
                i = len(classes) - 1

        # drop the segments marked as "not a character", together with their classes
        classes = numpy.array(classes)
        is_segment = classes != NOT_A_SEGMENT
        classes = classes[is_segment]
        segments = segments[is_segment]
        classes = list(classes)

        classes = classes_to_numpy(classes)
        print("classified ", numpy.count_nonzero(classes != classes_to_numpy(BLANK_CLASS)), "characters out of", max(
            classes.shape))
        imagefile.set_ground(segments, classes)
110 | 


--------------------------------------------------------------------------------
/simpleocr/data/digits2.box:
--------------------------------------------------------------------------------
  1 | 3 7 6 20 32 0
  2 | 1 33 6 13 31 0
  3 | 4 53 6 24 31 0
  4 | 1 80 6 13 31 0
  5 | 5 101 6 20 32 0
  6 | 9 125 6 21 32 0
  7 | 2 147 6 21 31 0
  8 | 6 173 5 21 33 0
  9 | 5 195 6 20 32 0
 10 | 3 218 6 20 32 0
 11 | 5 242 6 20 32 0
 12 | 8 266 6 21 32 0
 13 | 9 289 6 21 32 0
 14 | 7 314 6 20 31 0
 15 | 9 336 6 21 32 0
 16 | 3 359 6 20 32 0
 17 | 2 382 6 21 31 0
 18 | 3 406 6 20 32 0
 19 | 8 430 6 21 32 0
 20 | 4 452 6 23 31 0
 21 | 6 478 5 20 33 0
 22 | 2 499 6 21 31 0
 23 | 6 524 5 21 33 0
 24 | 4 546 6 23 31 0
 25 | 3 570 6 20 32 0
 26 | 3 7 57 20 32 0
 27 | 8 32 57 21 32 0
 28 | 3 54 57 20 32 0
 29 | 2 77 57 21 31 0
 30 | 7 103 57 21 31 0
 31 | 9 125 57 21 32 0
 32 | 5 148 57 20 32 0
 33 | 0 172 57 22 32 0
 34 | 2 194 57 21 31 0
 35 | 8 219 57 21 32 0
 36 | 8 243 57 21 32 0
 37 | 4 265 57 23 31 0
 38 | 1 291 56 13 32 0
 39 | 9 313 57 21 32 0
 40 | 7 337 57 21 31 0
 41 | 1 361 57 13 31 0
 42 | 6 384 56 20 33 0
 43 | 9 406 57 21 32 0
 44 | 3 429 57 20 32 0
 45 | 9 453 57 21 32 0
 46 | 9 477 57 21 32 0
 47 | 3 499 57 20 32 0
 48 | 7 524 57 21 31 0
 49 | 5 547 57 20 32 0
 50 | 1 572 57 13 31 0
 51 | 0 8 108 22 32 0
 52 | 5 31 108 20 32 0
 53 | 8 55 108 21 32 0
 54 | 2 77 108 21 31 0
 55 | 0 101 108 23 32 0
 56 | 9 125 108 21 32 0
 57 | 7 149 108 21 31 0
 58 | 4 171 108 23 31 0
 59 | 9 196 108 20 32 0
 60 | 4 218 108 23 31 0
 61 | 4 241 108 23 31 0
 62 | 5 265 108 20 32 0
 63 | 9 289 108 21 32 0
 64 | 2 311 108 21 31 0
 65 | 3 335 108 20 32 0
 66 | 0 359 108 22 32 0
 67 | 7 384 108 21 31 0
 68 | 8 407 108 21 32 0
 69 | 1 431 107 13 32 0
 70 | 6 454 107 21 33 0
 71 | 4 476 108 23 31 0
 72 | 0 500 108 22 32 0
 73 | 6 524 107 21 33 0
 74 | 2 546 108 21 31 0
 75 | 8 571 108 21 32 0
 76 | 6 9 158 20 33 0
 77 | 2 30 159 21 31 0
 78 | 0 55 159 22 32 0
 79 | 8 79 159 21 32 0
 80 | 9 102 159 21 32 0
 81 | 9 125 159 21 32 0
 82 | 8 149 159 21 32 0
 83 | 6 173 158 21 33 0
 84 | 2 194 159 21 31 0
 85 | 8 219 159 21 32 0
 86 | 0 242 159 22 32 0
 87 | 3 265 159 20 32 0
 88 | 4 288 159 23 31 0
 89 | 8 313 159 21 32 0
 90 | 2 335 159 21 31 0
 91 | 5 359 159 20 32 0
 92 | 3 382 159 20 32 0
 93 | 4 405 159 23 31 0
 94 | 2 429 159 20 31 0
 95 | 1 455 158 13 32 0
 96 | 1 478 158 13 32 0
 97 | 7 501 159 21 31 0
 98 | 0 523 159 22 32 0
 99 | 6 548 158 21 33 0
100 | 7 571 159 21 31 0
101 | 9 8 210 21 32 0
102 | 8 32 210 21 32 0
103 | 2 54 210 20 31 0
104 | 1 80 209 13 32 0
105 | 4 100 210 24 31 0
106 | 8 125 210 21 32 0
107 | 0 148 210 22 32 0
108 | 8 172 210 21 32 0
109 | 6 196 209 21 33 0
110 | 5 219 210 20 32 0
111 | 1 244 209 13 32 0
112 | 3 265 210 20 32 0
113 | 2 288 210 21 31 0
114 | 8 313 210 21 32 0
115 | 2 335 210 21 31 0
116 | 3 359 210 20 32 0
117 | 0 383 210 22 32 0
118 | 6 407 209 21 33 0
119 | 6 431 209 20 33 0
120 | 4 452 210 23 31 0
121 | 7 478 210 21 31 0
122 | 0 500 210 22 32 0
123 | 9 524 210 21 32 0
124 | 3 546 210 20 32 0
125 | 8 571 210 21 32 0
126 | 4 7 261 23 31 0
127 | 4 30 261 23 31 0
128 | 6 56 260 20 33 0
129 | 0 78 261 22 32 0
130 | 9 102 261 21 32 0
131 | 5 125 261 20 32 0
132 | 5 148 261 20 32 0
133 | 0 172 261 22 32 0
134 | 5 195 261 20 32 0
135 | 8 219 261 21 32 0
136 | 2 241 261 21 31 0
137 | 2 265 261 20 31 0
138 | 3 288 261 20 32 0
139 | 1 314 260 13 32 0
140 | 7 337 261 21 31 0
141 | 2 358 261 21 31 0
142 | 5 383 261 20 32 0
143 | 3 406 261 20 32 0
144 | 5 430 261 20 32 0
145 | 9 453 261 21 32 0
146 | 4 476 261 23 31 0
147 | 0 500 261 22 32 0
148 | 8 524 261 21 32 0
149 | 1 549 260 12 32 0
150 | 2 569 261 21 31 0
151 | 8 8 312 21 32 0
152 | 4 30 312 23 31 0
153 | 8 55 312 21 32 0
154 | 1 80 311 13 32 0
155 | 1 103 311 13 32 0
156 | 1 127 311 13 32 0
157 | 7 149 312 21 31 0
158 | 4 171 312 23 31 0
159 | 5 195 312 20 32 0
160 | 0 219 311 22 33 0
161 | 2 241 312 21 31 0
162 | 8 266 312 21 32 0
163 | 4 288 312 23 31 0
164 | 1 314 311 13 32 0
165 | 0 336 311 22 33 0
166 | 2 358 312 21 31 0
167 | 7 384 312 21 31 0
168 | 0 406 312 22 32 0
169 | 1 431 311 13 32 0
170 | 9 453 312 21 32 0
171 | 3 476 312 20 32 0
172 | 8 500 312 22 32 0
173 | 5 523 312 20 32 0
174 | 2 546 312 21 31 0
175 | 1 572 311 13 32 0
176 | 1 10 362 12 32 0
177 | 0 31 362 22 33 0
178 | 5 55 363 20 32 0
179 | 5 78 363 20 32 0
180 | 5 101 363 20 32 0
181 | 9 125 362 21 33 0
182 | 6 149 362 21 33 0
183 | 4 171 363 23 31 0
184 | 4 194 363 23 31 0
185 | 6 220 362 20 33 0
186 | 2 241 363 21 31 0
187 | 2 265 363 20 31 0
188 | 9 289 362 21 33 0
189 | 4 311 363 24 31 0
190 | 8 336 363 21 32 0
191 | 9 360 362 21 33 0
192 | 5 383 363 20 32 0
193 | 4 405 363 23 31 0
194 | 9 430 362 21 33 0
195 | 3 452 362 20 33 0
196 | 0 476 362 23 33 0
197 | 3 499 362 20 33 0
198 | 8 524 362 21 33 0
199 | 1 549 362 12 32 0
200 | 9 571 362 20 33 0
201 | 6 9 413 20 33 0
202 | 4 30 414 23 31 0
203 | 4 54 414 23 31 0
204 | 2 77 413 21 32 0
205 | 8 102 413 21 33 0
206 | 8 125 413 22 33 0
207 | 1 150 413 13 32 0
208 | 0 172 413 22 33 0
209 | 9 196 413 20 33 0
210 | 7 220 414 21 31 0
211 | 5 242 414 20 32 0
212 | 6 267 413 20 33 0
213 | 6 290 413 21 33 0
214 | 5 312 414 20 32 0
215 | 9 336 413 21 33 0
216 | 3 359 413 20 33 0
217 | 3 382 413 20 33 0
218 | 4 405 414 23 31 0
219 | 4 429 414 23 31 0
220 | 6 454 413 21 33 0
221 | 1 478 413 13 32 0
222 | 2 499 413 21 32 0
223 | 8 524 413 21 33 0
224 | 4 546 414 23 31 0
225 | 7 571 414 21 31 0
226 | 


--------------------------------------------------------------------------------
/simpleocr/files.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from pkg_resources import resource_filename
  3 | import cv2
  4 | from .tesseract_utils import read_boxfile, write_boxfile
  5 | 
# Extensions tried, in this order, when looking up an image file
IMAGE_EXTENSIONS = ['.png', '.tif', '.jpg', '.jpeg']
# Location of the "data" resource directory of the simpleocr package
DATA_DIRECTORY = resource_filename("simpleocr", "data")
# Extensions tried when looking up a ground (box) file
GROUND_EXTENSIONS = ['.box']
# Extension given to a ground file path when no existing ground file is found
GROUND_EXTENSIONS_DEFAULT = GROUND_EXTENSIONS[0]
 10 | 
 11 | 
 12 | def try_extensions(extensions, path):
 13 |     """Checks for various extensions of a path exist if the extension is appended"""
 14 |     for ext in [""] + extensions:
 15 |         if os.path.exists(path + ext):
 16 |             return path + ext
 17 |     return None
 18 | 
 19 | 
 20 | def open_image(path):
 21 |     return ImageFile(get_file_path(path))
 22 | 
 23 | 
 24 | def get_file_path(path, ground=False):
 25 |     """Get the absolute path for an image or ground file.
 26 |     The path can be either absolute, relative to the CWD or relative to the
 27 |     DATA_DIRECTORY. The file extension may be omitted.
 28 |     :param path: image path (str)
 29 |     :param ground: whether the file must be a ground file
 30 |     :return: The absolute path to the file requested
 31 |     """
 32 |     extensions = GROUND_EXTENSIONS if ground else IMAGE_EXTENSIONS
 33 |     # If the path exists, return the path, but make sure it's an absolute path first
 34 |     if os.path.exists(path):
 35 |         return os.path.abspath(path)
 36 |     # Try to find the file with the passed path with the various extensions
 37 |     image_with_extension = try_extensions(extensions, os.path.splitext(path)[0])
 38 |     if image_with_extension:
 39 |         return os.path.abspath(image_with_extension)
 40 |     # The file must be in the data directory if it has not yet been found
 41 |     image_datadir = try_extensions(extensions, os.path.join(DATA_DIRECTORY, path))
 42 |     if image_datadir:
 43 |         return os.path.abspath(image_datadir)
 44 |     raise IOError # file not found
 45 | 
 46 | 
 47 | class Ground(object):
 48 |     """Data class that includes labeled characters of an Image and their positions"""
 49 |     def __init__(self, segments, classes):
 50 |         self.segments = segments
 51 |         self.classes = classes
 52 | 
 53 | 
 54 | class GroundFile(Ground):
 55 |     """Ground with file support. This class can write the data
 56 |     to a box file so it can be restored when the image file the ground data belongs
 57 |     to is opened again.
 58 |     """
 59 |     def __init__(self, path, segments, classes):
 60 |         Ground.__init__(self, segments, classes)
 61 |         self.path = path
 62 | 
 63 |     def read(self):
 64 |         """Update the ground data stored by reading the box file from disk"""
 65 |         self.classes, self.segments = read_boxfile(self.path)
 66 | 
 67 |     def write(self):
 68 |         """Write a new box file to disk containing the stored ground data"""
 69 |         write_boxfile(self.path, self.classes, self.segments)
 70 | 
 71 | 
 72 | class Image(object):
 73 |     """An image stored in memory. It optionally contains a Ground"""
 74 |     def __init__(self, array):
 75 |         """:param array: array with image data, must be OpenCV compatible
 76 |         """
 77 |         self._image = array
 78 |         self._ground = None
 79 | 
 80 |     def set_ground(self, segments, classes):
 81 |         """Creates the ground data"""
 82 |         self._ground = Ground(segments=segments, classes=classes)
 83 | 
 84 |     def remove_ground(self):
 85 |         """Removes the grounding data for the Image"""
 86 |         self._ground = None
 87 | 
 88 |     # These properties prevent the user from altering the attributes stored within
 89 |     # the object and thus emphasize the immutability of the object
 90 |     @property
 91 |     def image(self):
 92 |         return self._image
 93 | 
 94 |     @property
 95 |     def is_grounded(self):
 96 |         return not (self._ground is None)
 97 | 
 98 |     @property
 99 |     def ground(self):
100 |         return self._ground
101 | 
102 | 
class ImageFile(Image):
    """
    Complete class that contains functions for creation from file.
    Also supports grounding in memory.
    """
    def __init__(self, path):
        """
        :param path: path to the image to read, must be valid and absolute
        :raises ValueError: if the path is not absolute
        """
        if not os.path.isabs(path):
            raise ValueError("path value is not absolute: {0}".format(path))
        array = cv2.imread(path)
        Image.__init__(self, array)
        self._path = path
        basepath = os.path.splitext(path)[0]
        # Look for an existing ground file that shares the image's base name
        self._ground_path = try_extensions(GROUND_EXTENSIONS, basepath)
        if self._ground_path:
            self._ground = GroundFile(self._ground_path, None, None)
            self._ground.read()
        else:
            # No ground file yet: remember where one would be written
            self._ground_path = basepath + GROUND_EXTENSIONS_DEFAULT
            self._ground = None

    def set_ground(self, segments, classes, write_file=False):
        """Creates the ground, optionally saving it to its file

        :param write_file: when True the box file is written immediately
        """
        self._ground = GroundFile(self._ground_path, segments=segments, classes=classes)
        if write_file:
            self.ground.write()

    def remove_ground(self, remove_file=False):
        """Removes ground, optionally deleting its file

        :param remove_file: when True the box file is deleted if it exists
        """
        self._ground = None
        # The ground may only ever have lived in memory (set_ground without
        # write_file), so a missing file is not an error here
        if remove_file and os.path.exists(self._ground_path):
            os.remove(self._ground_path)

    @property
    def path(self):
        return self._path

    @property
    def ground_path(self):
        return self._ground_path
145 | 
146 | 
147 | 


--------------------------------------------------------------------------------
/examples/OCRTraining.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import cv2
  3 | import os
  4 | from keras.layers import Dense, Flatten
  5 | from keras.layers import Dropout
  6 | from keras.layers.convolutional import Conv2D,MaxPooling2D
  7 | from keras.layers.core import flatten
  8 | from keras.models import Sequential
  9 | from keras.optimizer_v1 import Adam
 10 | from sklearn.model_selection import train_test_split
 11 | import matplotlib.pyplot as plt
 12 | from keras.preprocessing.image import ImageDataGenerator
 13 | from keras.utils.np_utils import to_categorical
 14 | 
 15 | 
 16 | 
 17 | 
 18 | import pickle
 19 | 
 20 | 
# NOTE(review): presumably meant to silence TensorFlow's native logging; it is
# set after the keras imports above - confirm that it still takes effect
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Directory that holds one sub-directory per class (named 0, 1, 2, ...)
path = 'myData'
# Fraction of all samples held out as the test set
testRatio  = 0.2
# Fraction of the remaining training samples held out for validation
valRation = 0.2
# Only the first two entries are used below (resize target and model input size)
imageDimensions = (32,32,3)

# Training settings passed to the fit call further down
batchSizeVal = 50
epochsVal = 1
stepsPerEpoch = 2000
 31 | 
 32 | 
count = 0
images = []
classNo = []
# Every entry of the data directory is counted as one class
myList = os.listdir(path)
print("total No of classes detected",len(myList))
noOfclasses = len(myList)
print("importing classes")
# Class x is read from the sub-directory named str(x), so the sub-directories
# are expected to be named 0 .. noOfclasses-1
for x in range(0,noOfclasses):
    myPicliste = os.listdir(path+"/"+str(x))
    for y in myPicliste:
        curImg = cv2.imread(path+"/"+str(x)+"/"+y)
        # bring every sample to the common size before stacking
        curImg = cv2.resize(curImg,(imageDimensions[0],imageDimensions[1]))
        images.append(curImg)
        classNo.append(x)
    print(x,end= " ")
print(" ")


# Convert the collected samples and labels to arrays
images = np.array(images)
classNo = np.array(classNo)
 53 | 
 54 | #print(images.shape)
 55 | #print(classNo.shape)
 56 | 
##### splitting the data ###


# First hold out the test set, then carve the validation set out of what is
# left of the training data
X_train,X_test,y_train,y_test = train_test_split(images,classNo,test_size = testRatio )
X_train,X_validation,y_train,y_validation = train_test_split(X_train,y_train,test_size = valRation )
print(X_train.shape)
print(X_test.shape)
print(X_validation.shape)
 65 | 
 66 | 
 67 | numOfSamples = []
 68 | for x in range(0,noOfclasses):
 69 |     #print(len(np.where(y_train == 0)[0]))
 70 |     numOfSamples.append(len(np.where(y_train == 0)[0]))
 71 | print(numOfSamples)
 72 | 
 73 | plt.figure(figsize=(10,5))
 74 | plt.bar(range(0,noOfclasses),numOfSamples)
 75 | plt.title("NO of images for each class")
 76 | plt.xlabel("class ID")
 77 | plt.ylabel("number of images")
 78 | plt.show()
 79 | print(X_train[0].shape)
 80 | 
 81 | 
 82 | def preProcessing(img):
 83 |     img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
 84 |     img = cv2.equalizeHist(img)
 85 |     img = img/255
 86 |     return img
 87 | #img = preProcessing(X_train[0])
 88 | #img = cv2.resize(img,(300,300))
 89 | #cv2.imshow("preProcessed",img)
 90 | #cv2.waitKey(0)
 91 | 
 92 | 
# Run every image of each split through preProcessing
X_train = np.array(list(map(preProcessing,X_train)))
X_test = np.array(list(map(preProcessing,X_test)))
X_validation = np.array(list(map(preProcessing,X_validation)))

# Append a trailing axis of size 1 to match the model's input_shape
# (height, width, 1)
X_train = X_train.reshape(X_train.shape[0],X_train.shape[1],X_train.shape[2],1)
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1],X_test.shape[2],1)
X_validation = X_validation.reshape(X_validation.shape[0],X_validation.shape[1],X_validation.shape[2],1)

# Augmentation settings for the training batches: shifts, zoom, shear and
# rotation
dataGen = ImageDataGenerator(width_shift_range=0.1,
                             height_shift_range=0.1,
                             zoom_range=0.2,
                             shear_range=0.1,
                             rotation_range=10)
dataGen.fit(X_train)

# Turn the integer class ids into categorical vectors with noOfclasses entries
y_train = to_categorical(y_train,noOfclasses)
y_test = to_categorical(y_test,noOfclasses)
y_validation = to_categorical(y_validation,noOfclasses)
111 | 
112 | 
113 | 
114 | 
def myModel():
    """Build and compile the convolutional classifier used for training.

    Layer order: two 5x5 convolutions, max pooling, two 3x3 convolutions with
    half as many filters, max pooling, dropout, flatten, a dense layer,
    dropout and a softmax output with one unit per class.
    """
    filterCount = 60
    largeKernel = (5,5)
    smallKernel = (3,3)
    poolShape = (2,2)
    denseUnits = 500
    # single-channel input of the configured width and height
    inputShape = (imageDimensions[0], imageDimensions[1], 1)

    net = Sequential()
    net.add(Conv2D(filterCount, largeKernel, input_shape=inputShape, activation='relu'))
    net.add(Conv2D(filterCount, largeKernel, activation='relu'))
    net.add(MaxPooling2D(pool_size=poolShape))
    net.add(Conv2D(filterCount//2, smallKernel, activation='relu'))
    net.add(Conv2D(filterCount//2, smallKernel, activation='relu'))
    net.add(MaxPooling2D(pool_size=poolShape))
    net.add(Dropout(0.5))

    net.add(Flatten())
    net.add(Dense(denseUnits, activation='relu'))
    net.add(Dropout(0.5))
    net.add(Dense(noOfclasses, activation='softmax'))
    net.compile(Adam(learning_rate=0.001),
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    return net
142 | 
model = myModel()
print(model.summary())



# Train on augmented batches produced by dataGen; the validation split is
# passed as-is
history = model.fit_generator(dataGen.flow(X_train,y_train,
                                 batch_size=batchSizeVal),
                                 steps_per_epoch=stepsPerEpoch,
                                 epochs=epochsVal,
                                 validation_data=(X_validation,y_validation),
                                 shuffle =1 )

# Figure 1: training vs validation loss per epoch
plt.figure(1)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.legend(['training', 'validation'])
plt.title('loss')
plt.xlabel('epoch')

# Figure 2: training vs validation accuracy per epoch
plt.figure(2)
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.legend(['training', 'validation'])
plt.title('Accuracy')
plt.xlabel('epoch')
plt.show()
# Evaluate on the held-out test split; presumably score[0] is the loss and
# score[1] the accuracy metric requested in compile() - confirm
score = model.evaluate(X_test,y_test,verbose=0)
print('test score = ',score[0])
print('test Accuracy = ',score[1])
172 | 
# Persist the trained model; the with-block closes the file even when
# pickle.dump raises
with open("model_trained.p","wb") as pickle_out:
    pickle.dump(model,pickle_out)
176 | 
177 | 
178 | 
179 | 


--------------------------------------------------------------------------------
/simpleocr/segmentation_aux.py:
--------------------------------------------------------------------------------
  1 | from .processor import Processor, DisplayingProcessor
  2 | from .opencv_utils import draw_lines, show_image_and_wait_for_key
  3 | import numpy
  4 | import cv2
  5 | from functools import reduce
  6 | 
  7 | 
  8 | class SegmentOrderer(Processor):
  9 |     PARAMETERS = Processor.PARAMETERS + {"max_line_height": 20, "max_line_width": 10000}
 10 | 
 11 |     def _process(self, segments):
 12 |         """sort segments in read order - left to right, up to down"""
 13 |         # sort_f= lambda r: max_line_width*(r[1]/max_line_height)+r[0]
 14 |         # segments= sorted(segments, key=sort_f)
 15 |         # segments= segments_to_numpy( segments )
 16 |         # return segments
 17 |         mlh, mlw = self.max_line_height, self.max_line_width
 18 |         s = segments.astype(numpy.uint32)  # prevent overflows
 19 |         order = mlw * (s[:, 1] // mlh) + s[:, 0]
 20 |         sort_order = numpy.argsort(order)
 21 |         return segments[sort_order]
 22 | 
 23 | 
class LineFinder(DisplayingProcessor):
    """Processor that estimates text line positions from segment tops/bottoms.

    The segments are passed through unchanged; the detected line positions are
    stored on the instance (lines_tops, lines_bottoms, lines_topbottoms,
    lines_topmiddlebottoms).
    """

    @staticmethod
    def _guess_lines(ys, max_lines=50, confidence_minimum=0.0):
        """Guess the y positions of the text lines by clustering ``ys``.

        k-means is run for k = 1 .. min(len(ys), max_lines) - 1 and the k
        whose combined metric (change in log-compactness and spread of the
        gaps between sorted cluster centers) is lowest is selected.

        :param ys: array of y coordinates; must provide ``astype``
        :param max_lines: exclusive upper bound on the number of clusters tried
        :param confidence_minimum: minimum accepted confidence value
        :return: the sorted cluster centers of the selected k (floats)
        :raises Exception: "low confidence" if the confidence is below
            ``confidence_minimum``
        """
        ys = ys.astype(numpy.float32)
        compactness_list, means_list, diffs, deviations = [], [], [], []
        start_n = 1
        for k in range(start_n, min(len(ys), max_lines)):
            compactness, classified_points, means = cv2.kmeans(data=ys, K=k, bestLabels=None, criteria=(
            cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_MAX_ITER, 1, 10), attempts=2, flags=cv2.KMEANS_PP_CENTERS)
            means = numpy.sort(means, axis=0)
            means_list.append(means)
            compactness_list.append(compactness)
            if k < 3:
                tmp1 = [1, 2, 500, 550]  # forge data for bad clusters
            else:
                # gaps between consecutive sorted cluster centers. Assuming lines are equally spaced...
                tmp1 = numpy.diff(means, axis=0)  # diff will be equal or very similar
            tmp2 = numpy.std(tmp1) / numpy.mean(means)  # so variance is minimal (value is not used below)
            tmp3 = numpy.sum((tmp1 - numpy.mean(tmp1)) ** 2)  # sum of squared deviations of the gaps from their mean
            diffs.append(tmp1)
            deviations.append(tmp3)

        # change of log-compactness between consecutive k; one entry fewer than the k values tried
        compactness_list = numpy.diff(
            numpy.log(numpy.array(compactness_list) + 0.01))  # sum small amount to avoid log(0)
        # drop the k=1 entry so deviations lines up with the diffed compactness_list
        deviations = numpy.array(deviations[1:])
        # the first remaining entry (k=2) came from the forged data; replace it with the mean of the rest
        deviations[0] = numpy.mean(deviations[1:])
        # standardize both metrics (zero mean, unit std) before combining them
        compactness_list = (compactness_list - numpy.mean(compactness_list)) / numpy.std(compactness_list)
        deviations = (deviations - numpy.mean(deviations)) / numpy.std(deviations)
        aglomerated_metric = 0.1 * compactness_list + 0.9 * deviations

        # +1 because the metric arrays have no entry for the first k, while means_list does
        i = numpy.argmin(aglomerated_metric) + 1
        lines = means_list[i]

        # calculate confidence: gap between best and second best metric value,
        # relative to the gap between second and third best
        betterness = numpy.sort(aglomerated_metric, axis=0)
        confidence = (betterness[1] - betterness[0]) / (betterness[2] - betterness[1])
        if confidence < confidence_minimum:
            raise Exception("low confidence")
        return lines  # still floating points

    def _process(self, segments):
        """Detect line tops and bottoms and store them on the instance.

        Column 1 of ``segments`` is used as each segment's top and column 1 +
        column 3 as its bottom.

        :return: ``segments`` unchanged
        :raises Exception: "different number of lines" if the top and bottom
            guesses disagree in length
        """
        segment_tops = segments[:, 1]
        segment_bottoms = segment_tops + segments[:, 3]
        tops = self._guess_lines(segment_tops)
        bottoms = self._guess_lines(segment_bottoms)
        if len(tops) != len(bottoms):
            raise Exception("different number of lines")
        middles = (tops + bottoms) / 2
        topbottoms = numpy.sort(numpy.append(tops, bottoms))
        topmiddlebottoms = numpy.sort(reduce(numpy.append, (tops, middles, bottoms)))
        self.lines_tops = tops
        self.lines_bottoms = bottoms
        self.lines_topbottoms = topbottoms
        self.lines_topmiddlebottoms = topmiddlebottoms
        return segments

    def display(self, display_before=False):
        """Draw the detected line tops and bottoms on a copy of the image.

        NOTE(review): self.image is not assigned anywhere in this class;
        presumably it is set from outside before display() is called - confirm.
        ``display_before`` is not used here.
        """
        copy = self.image.copy()
        draw_lines(copy, self.lines_tops, (0, 0, 255))
        draw_lines(copy, self.lines_bottoms, (0, 255, 0))
        show_image_and_wait_for_key(copy, "line starts and ends")
 86 | 
 87 | 
 88 | def guess_segments_lines(segments, lines, nearline_tolerance=5.0):
 89 |     """
 90 |     given segments, outputs a array of line numbers, or -1 if it
 91 |     doesn't belong to any
 92 |     """
 93 |     ys = segments[:, 1]
 94 |     closeness = numpy.abs(numpy.subtract.outer(ys, lines))  # each row a y, each collumn a distance to each line
 95 |     line_of_y = numpy.argmin(closeness, axis=1)
 96 |     distance = numpy.min(closeness, axis=1)
 97 |     bad = distance > numpy.mean(distance) + nearline_tolerance * numpy.std(distance)
 98 |     line_of_y[bad] = -1
 99 |     return line_of_y
100 | 
101 | 
def contained_segments_matrix(segments):
    """
    Build an n*n boolean matrix m, n=len(segments), in which m[i,j] means
    segments[i] is contained inside segments[j].

    The comparison is done on sort ranks (argsort of argsort) of the left,
    right, top and bottom edges rather than on the coordinates themselves.
    """
    left, top = segments[:, 0], segments[:, 1]
    right, bottom = left + segments[:, 2], top + segments[:, 3]

    def rank(values):
        # position of every element in the sorted order of ``values``
        return numpy.argsort(numpy.argsort(values))

    def not_before(r):
        # [a, b] is True where rank[a] >= rank[b]
        return r[:, numpy.newaxis] >= r[numpy.newaxis, :]

    def strictly_before(r):
        # [a, b] is True where rank[a] < rank[b]
        return r[:, numpy.newaxis] < r[numpy.newaxis, :]

    left_rank, right_rank = rank(left), rank(right)
    top_rank, bottom_rank = rank(top), rank(bottom)

    # a starts at or after b and ends before b, on each axis
    inside_x = not_before(left_rank) & strictly_before(right_rank)
    inside_y = not_before(top_rank) & strictly_before(bottom_rank)
    return inside_x & inside_y
121 | 


--------------------------------------------------------------------------------
/simpleocr/processor.py:
--------------------------------------------------------------------------------
  1 | def _same_type(a, b):
  2 |     type_correct = False
  3 |     if type(a) == type(b):
  4 |         type_correct = True
  5 |     try:
  6 |         if isinstance(a, b):
  7 |             type_correct = True
  8 |     except TypeError:  # v may not be a class or type, but an int, a string, etc
  9 |         pass
 10 |     return type_correct
 11 | 
 12 | 
 13 | def _broadcast(src_processor, src_atr_name, dest_processors, dest_atr_name, transform_function):
 14 |     """
 15 |     To be used exclusively by create_broadcast.
 16 |     A broadcast function gets an attribute on the src_processor and
 17 |     sets it (possibly under a different name) on dest_processors
 18 |     """
 19 |     value = getattr(src_processor, src_atr_name)
 20 |     value = transform_function(value)
 21 |     for d in dest_processors:
 22 |         setattr(d, dest_atr_name, value)
 23 | 
 24 | 
 25 | def create_broadcast(src_atr_name, dest_processors, dest_atr_name=None, transform_function=lambda x: x):
 26 |     """
 27 |     This method creates a function, intended to be called as a
 28 |     Processor posthook, that copies some of the processor's attributes
 29 |     to other processors
 30 |     """
 31 |     from functools import partial
 32 |     if dest_atr_name == None:
 33 |         dest_atr_name = src_atr_name
 34 |     if not hasattr(dest_processors, "__iter__"):  # a single processor was given instead
 35 |         dest_processors = [dest_processors]
 36 |     return partial(_broadcast, src_atr_name=src_atr_name, dest_processors=dest_processors, dest_atr_name=dest_atr_name,
 37 |                    transform_function=transform_function)
 38 | 
 39 | 
 40 | class Parameters(dict):
 41 |     def __add__(self, other):
 42 |         d3 = Parameters()
 43 |         d3.update(self)
 44 |         d3.update(other)
 45 |         return d3
 46 | 
 47 | 
 48 | class Processor(object):
 49 |     """
 50 |     In goes something, out goes another. Processor.process() models
 51 |     the behaviour of a function, where there are some stored parameters
 52 |     in the Processor instance. Further, it optionally calls arbitrary
 53 |     functions before and after processing (prehooks, posthooks)
 54 |     """
 55 | 
 56 |     PARAMETERS = Parameters()
 57 | 
 58 |     def __init__(self, **args):
 59 |         """sets default parameters"""
 60 |         for k, v in self.PARAMETERS.items():
 61 |             setattr(self, k, v)
 62 |         self.set_parameters(**args)
 63 |         self._prehooks = []  # functions (on input) to be executed before processing
 64 |         self._poshooks = []  # functions (on output) to be executed after processing
 65 | 
 66 |     def get_parameters(self):
 67 |         """returns a dictionary with the processor's stored parameters"""
 68 |         parameter_names = self.PARAMETERS.keys()
 69 |         # TODO: Unresolved reference for processor
 70 |         parameter_values = [getattr(processor, n) for n in parameter_names]
 71 |         return dict(zip(parameter_names, parameter_values))
 72 | 
 73 |     def set_parameters(self, **args):
 74 |         """sets the processor stored parameters"""
 75 |         for k, v in self.PARAMETERS.items():
 76 |             new_value = args.get(k)
 77 |             if new_value != None:
 78 |                 if not _same_type(new_value, v):
 79 |                     raise Exception(
 80 |                         "On processor {0}, argument {1} takes something like {2}, but {3} was given".format(self, k, v,
 81 |                                                                                                             new_value))
 82 |                 setattr(self, k, new_value)
 83 |         not_used = set(args.keys()).difference(set(self.PARAMETERS.keys()))
 84 |         not_given = set(self.PARAMETERS.keys()).difference(set(args.keys()))
 85 |         return not_used, not_given
 86 | 
 87 |     def _process(self, arguments):
 88 |         raise NotImplementedError(str(self.__class__) + "." + "_process")
 89 | 
 90 |     def add_prehook(self, prehook_function):
 91 |         self._prehooks.append(prehook_function)
 92 | 
 93 |     def add_poshook(self, poshook_function):
 94 |         self._poshooks.append(poshook_function)
 95 | 
 96 |     def process(self, arguments):
 97 |         self._input = arguments
 98 |         for prehook in self._prehooks:
 99 |             prehook(self)
100 |         output = self._process(arguments)
101 |         self._output = output
102 |         for poshook in self._poshooks:
103 |             poshook(self)
104 |         return output
105 | 
106 | 
class DisplayingProcessor(Processor):
    """Processor that can additionally show its last effect to the user."""

    def display(self, display_before=False):
        """
        Show the last effect this processor had - on a GUI, for
        example. If display_before is True, show the state from before
        the processor ran. Must be implemented by subclasses.
        """
        raise NotImplementedError
115 | 
116 | 
class ProcessorStack(Processor):
    """a stack of processors. Each processor's output is fed to the next"""

    def __init__(self, processor_instances=None, **args):
        """
        :param processor_instances: list of Processor instances, in execution
            order; defaults to a new empty list
        :param args: parameters forwarded to the wrapped processors
        """
        # a fresh list per instance; a shared default list would leak
        # processors from one stack into another
        if processor_instances is None:
            processor_instances = []
        # the stack must exist before Processor.__init__ runs, because that
        # calls set_parameters, which iterates over self.processors
        self.set_processor_stack(processor_instances)
        Processor.__init__(self, **args)

    def set_processor_stack(self, processor_instances):
        """replaces the wrapped processors; all must be Processor instances"""
        assert all(isinstance(x, Processor) for x in processor_instances)
        self.processors = processor_instances

    def get_parameters(self):
        """gets from all wrapped processors

        Processors later in the stack override earlier ones on a name clash.
        """
        d = {}
        for p in self.processors:
            d.update(dict((n, getattr(p, n)) for n in p.PARAMETERS))
        return d

    def set_parameters(self, **args):
        """sets to all wrapped processors

        :return: tuple (not_used, not_given), each the union of what the
            wrapped processors reported
        """
        not_used = set()
        not_given = set()
        for p in self.processors:
            nu, ng = p.set_parameters(**args)
            not_used = not_used.union(nu)
            not_given = not_given.union(ng)
        return not_used, not_given

    def _process(self, arguments):
        """feeds arguments through every processor in order"""
        for p in self.processors:
            arguments = p.process(arguments)
        return arguments
151 | 
152 | 
class DisplayingProcessorStack(ProcessorStack):
    """ProcessorStack that displays the effect of each wrapped processor."""

    def display(self, display_before=False):
        """
        Call display() on every wrapped processor that has one.

        If display_before is True, the first processor is asked to show its
        "before" state and the remaining ones are displayed normally.
        """
        remaining = self.processors
        if display_before and self.processors:
            # display() lives on the processors, not on the list holding them
            first = self.processors[0]
            remaining = self.processors[1:]
            if hasattr(first, "display"):
                first.display(display_before=True)
        for p in remaining:
            if hasattr(p, "display"):
                p.display(display_before=False)
163 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU AFFERO GENERAL PUBLIC LICENSE
  2 |                        Version 3, 19 November 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU Affero General Public License is a free, copyleft license for
 11 | software and other kinds of works, specifically designed to ensure
 12 | cooperation with the community in the case of network server software.
 13 | 
 14 |   The licenses for most software and other practical works are designed
 15 | to take away your freedom to share and change the works.  By contrast,
 16 | our General Public Licenses are intended to guarantee your freedom to
 17 | share and change all versions of a program--to make sure it remains free
 18 | software for all its users.
 19 | 
 20 |   When we speak of free software, we are referring to freedom, not
 21 | price.  Our General Public Licenses are designed to make sure that you
 22 | have the freedom to distribute copies of free software (and charge for
 23 | them if you wish), that you receive source code or can get it if you
 24 | want it, that you can change the software or use pieces of it in new
 25 | free programs, and that you know you can do these things.
 26 | 
 27 |   Developers that use our General Public Licenses protect your rights
 28 | with two steps: (1) assert copyright on the software, and (2) offer
 29 | you this License which gives you legal permission to copy, distribute
 30 | and/or modify the software.
 31 | 
 32 |   A secondary benefit of defending all users' freedom is that
 33 | improvements made in alternate versions of the program, if they
 34 | receive widespread use, become available for other developers to
 35 | incorporate.  Many developers of free software are heartened and
 36 | encouraged by the resulting cooperation.  However, in the case of
 37 | software used on network servers, this result may fail to come about.
 38 | The GNU General Public License permits making a modified version and
 39 | letting the public access it on a server without ever releasing its
 40 | source code to the public.
 41 | 
 42 |   The GNU Affero General Public License is designed specifically to
 43 | ensure that, in such cases, the modified source code becomes available
 44 | to the community.  It requires the operator of a network server to
 45 | provide the source code of the modified version running there to the
 46 | users of that server.  Therefore, public use of a modified version, on
 47 | a publicly accessible server, gives the public access to the source
 48 | code of the modified version.
 49 | 
 50 |   An older license, called the Affero General Public License and
 51 | published by Affero, was designed to accomplish similar goals.  This is
 52 | a different license, not a version of the Affero GPL, but Affero has
 53 | released a new version of the Affero GPL which permits relicensing under
 54 | this license.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                        TERMS AND CONDITIONS
 60 | 
 61 |   0. Definitions.
 62 | 
 63 |   "This License" refers to version 3 of the GNU Affero General Public License.
 64 | 
 65 |   "Copyright" also means copyright-like laws that apply to other kinds of
 66 | works, such as semiconductor masks.
 67 | 
 68 |   "The Program" refers to any copyrightable work licensed under this
 69 | License.  Each licensee is addressed as "you".  "Licensees" and
 70 | "recipients" may be individuals or organizations.
 71 | 
 72 |   To "modify" a work means to copy from or adapt all or part of the work
 73 | in a fashion requiring copyright permission, other than the making of an
 74 | exact copy.  The resulting work is called a "modified version" of the
 75 | earlier work or a work "based on" the earlier work.
 76 | 
 77 |   A "covered work" means either the unmodified Program or a work based
 78 | on the Program.
 79 | 
 80 |   To "propagate" a work means to do anything with it that, without
 81 | permission, would make you directly or secondarily liable for
 82 | infringement under applicable copyright law, except executing it on a
 83 | computer or modifying a private copy.  Propagation includes copying,
 84 | distribution (with or without modification), making available to the
 85 | public, and in some countries other activities as well.
 86 | 
 87 |   To "convey" a work means any kind of propagation that enables other
 88 | parties to make or receive copies.  Mere interaction with a user through
 89 | a computer network, with no transfer of a copy, is not conveying.
 90 | 
 91 |   An interactive user interface displays "Appropriate Legal Notices"
 92 | to the extent that it includes a convenient and prominently visible
 93 | feature that (1) displays an appropriate copyright notice, and (2)
 94 | tells the user that there is no warranty for the work (except to the
 95 | extent that warranties are provided), that licensees may convey the
 96 | work under this License, and how to view a copy of this License.  If
 97 | the interface presents a list of user commands or options, such as a
 98 | menu, a prominent item in the list meets this criterion.
 99 | 
100 |   1. Source Code.
101 | 
102 |   The "source code" for a work means the preferred form of the work
103 | for making modifications to it.  "Object code" means any non-source
104 | form of a work.
105 | 
106 |   A "Standard Interface" means an interface that either is an official
107 | standard defined by a recognized standards body, or, in the case of
108 | interfaces specified for a particular programming language, one that
109 | is widely used among developers working in that language.
110 | 
111 |   The "System Libraries" of an executable work include anything, other
112 | than the work as a whole, that (a) is included in the normal form of
113 | packaging a Major Component, but which is not part of that Major
114 | Component, and (b) serves only to enable use of the work with that
115 | Major Component, or to implement a Standard Interface for which an
116 | implementation is available to the public in source code form.  A
117 | "Major Component", in this context, means a major essential component
118 | (kernel, window system, and so on) of the specific operating system
119 | (if any) on which the executable work runs, or a compiler used to
120 | produce the work, or an object code interpreter used to run it.
121 | 
122 |   The "Corresponding Source" for a work in object code form means all
123 | the source code needed to generate, install, and (for an executable
124 | work) run the object code and to modify the work, including scripts to
125 | control those activities.  However, it does not include the work's
126 | System Libraries, or general-purpose tools or generally available free
127 | programs which are used unmodified in performing those activities but
128 | which are not part of the work.  For example, Corresponding Source
129 | includes interface definition files associated with source files for
130 | the work, and the source code for shared libraries and dynamically
131 | linked subprograms that the work is specifically designed to require,
132 | such as by intimate data communication or control flow between those
133 | subprograms and other parts of the work.
134 | 
135 |   The Corresponding Source need not include anything that users
136 | can regenerate automatically from other parts of the Corresponding
137 | Source.
138 | 
139 |   The Corresponding Source for a work in source code form is that
140 | same work.
141 | 
142 |   2. Basic Permissions.
143 | 
144 |   All rights granted under this License are granted for the term of
145 | copyright on the Program, and are irrevocable provided the stated
146 | conditions are met.  This License explicitly affirms your unlimited
147 | permission to run the unmodified Program.  The output from running a
148 | covered work is covered by this License only if the output, given its
149 | content, constitutes a covered work.  This License acknowledges your
150 | rights of fair use or other equivalent, as provided by copyright law.
151 | 
152 |   You may make, run and propagate covered works that you do not
153 | convey, without conditions so long as your license otherwise remains
154 | in force.  You may convey covered works to others for the sole purpose
155 | of having them make modifications exclusively for you, or provide you
156 | with facilities for running those works, provided that you comply with
157 | the terms of this License in conveying all material for which you do
158 | not control copyright.  Those thus making or running the covered works
159 | for you must do so exclusively on your behalf, under your direction
160 | and control, on terms that prohibit them from making any copies of
161 | your copyrighted material outside their relationship with you.
162 | 
163 |   Conveying under any other circumstances is permitted solely under
164 | the conditions stated below.  Sublicensing is not allowed; section 10
165 | makes it unnecessary.
166 | 
167 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168 | 
169 |   No covered work shall be deemed part of an effective technological
170 | measure under any applicable law fulfilling obligations under article
171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172 | similar laws prohibiting or restricting circumvention of such
173 | measures.
174 | 
175 |   When you convey a covered work, you waive any legal power to forbid
176 | circumvention of technological measures to the extent such circumvention
177 | is effected by exercising rights under this License with respect to
178 | the covered work, and you disclaim any intention to limit operation or
179 | modification of the work as a means of enforcing, against the work's
180 | users, your or third parties' legal rights to forbid circumvention of
181 | technological measures.
182 | 
183 |   4. Conveying Verbatim Copies.
184 | 
185 |   You may convey verbatim copies of the Program's source code as you
186 | receive it, in any medium, provided that you conspicuously and
187 | appropriately publish on each copy an appropriate copyright notice;
188 | keep intact all notices stating that this License and any
189 | non-permissive terms added in accord with section 7 apply to the code;
190 | keep intact all notices of the absence of any warranty; and give all
191 | recipients a copy of this License along with the Program.
192 | 
193 |   You may charge any price or no price for each copy that you convey,
194 | and you may offer support or warranty protection for a fee.
195 | 
196 |   5. Conveying Modified Source Versions.
197 | 
198 |   You may convey a work based on the Program, or the modifications to
199 | produce it from the Program, in the form of source code under the
200 | terms of section 4, provided that you also meet all of these conditions:
201 | 
202 |     a) The work must carry prominent notices stating that you modified
203 |     it, and giving a relevant date.
204 | 
205 |     b) The work must carry prominent notices stating that it is
206 |     released under this License and any conditions added under section
207 |     7.  This requirement modifies the requirement in section 4 to
208 |     "keep intact all notices".
209 | 
210 |     c) You must license the entire work, as a whole, under this
211 |     License to anyone who comes into possession of a copy.  This
212 |     License will therefore apply, along with any applicable section 7
213 |     additional terms, to the whole of the work, and all its parts,
214 |     regardless of how they are packaged.  This License gives no
215 |     permission to license the work in any other way, but it does not
216 |     invalidate such permission if you have separately received it.
217 | 
218 |     d) If the work has interactive user interfaces, each must display
219 |     Appropriate Legal Notices; however, if the Program has interactive
220 |     interfaces that do not display Appropriate Legal Notices, your
221 |     work need not make them do so.
222 | 
223 |   A compilation of a covered work with other separate and independent
224 | works, which are not by their nature extensions of the covered work,
225 | and which are not combined with it such as to form a larger program,
226 | in or on a volume of a storage or distribution medium, is called an
227 | "aggregate" if the compilation and its resulting copyright are not
228 | used to limit the access or legal rights of the compilation's users
229 | beyond what the individual works permit.  Inclusion of a covered work
230 | in an aggregate does not cause this License to apply to the other
231 | parts of the aggregate.
232 | 
233 |   6. Conveying Non-Source Forms.
234 | 
235 |   You may convey a covered work in object code form under the terms
236 | of sections 4 and 5, provided that you also convey the
237 | machine-readable Corresponding Source under the terms of this License,
238 | in one of these ways:
239 | 
240 |     a) Convey the object code in, or embodied in, a physical product
241 |     (including a physical distribution medium), accompanied by the
242 |     Corresponding Source fixed on a durable physical medium
243 |     customarily used for software interchange.
244 | 
245 |     b) Convey the object code in, or embodied in, a physical product
246 |     (including a physical distribution medium), accompanied by a
247 |     written offer, valid for at least three years and valid for as
248 |     long as you offer spare parts or customer support for that product
249 |     model, to give anyone who possesses the object code either (1) a
250 |     copy of the Corresponding Source for all the software in the
251 |     product that is covered by this License, on a durable physical
252 |     medium customarily used for software interchange, for a price no
253 |     more than your reasonable cost of physically performing this
254 |     conveying of source, or (2) access to copy the
255 |     Corresponding Source from a network server at no charge.
256 | 
257 |     c) Convey individual copies of the object code with a copy of the
258 |     written offer to provide the Corresponding Source.  This
259 |     alternative is allowed only occasionally and noncommercially, and
260 |     only if you received the object code with such an offer, in accord
261 |     with subsection 6b.
262 | 
263 |     d) Convey the object code by offering access from a designated
264 |     place (gratis or for a charge), and offer equivalent access to the
265 |     Corresponding Source in the same way through the same place at no
266 |     further charge.  You need not require recipients to copy the
267 |     Corresponding Source along with the object code.  If the place to
268 |     copy the object code is a network server, the Corresponding Source
269 |     may be on a different server (operated by you or a third party)
270 |     that supports equivalent copying facilities, provided you maintain
271 |     clear directions next to the object code saying where to find the
272 |     Corresponding Source.  Regardless of what server hosts the
273 |     Corresponding Source, you remain obligated to ensure that it is
274 |     available for as long as needed to satisfy these requirements.
275 | 
276 |     e) Convey the object code using peer-to-peer transmission, provided
277 |     you inform other peers where the object code and Corresponding
278 |     Source of the work are being offered to the general public at no
279 |     charge under subsection 6d.
280 | 
281 |   A separable portion of the object code, whose source code is excluded
282 | from the Corresponding Source as a System Library, need not be
283 | included in conveying the object code work.
284 | 
285 |   A "User Product" is either (1) a "consumer product", which means any
286 | tangible personal property which is normally used for personal, family,
287 | or household purposes, or (2) anything designed or sold for incorporation
288 | into a dwelling.  In determining whether a product is a consumer product,
289 | doubtful cases shall be resolved in favor of coverage.  For a particular
290 | product received by a particular user, "normally used" refers to a
291 | typical or common use of that class of product, regardless of the status
292 | of the particular user or of the way in which the particular user
293 | actually uses, or expects or is expected to use, the product.  A product
294 | is a consumer product regardless of whether the product has substantial
295 | commercial, industrial or non-consumer uses, unless such uses represent
296 | the only significant mode of use of the product.
297 | 
298 |   "Installation Information" for a User Product means any methods,
299 | procedures, authorization keys, or other information required to install
300 | and execute modified versions of a covered work in that User Product from
301 | a modified version of its Corresponding Source.  The information must
302 | suffice to ensure that the continued functioning of the modified object
303 | code is in no case prevented or interfered with solely because
304 | modification has been made.
305 | 
306 |   If you convey an object code work under this section in, or with, or
307 | specifically for use in, a User Product, and the conveying occurs as
308 | part of a transaction in which the right of possession and use of the
309 | User Product is transferred to the recipient in perpetuity or for a
310 | fixed term (regardless of how the transaction is characterized), the
311 | Corresponding Source conveyed under this section must be accompanied
312 | by the Installation Information.  But this requirement does not apply
313 | if neither you nor any third party retains the ability to install
314 | modified object code on the User Product (for example, the work has
315 | been installed in ROM).
316 | 
317 |   The requirement to provide Installation Information does not include a
318 | requirement to continue to provide support service, warranty, or updates
319 | for a work that has been modified or installed by the recipient, or for
320 | the User Product in which it has been modified or installed.  Access to a
321 | network may be denied when the modification itself materially and
322 | adversely affects the operation of the network or violates the rules and
323 | protocols for communication across the network.
324 | 
325 |   Corresponding Source conveyed, and Installation Information provided,
326 | in accord with this section must be in a format that is publicly
327 | documented (and with an implementation available to the public in
328 | source code form), and must require no special password or key for
329 | unpacking, reading or copying.
330 | 
331 |   7. Additional Terms.
332 | 
333 |   "Additional permissions" are terms that supplement the terms of this
334 | License by making exceptions from one or more of its conditions.
335 | Additional permissions that are applicable to the entire Program shall
336 | be treated as though they were included in this License, to the extent
337 | that they are valid under applicable law.  If additional permissions
338 | apply only to part of the Program, that part may be used separately
339 | under those permissions, but the entire Program remains governed by
340 | this License without regard to the additional permissions.
341 | 
342 |   When you convey a copy of a covered work, you may at your option
343 | remove any additional permissions from that copy, or from any part of
344 | it.  (Additional permissions may be written to require their own
345 | removal in certain cases when you modify the work.)  You may place
346 | additional permissions on material, added by you to a covered work,
347 | for which you have or can give appropriate copyright permission.
348 | 
349 |   Notwithstanding any other provision of this License, for material you
350 | add to a covered work, you may (if authorized by the copyright holders of
351 | that material) supplement the terms of this License with terms:
352 | 
353 |     a) Disclaiming warranty or limiting liability differently from the
354 |     terms of sections 15 and 16 of this License; or
355 | 
356 |     b) Requiring preservation of specified reasonable legal notices or
357 |     author attributions in that material or in the Appropriate Legal
358 |     Notices displayed by works containing it; or
359 | 
360 |     c) Prohibiting misrepresentation of the origin of that material, or
361 |     requiring that modified versions of such material be marked in
362 |     reasonable ways as different from the original version; or
363 | 
364 |     d) Limiting the use for publicity purposes of names of licensors or
365 |     authors of the material; or
366 | 
367 |     e) Declining to grant rights under trademark law for use of some
368 |     trade names, trademarks, or service marks; or
369 | 
370 |     f) Requiring indemnification of licensors and authors of that
371 |     material by anyone who conveys the material (or modified versions of
372 |     it) with contractual assumptions of liability to the recipient, for
373 |     any liability that these contractual assumptions directly impose on
374 |     those licensors and authors.
375 | 
376 |   All other non-permissive additional terms are considered "further
377 | restrictions" within the meaning of section 10.  If the Program as you
378 | received it, or any part of it, contains a notice stating that it is
379 | governed by this License along with a term that is a further
380 | restriction, you may remove that term.  If a license document contains
381 | a further restriction but permits relicensing or conveying under this
382 | License, you may add to a covered work material governed by the terms
383 | of that license document, provided that the further restriction does
384 | not survive such relicensing or conveying.
385 | 
386 |   If you add terms to a covered work in accord with this section, you
387 | must place, in the relevant source files, a statement of the
388 | additional terms that apply to those files, or a notice indicating
389 | where to find the applicable terms.
390 | 
391 |   Additional terms, permissive or non-permissive, may be stated in the
392 | form of a separately written license, or stated as exceptions;
393 | the above requirements apply either way.
394 | 
395 |   8. Termination.
396 | 
397 |   You may not propagate or modify a covered work except as expressly
398 | provided under this License.  Any attempt otherwise to propagate or
399 | modify it is void, and will automatically terminate your rights under
400 | this License (including any patent licenses granted under the third
401 | paragraph of section 11).
402 | 
403 |   However, if you cease all violation of this License, then your
404 | license from a particular copyright holder is reinstated (a)
405 | provisionally, unless and until the copyright holder explicitly and
406 | finally terminates your license, and (b) permanently, if the copyright
407 | holder fails to notify you of the violation by some reasonable means
408 | prior to 60 days after the cessation.
409 | 
410 |   Moreover, your license from a particular copyright holder is
411 | reinstated permanently if the copyright holder notifies you of the
412 | violation by some reasonable means, this is the first time you have
413 | received notice of violation of this License (for any work) from that
414 | copyright holder, and you cure the violation prior to 30 days after
415 | your receipt of the notice.
416 | 
417 |   Termination of your rights under this section does not terminate the
418 | licenses of parties who have received copies or rights from you under
419 | this License.  If your rights have been terminated and not permanently
420 | reinstated, you do not qualify to receive new licenses for the same
421 | material under section 10.
422 | 
423 |   9. Acceptance Not Required for Having Copies.
424 | 
425 |   You are not required to accept this License in order to receive or
426 | run a copy of the Program.  Ancillary propagation of a covered work
427 | occurring solely as a consequence of using peer-to-peer transmission
428 | to receive a copy likewise does not require acceptance.  However,
429 | nothing other than this License grants you permission to propagate or
430 | modify any covered work.  These actions infringe copyright if you do
431 | not accept this License.  Therefore, by modifying or propagating a
432 | covered work, you indicate your acceptance of this License to do so.
433 | 
434 |   10. Automatic Licensing of Downstream Recipients.
435 | 
436 |   Each time you convey a covered work, the recipient automatically
437 | receives a license from the original licensors, to run, modify and
438 | propagate that work, subject to this License.  You are not responsible
439 | for enforcing compliance by third parties with this License.
440 | 
441 |   An "entity transaction" is a transaction transferring control of an
442 | organization, or substantially all assets of one, or subdividing an
443 | organization, or merging organizations.  If propagation of a covered
444 | work results from an entity transaction, each party to that
445 | transaction who receives a copy of the work also receives whatever
446 | licenses to the work the party's predecessor in interest had or could
447 | give under the previous paragraph, plus a right to possession of the
448 | Corresponding Source of the work from the predecessor in interest, if
449 | the predecessor has it or can get it with reasonable efforts.
450 | 
451 |   You may not impose any further restrictions on the exercise of the
452 | rights granted or affirmed under this License.  For example, you may
453 | not impose a license fee, royalty, or other charge for exercise of
454 | rights granted under this License, and you may not initiate litigation
455 | (including a cross-claim or counterclaim in a lawsuit) alleging that
456 | any patent claim is infringed by making, using, selling, offering for
457 | sale, or importing the Program or any portion of it.
458 | 
459 |   11. Patents.
460 | 
461 |   A "contributor" is a copyright holder who authorizes use under this
462 | License of the Program or a work on which the Program is based.  The
463 | work thus licensed is called the contributor's "contributor version".
464 | 
465 |   A contributor's "essential patent claims" are all patent claims
466 | owned or controlled by the contributor, whether already acquired or
467 | hereafter acquired, that would be infringed by some manner, permitted
468 | by this License, of making, using, or selling its contributor version,
469 | but do not include claims that would be infringed only as a
470 | consequence of further modification of the contributor version.  For
471 | purposes of this definition, "control" includes the right to grant
472 | patent sublicenses in a manner consistent with the requirements of
473 | this License.
474 | 
475 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
476 | patent license under the contributor's essential patent claims, to
477 | make, use, sell, offer for sale, import and otherwise run, modify and
478 | propagate the contents of its contributor version.
479 | 
480 |   In the following three paragraphs, a "patent license" is any express
481 | agreement or commitment, however denominated, not to enforce a patent
482 | (such as an express permission to practice a patent or covenant not to
483 | sue for patent infringement).  To "grant" such a patent license to a
484 | party means to make such an agreement or commitment not to enforce a
485 | patent against the party.
486 | 
487 |   If you convey a covered work, knowingly relying on a patent license,
488 | and the Corresponding Source of the work is not available for anyone
489 | to copy, free of charge and under the terms of this License, through a
490 | publicly available network server or other readily accessible means,
491 | then you must either (1) cause the Corresponding Source to be so
492 | available, or (2) arrange to deprive yourself of the benefit of the
493 | patent license for this particular work, or (3) arrange, in a manner
494 | consistent with the requirements of this License, to extend the patent
495 | license to downstream recipients.  "Knowingly relying" means you have
496 | actual knowledge that, but for the patent license, your conveying the
497 | covered work in a country, or your recipient's use of the covered work
498 | in a country, would infringe one or more identifiable patents in that
499 | country that you have reason to believe are valid.
500 | 
501 |   If, pursuant to or in connection with a single transaction or
502 | arrangement, you convey, or propagate by procuring conveyance of, a
503 | covered work, and grant a patent license to some of the parties
504 | receiving the covered work authorizing them to use, propagate, modify
505 | or convey a specific copy of the covered work, then the patent license
506 | you grant is automatically extended to all recipients of the covered
507 | work and works based on it.
508 | 
509 |   A patent license is "discriminatory" if it does not include within
510 | the scope of its coverage, prohibits the exercise of, or is
511 | conditioned on the non-exercise of one or more of the rights that are
512 | specifically granted under this License.  You may not convey a covered
513 | work if you are a party to an arrangement with a third party that is
514 | in the business of distributing software, under which you make payment
515 | to the third party based on the extent of your activity of conveying
516 | the work, and under which the third party grants, to any of the
517 | parties who would receive the covered work from you, a discriminatory
518 | patent license (a) in connection with copies of the covered work
519 | conveyed by you (or copies made from those copies), or (b) primarily
520 | for and in connection with specific products or compilations that
521 | contain the covered work, unless you entered into that arrangement,
522 | or that patent license was granted, prior to 28 March 2007.
523 | 
524 |   Nothing in this License shall be construed as excluding or limiting
525 | any implied license or other defenses to infringement that may
526 | otherwise be available to you under applicable patent law.
527 | 
528 |   12. No Surrender of Others' Freedom.
529 | 
530 |   If conditions are imposed on you (whether by court order, agreement or
531 | otherwise) that contradict the conditions of this License, they do not
532 | excuse you from the conditions of this License.  If you cannot convey a
533 | covered work so as to satisfy simultaneously your obligations under this
534 | License and any other pertinent obligations, then as a consequence you may
535 | not convey it at all.  For example, if you agree to terms that obligate you
536 | to collect a royalty for further conveying from those to whom you convey
537 | the Program, the only way you could satisfy both those terms and this
538 | License would be to refrain entirely from conveying the Program.
539 | 
540 |   13. Remote Network Interaction; Use with the GNU General Public License.
541 | 
542 |   Notwithstanding any other provision of this License, if you modify the
543 | Program, your modified version must prominently offer all users
544 | interacting with it remotely through a computer network (if your version
545 | supports such interaction) an opportunity to receive the Corresponding
546 | Source of your version by providing access to the Corresponding Source
547 | from a network server at no charge, through some standard or customary
548 | means of facilitating copying of software.  This Corresponding Source
549 | shall include the Corresponding Source for any work covered by version 3
550 | of the GNU General Public License that is incorporated pursuant to the
551 | following paragraph.
552 | 
553 |   Notwithstanding any other provision of this License, you have
554 | permission to link or combine any covered work with a work licensed
555 | under version 3 of the GNU General Public License into a single
556 | combined work, and to convey the resulting work.  The terms of this
557 | License will continue to apply to the part which is the covered work,
558 | but the work with which it is combined will remain governed by version
559 | 3 of the GNU General Public License.
560 | 
561 |   14. Revised Versions of this License.
562 | 
563 |   The Free Software Foundation may publish revised and/or new versions of
564 | the GNU Affero General Public License from time to time.  Such new versions
565 | will be similar in spirit to the present version, but may differ in detail to
566 | address new problems or concerns.
567 | 
568 |   Each version is given a distinguishing version number.  If the
569 | Program specifies that a certain numbered version of the GNU Affero General
570 | Public License "or any later version" applies to it, you have the
571 | option of following the terms and conditions either of that numbered
572 | version or of any later version published by the Free Software
573 | Foundation.  If the Program does not specify a version number of the
574 | GNU Affero General Public License, you may choose any version ever published
575 | by the Free Software Foundation.
576 | 
577 |   If the Program specifies that a proxy can decide which future
578 | versions of the GNU Affero General Public License can be used, that proxy's
579 | public statement of acceptance of a version permanently authorizes you
580 | to choose that version for the Program.
581 | 
582 |   Later license versions may give you additional or different
583 | permissions.  However, no additional obligations are imposed on any
584 | author or copyright holder as a result of your choosing to follow a
585 | later version.
586 | 
587 |   15. Disclaimer of Warranty.
588 | 
589 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597 | 
598 |   16. Limitation of Liability.
599 | 
600 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608 | SUCH DAMAGES.
609 | 
610 |   17. Interpretation of Sections 15 and 16.
611 | 
612 |   If the disclaimer of warranty and limitation of liability provided
613 | above cannot be given local legal effect according to their terms,
614 | reviewing courts shall apply local law that most closely approximates
615 | an absolute waiver of all civil liability in connection with the
616 | Program, unless a warranty or assumption of liability accompanies a
617 | copy of the Program in return for a fee.
618 | 
619 |                      END OF TERMS AND CONDITIONS
620 | 
621 |             How to Apply These Terms to Your New Programs
622 | 
623 |   If you develop a new program, and you want it to be of the greatest
624 | possible use to the public, the best way to achieve this is to make it
625 | free software which everyone can redistribute and change under these terms.
626 | 
627 |   To do so, attach the following notices to the program.  It is safest
628 | to attach them to the start of each source file to most effectively
629 | state the exclusion of warranty; and each file should have at least
630 | the "copyright" line and a pointer to where the full notice is found.
631 | 
632 |     <one line to give the program's name and a brief idea of what it does.>
633 |     Copyright (C) <year>  <name of author>
634 | 
635 |     This program is free software: you can redistribute it and/or modify
636 |     it under the terms of the GNU Affero General Public License as published by
637 |     the Free Software Foundation, either version 3 of the License, or
638 |     (at your option) any later version.
639 | 
640 |     This program is distributed in the hope that it will be useful,
641 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
642 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
643 |     GNU Affero General Public License for more details.
644 | 
645 |     You should have received a copy of the GNU Affero General Public License
646 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
647 | 
648 | Also add information on how to contact you by electronic and paper mail.
649 | 
650 |   If your software can interact with users remotely through a computer
651 | network, you should also make sure that it provides a way for users to
652 | get its source.  For example, if your program is a web application, its
653 | interface could display a "Source" link that leads users to an archive
654 | of the code.  There are many ways you could offer source, and different
655 | solutions will be better for different programs; see section 13 for the
656 | specific requirements.
657 | 
658 |   You should also get your employer (if you work as a programmer) or school,
659 | if any, to sign a "copyright disclaimer" for the program, if necessary.
660 | For more information on this, and how to apply and follow the GNU AGPL, see
661 | <https://www.gnu.org/licenses/>.
662 | 


--------------------------------------------------------------------------------