├── test ├── __init__.py ├── HMM │ ├── __init__.py │ ├── test_clusterer.py │ ├── test_hmm.py │ ├── test_hmm_result_reconstructor.py │ └── test_depix_hmm.py ├── training_pipeline │ ├── __init__.py │ ├── test_text_generator.py │ ├── test_training_pipeline.py │ ├── test_windows.py │ ├── test_original_image.py │ └── test_pixelize_image.py ├── test_depix_hmm.py └── utils.py ├── experiments ├── __init__.py ├── issue_001 │ ├── __init__.py │ ├── re-mosaiced.png │ ├── experiment_issue_001.png │ └── experiment_issue_001.py ├── experiment_accuracy.py ├── experiment_bank_account_numbers.py └── experiment_generate_sample_images.py ├── resources ├── __init__.py ├── fonts │ ├── arial.ttf │ ├── micrenc.ttf │ └── __init__.py └── images │ └── arial_50 │ ├── 123456789_blocksize-1.png │ ├── 123456789_blocksize-2.png │ ├── 123456789_blocksize-4.png │ ├── 123456789_blocksize-6.png │ ├── 123456789_blocksize-8.png │ ├── 123456789_blocksize-10.png │ ├── 123456789_blocksize-12.png │ ├── 123456789_blocksize-14.png │ ├── 123456789_blocksize-16.png │ ├── 123456789_blocksize-18.png │ └── 123456789_blocksize-20.png ├── text_depixelizer ├── __init__.py ├── HMM │ ├── __init__.py │ ├── clusterer.py │ ├── hmm_result_reconstructor.py │ ├── hmm.py │ └── depix_hmm.py ├── training_pipeline │ ├── __init__.py │ ├── text_generator.py │ ├── windows.py │ ├── original_image.py │ ├── pixelized_image.py │ └── training_pipeline.py ├── parameters.py ├── preprocessing.py └── depix_hmm.py ├── requirements.txt ├── documentation ├── hmm.png ├── H_pixelized.PNG ├── correct_crop.png ├── training_data.png ├── picture_parameters.png ├── H E L L O_blocksize-9.png ├── original_image_text_ascent_descent.png ├── pixelized_image_text_ascent_descent.png ├── correct_crop.svg ├── picture_parameters.svg ├── training_data.svg └── hmm.svg ├── examples └── arial_50_blocksize-8 │ ├── pixelized.png │ ├── pixelized_cropped.png │ ├── depix.py │ └── depix_gridsearch.py ├── LICENSE ├── .gitignore └── README.md /test/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /resources/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/HMM/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /text_depixelizer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/issue_001/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /text_depixelizer/HMM/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/training_pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /text_depixelizer/training_pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.5 2 | Pillow==8.3.2 3 | rstr==3.0.0 4 | scikit-learn==0.24.2 
-------------------------------------------------------------------------------- /documentation/hmm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/documentation/hmm.png -------------------------------------------------------------------------------- /resources/fonts/arial.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/resources/fonts/arial.ttf -------------------------------------------------------------------------------- /resources/fonts/micrenc.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/resources/fonts/micrenc.ttf -------------------------------------------------------------------------------- /documentation/H_pixelized.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/documentation/H_pixelized.PNG -------------------------------------------------------------------------------- /documentation/correct_crop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/documentation/correct_crop.png -------------------------------------------------------------------------------- /documentation/training_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/documentation/training_data.png -------------------------------------------------------------------------------- /documentation/picture_parameters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/documentation/picture_parameters.png 
-------------------------------------------------------------------------------- /experiments/issue_001/re-mosaiced.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/experiments/issue_001/re-mosaiced.png -------------------------------------------------------------------------------- /documentation/H E L L O_blocksize-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/documentation/H E L L O_blocksize-9.png -------------------------------------------------------------------------------- /examples/arial_50_blocksize-8/pixelized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/examples/arial_50_blocksize-8/pixelized.png -------------------------------------------------------------------------------- /experiments/issue_001/experiment_issue_001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/experiments/issue_001/experiment_issue_001.png -------------------------------------------------------------------------------- /examples/arial_50_blocksize-8/pixelized_cropped.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/examples/arial_50_blocksize-8/pixelized_cropped.png -------------------------------------------------------------------------------- /resources/images/arial_50/123456789_blocksize-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/resources/images/arial_50/123456789_blocksize-1.png -------------------------------------------------------------------------------- 
/resources/images/arial_50/123456789_blocksize-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/resources/images/arial_50/123456789_blocksize-2.png -------------------------------------------------------------------------------- /resources/images/arial_50/123456789_blocksize-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/resources/images/arial_50/123456789_blocksize-4.png -------------------------------------------------------------------------------- /resources/images/arial_50/123456789_blocksize-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/resources/images/arial_50/123456789_blocksize-6.png -------------------------------------------------------------------------------- /resources/images/arial_50/123456789_blocksize-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/resources/images/arial_50/123456789_blocksize-8.png -------------------------------------------------------------------------------- /documentation/original_image_text_ascent_descent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/documentation/original_image_text_ascent_descent.png -------------------------------------------------------------------------------- /documentation/pixelized_image_text_ascent_descent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/documentation/pixelized_image_text_ascent_descent.png -------------------------------------------------------------------------------- 
/resources/images/arial_50/123456789_blocksize-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/resources/images/arial_50/123456789_blocksize-10.png -------------------------------------------------------------------------------- /resources/images/arial_50/123456789_blocksize-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/resources/images/arial_50/123456789_blocksize-12.png -------------------------------------------------------------------------------- /resources/images/arial_50/123456789_blocksize-14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/resources/images/arial_50/123456789_blocksize-14.png -------------------------------------------------------------------------------- /resources/images/arial_50/123456789_blocksize-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/resources/images/arial_50/123456789_blocksize-16.png -------------------------------------------------------------------------------- /resources/images/arial_50/123456789_blocksize-18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/resources/images/arial_50/123456789_blocksize-18.png -------------------------------------------------------------------------------- /resources/images/arial_50/123456789_blocksize-20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JonasSchatz/DepixHMM/HEAD/resources/images/arial_50/123456789_blocksize-20.png -------------------------------------------------------------------------------- 
class TextGenerator(ABC):
    """Interface for objects that hand out one text sample per call."""

    @abstractmethod
    def generate_text(self) -> str:
        """Return a newly generated text."""
        pass


class RegexTextGenerator(TextGenerator):
    """Produces texts by passing a regular expression to ``rstr.xeger``."""

    def __init__(self, pattern: str):
        # The pattern is stored untouched and only evaluated when a text is requested
        self.pattern = pattern

    def generate_text(self) -> str:
        """Return whatever ``rstr.xeger`` generates for the stored pattern."""
        generated: str = rstr.xeger(self.pattern)
        return generated


class NumberTextGenerator(TextGenerator):
    """Produces strings that consist of ``text_length`` decimal digits."""

    def __init__(self, text_length: int):
        self.text_length = text_length

    def generate_text(self) -> str:
        """Return ``text_length`` characters, each picked with ``random.choice`` from ``string.digits``."""
        picked = []
        for _ in range(self.text_length):
            picked.append(random.choice(string.digits))
        return ''.join(picked)
random_texts: 18 | self.assertRegex(random_text, pattern) 19 | 20 | 21 | class TestNumberTextGenerator(TestCase): 22 | def test_number_text_generator(self): 23 | # Arrange 24 | text_length: int = 5 25 | text_generator: NumberTextGenerator = NumberTextGenerator(text_length=text_length) 26 | 27 | # Act 28 | random_text: str = text_generator.generate_text() 29 | 30 | # Assert 31 | self.assertEqual(len(random_text), text_length) 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Jonas Schatz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
from dataclasses import dataclass, field
import logging
from typing import List, Optional, Tuple

from PIL.ImageFont import FreeTypeFont


@dataclass
class PictureParameters:
    """
    Settings that describe how a text image is rendered and pixelized.
    ``pattern`` and ``font`` are mandatory, everything else has a default.
    """
    pattern: str  # the scripts in this repository pass regular expressions such as r'\d{8,12}'
    font: FreeTypeFont
    font_color: Tuple[int, int, int] = (0, 0, 0)
    background_color: Tuple[int, int, int] = (255, 255, 255)
    # None means "not specified", hence Optional (the previous annotation was a plain int)
    block_size: Optional[int] = None  # ToDo: can be inferred
    randomize_pixelization_origin_x: bool = False
    window_size: int = 5  # presumably measured in pixelized blocks - TODO confirm against windows.py
    offset_y: int = 0  # presumably a vertical shift of the pixelization grid - TODO confirm


@dataclass
class PictureParametersGridSearch(PictureParameters):
    """
    Variant of PictureParameters for grid searches:
    ``window_size`` and ``offset_y`` hold lists of candidate values instead of a single value.
    """
    # default_factory gives every instance its own list instead of a shared mutable default
    window_size: List[int] = field(default_factory=lambda: [5])
    offset_y: List[int] = field(default_factory=lambda: [0])


@dataclass
class TrainingParameters:
    """Number of training images, number of test images and number of clusters for one run."""
    n_img_train: int
    n_img_test: int
    n_clusters: int


@dataclass
class TrainingParametersGridSearch(TrainingParameters):
    """
    Variant of TrainingParameters for grid searches:
    ``n_img_train`` and ``n_clusters`` hold lists of candidate values, ``n_img_test`` stays a single int.
    """
    n_img_train: List[int]
    n_clusters: List[int]


@dataclass
class LoggingParameters:
    """Log levels (constants of the ``logging`` module) for the timer output and the module output."""
    timer_log_level: int = logging.INFO
    module_log_level: int = logging.INFO
class Clusterer(ABC):
    """Interface for assigning window feature vectors to discrete cluster indices."""

    # Cluster centres; concrete implementations are expected to fill this in
    centroids: List[np.ndarray]

    @abstractmethod
    def map_windows_to_cluster(self, windows: List[Window]) -> List[Window]:
        """Store a cluster index on every window and return the windows."""
        pass

    @abstractmethod
    def map_values_to_cluster(self, values: List[np.ndarray]) -> List[int]:
        """Return one cluster index per feature vector in ``values``."""
        pass


class KmeansClusterer(Clusterer):
    """Clusterer backed by scikit-learn's ``KMeans``, fitted once at construction time."""

    kmeans: KMeans

    def __init__(self, windows: List[Window], k: int):
        """
        Fit a k-means model with ``k`` clusters on the ``values`` of the given windows.

        :param windows: Training windows, each contributes its ``values`` as one sample
        :param k: Number of clusters
        """
        features = np.array([window.values for window in windows])
        kmeans = KMeans(n_clusters=k)
        kmeans.fit(features)
        self.kmeans = kmeans
        # Populate the attribute declared on the base class (it was previously never set)
        self.centroids = list(kmeans.cluster_centers_)

    def map_windows_to_cluster(self, windows: List[Window]) -> List[Window]:
        """
        Write the predicted cluster index into the ``k`` attribute of every window.
        The windows are modified in place, the same list is returned.
        """
        k_values: List[int] = self.map_values_to_cluster([window.values for window in windows])
        for window, k_value in zip(windows, k_values):
            window.k = k_value
        return windows

    def map_values_to_cluster(self, values: List[np.ndarray]) -> List[int]:
        """
        Predict the cluster index for every feature vector.

        NOTE: the result is the array returned by ``KMeans.predict`` and not a plain list.
        This is left unchanged on purpose so that existing callers are not affected.
        """
        k_values: List[int] = self.kmeans.predict(np.array(values))
        return k_values
import logging
import unittest

# `FreeTypeFont` cannot be imported from the top-level PIL package and has no `truetype`
# constructor; fonts are loaded through the ImageFont module (as in the other experiments)
from PIL import ImageFont

from resources.fonts import DemoFontPaths
from text_depixelizer.depix_hmm import depix_hmm
from text_depixelizer.parameters import PictureParameters, TrainingParameters, LoggingParameters


class BackAccountExperiments(unittest.TestCase):
    """Experiment (wrapped in a TestCase so it can be started by a test runner) on bank account numbers."""

    def test_bank_account_experiment_original(self):
        """
        Repeating the experiments from Ch. 3.3 of the original publication.
        It is one of the simplest tasks: Redacted text consists of exactly 7 evenly spaced digits
        """

        picture_parameters: PictureParameters = PictureParameters(
            block_size=6,
            pattern=r'\d{7}',
            font=ImageFont.truetype(str(DemoFontPaths.arial), 24),
            window_size=2,
            randomize_pixelization_origin_x=True
        )

        training_parameters: TrainingParameters = TrainingParameters(
            n_img_train=10000,
            n_img_test=20,
            n_clusters=300
        )

        logging_parameters: LoggingParameters = LoggingParameters(
            timer_log_level=logging.INFO,
            module_log_level=logging.DEBUG
        )

        # No img_path is passed and the return value is not checked: the run is only of interest for its log output
        depix_hmm(
            picture_parameters=picture_parameters,
            training_parameters=training_parameters,
            logging_parameters=logging_parameters
        )
def test_create_training_data_random_offset(self): 34 | # Arrange 35 | n_img = 10 36 | picture_parameters: PictureParameters = PictureParameters( 37 | block_size=6, 38 | pattern=r'123456789', 39 | font=demo_picture_parameters.font, 40 | randomize_pixelization_origin_x=True 41 | ) 42 | 43 | # Act 44 | _, _, pixelized_images, windows = create_training_data(n_img, picture_parameters) 45 | 46 | # Assert 47 | self.assertGreater(len(set([p.origin for p in pixelized_images])), 1) 48 | -------------------------------------------------------------------------------- /experiments/experiment_generate_sample_images.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import unittest 3 | from typing import List, Tuple 4 | 5 | from PIL import ImageFont 6 | 7 | from resources.fonts import DemoFontPaths 8 | from text_depixelizer.parameters import PictureParameters 9 | from text_depixelizer.training_pipeline.training_pipeline import create_training_data 10 | 11 | 12 | class GenerateSampleImages(unittest.TestCase): 13 | 14 | def test_generate_sample_images(self): 15 | 16 | # Editable parameters 17 | font_size: int = 30 18 | block_sizes: List[int] = [10] # [1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20] 19 | offset_ys: List[int] = list(range(8)) 20 | font_path: str = str(DemoFontPaths.arial) 21 | text: str = 'v Abjkly 123Bac' 22 | font_color: Tuple[int, int, int] = (255, 255, 255) 23 | background_color: Tuple[int, int, int] = (39, 48, 70) 24 | 25 | # Act 26 | folder_name = f'{Path(font_path).stem}_{font_size}' 27 | output_path = Path(__file__).parent.parent / 'resources' / 'images' / folder_name 28 | output_path.mkdir(parents=True, exist_ok=True) 29 | 30 | for block_size in block_sizes: 31 | for offset_y in offset_ys: 32 | picture_parameters: PictureParameters = PictureParameters( 33 | pattern=rf'{text}', 34 | block_size=block_size, 35 | randomize_pixelization_origin_x=False, 36 | font=ImageFont.truetype(font_path, font_size), 
def create_random_mosaic(img_size: Tuple[int, int], block_size: int):
    """
    Create an image of size img_size that consists of blocks of size block_size.
    Every block gets a random color and it is assured that all blocks have a different color.
    Pixels to the right of / below the last complete block keep the white background.

    :param img_size: (width, height) of the image in pixels
    :param block_size: Edge length of one block in pixels
    :return: The mosaic as RGB image
    :raises ValueError: If there are more blocks than 24-bit RGB colors
    """
    n_tiles = (img_size[0] // block_size, img_size[1] // block_size)

    img: Image = Image.new('RGB', img_size, (255, 255, 255))
    draw: ImageDraw = ImageDraw.Draw(img)

    # Sampling without replacement is what guarantees distinct colors.
    # The previous gradient-based scheme could repeat colors as soon as a dimension exceeded 255 px.
    colors = iter(random.sample(range(1 << 24), n_tiles[0] * n_tiles[1]))

    for i in range(n_tiles[0]):
        for j in range(n_tiles[1]):
            left: int = i * block_size
            right: int = left + block_size - 1
            top: int = j * block_size
            bottom: int = top + block_size - 1

            # Split the 24-bit number into its red, green and blue byte
            rgb: int = next(colors)
            draw.rectangle(
                (left, top, right, bottom),
                fill=((rgb >> 16) & 0xFF, (rgb >> 8) & 0xFF, rgb & 0xFF)
            )

    return img
def show_font_metrics(picture_parameters: PictureParameters):
    """
    Visual debugging aid: generate one training example and open the original and the
    pixelized image with the font metrics drawn on top.

    Red line: text baseline. Blue box: ascent + descent area of the text.
    Green box (pixelized image only): area covered by the pixelization grid.
    """
    # Create one training example
    _, original_images, pixelized_images, _ = create_training_data(1, picture_parameters)

    # Extract all relevant information
    original_image: OriginalImage = original_images[0]
    pixelized_image: PixelizedImage = pixelized_images[0]

    ascent, descent = original_image.image_creation_options.font.getmetrics()
    padding = original_image.image_creation_options.padding
    width: int = original_image.img.size[0]
    n_tiles: Tuple[int, int] = pixelized_image.n_tiles
    block_size: int = pixelized_image.block_size
    pixelization_origin: Tuple[int, int] = pixelized_image.origin

    # Derive additional information
    baseline_y = padding[1] + ascent
    baseline = (padding[0], baseline_y, width - padding[0], baseline_y)
    # padding is (x, y): the text box starts at x = padding[0], y = padding[1]
    text_box = [(padding[0], padding[1]), (width - padding[0], padding[1] + ascent + descent)]
    pixelization_box = [
        pixelization_origin,
        (pixelization_origin[0] + n_tiles[0]*block_size, pixelization_origin[1] + n_tiles[1]*block_size)
    ]

    # Draw
    original_draw = ImageDraw.Draw(original_image.img)
    pixelized_draw = ImageDraw.Draw(pixelized_image.image)

    for draw in (original_draw, pixelized_draw):
        # Draw baseline
        draw.line(baseline, fill='red', width=1)
        # Draw ascent and descent
        draw.rectangle(text_box, outline='blue')

    # Draw pixelization bounding box
    pixelized_draw.rectangle(pixelization_box, outline='green')

    # Show
    original_image.img.show()
    pixelized_image.image.show()
@dataclass
class Window:
    # Characters of the original text that are (partially) visible in this window
    characters: Tuple[str, ...]
    # Flattened color values, one sample per tile of the window
    values: np.ndarray
    # Position of the window, counted in tiles from the left edge of the pixelized area
    window_index: int
    # Optional integer label; not set by create_windows_from_image
    k: Optional[int] = None


@dataclass
class WindowOptions:
    # Width of a window, in tiles
    window_size: int
    # A character belongs to a window if it overlaps by more than this many pixels
    character_threshold: int = 0


def interval_overlap(a: Tuple[int, int], b: Tuple[int, int]) -> int:
    """
    Calculate the overlap between two intervals
    Example: a=(10, 30) and b=(20, 40) gives an overlap of 10
    Disjoint intervals give an overlap of 0
    """
    return max(0, min(a[1], b[1]) - max(a[0], b[0]))


def create_windows_from_image(original_image: OriginalImage, pixelized_image: PixelizedImage, window_options: WindowOptions) -> List[Window]:
    """
    Slide a window of window_options.window_size tiles over the pixelized area, one tile at a time.

    For every position a Window is returned that holds
    - the characters whose bounding box overlaps the window by more than character_threshold pixels
    - one color sample per tile (the top-left pixel of each tile), flattened
    """
    block_size: int = pixelized_image.block_size
    window_width: int = window_options.window_size*block_size

    # The pixel data and the vertical extent are the same for every window
    pixels: np.ndarray = np.asarray(pixelized_image.image)
    window_top: int = pixelized_image.origin[1]
    window_bottom: int = window_top + pixelized_image.n_tiles[1]*block_size - 1

    windows: List[Window] = []
    for window_index in range(pixelized_image.n_tiles[0] - window_options.window_size + 1):
        window_left: int = pixelized_image.origin[0] + window_index*block_size
        window_right: int = window_left + window_width - 1

        characters: Tuple[str, ...] = tuple(
            cbb.char for cbb in original_image.character_bounding_boxes if
            interval_overlap((cbb.left, cbb.right), (window_left, window_right)) > window_options.character_threshold
        )

        # window_bottom / window_right are inclusive, slice ends are exclusive: without
        # the + 1 the last row and column are lost when block_size is 1
        values: np.ndarray = pixels[
            window_top:window_bottom + 1:block_size,
            window_left:window_right + 1:block_size,
            :
        ].flatten()

        windows.append(Window(characters, values, window_index))

    return windows
PixelizationOptions(block_size, offset) 40 | pixelized_image: PixelizedImage = pixelize_image(original_image, pixelization_options) 41 | window_options: WindowOptions = WindowOptions(window_size, character_threshold) 42 | 43 | # Act 44 | windows: List[Window] = create_windows_from_image(original_image, pixelized_image, window_options) 45 | 46 | # Assert: The first character in the first window is the first character of the original text 47 | self.assertEqual(windows[0].characters[0], text[0]) 48 | 49 | # Assert: The amount of values in each window is correct, one for each tiles 50 | self.assertEqual(len(windows[0].values), window_size * 3 * pixelized_image.n_tiles[1]) 51 | 52 | # Assert: The window index is correctly set 53 | self.assertEqual(windows[0].window_index, 0) 54 | 55 | # Assert: The number of windows is correct 56 | self.assertEqual(len(windows), pixelized_image.n_tiles[0] - window_options.window_size + 1) 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /test/training_pipeline/test_original_image.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, List 2 | from unittest import TestCase, skip 3 | 4 | from PIL import ImageFont, Image 5 | 6 | from resources.fonts import DemoFontPaths 7 | from text_depixelizer.training_pipeline.original_image import ImageCreationOptions, generate_image_from_text, \ 8 | OriginalImage, draw_character_bounding_boxes, generate_character_bounding_boxes, CharacterBoundingBox 9 | 10 | 11 | class TestOriginalImage(TestCase): 12 | default_font_size: int = 30 13 | default_font_color: Tuple[int, int, int] = (255, 255, 255) 14 | default_background_color: Tuple[int, int, int] = (0, 0, 0) 15 | default_font: ImageFont = ImageFont.truetype(str(DemoFontPaths.arial), default_font_size) 16 | default_padding: Tuple[int, int] = (30, 30) 17 | 18 | def test_create_image(self): 19 | # Arrange 20 | options: ImageCreationOptions = ImageCreationOptions( 21 | self.default_padding, self.default_font, self.default_font_color, self.default_background_color 22 | ) 23 | text: str = '123456789' 24 | 25 | # Act 26 | original_image: OriginalImage = generate_image_from_text(text, options) 27 | 28 | # Assert: Character bounding boxes are added 29 | 
def reconstruct_string_from_window_characters(window_characters: List[Tuple[str]], block_size: int, font: ImageFont) -> str:
    """
    Reconstruct the string from the HMM results, e.g.
    [('a', 'b'), ('b', 'c')] -> 'abc'

    Every window is placed at index * block_size. Characters that were already
    reconstructed and whose estimated end lies close enough to the window start are
    matched against the beginning of the window, so that a character seen in several
    consecutive windows is only added once. Windows without characters are skipped.
    """

    reconstructed_result: List[str] = []
    estimated_positions: List[Tuple[int, int]] = []

    for index, characters_in_one_window in enumerate(window_characters):
        if not characters_in_one_window:
            # A window that shows no character contributes nothing
            continue

        block_start_position: int = index * block_size
        first_char_width: int = font.getsize(characters_in_one_window[0])[0]

        # Only characters ending at most one character width before the window can be repeated in it
        possible_overlap_area = [
            char
            for char, pos
            in zip(reconstructed_result, estimated_positions)
            if pos[1] >= (block_start_position - first_char_width)]
        overlap: int = get_overlap(possible_overlap_area, characters_in_one_window)

        offset: int = 0
        for character_to_be_added in characters_in_one_window[overlap:]:
            char_width: int = font.getsize(character_to_be_added)[0]
            estimated_start: int = block_start_position + offset
            estimated_positions.append((estimated_start, estimated_start + char_width))
            reconstructed_result.append(character_to_be_added)
            offset += char_width

    return ''.join(reconstructed_result)


def get_overlap(reconstructed_data: List[str], new_characters: Tuple[str, ...]) -> int:
    """
    Return the largest n for which the last n entries of reconstructed_data equal
    the first n entries of new_characters (0 if there is no such n)
    """
    new_chars: List[str] = list(new_characters)
    largest_overlap = 0
    for possible_overlap in range(1, len(new_chars) + 1):
        if reconstructed_data[-possible_overlap:] == new_chars[:possible_overlap]:
            largest_overlap = possible_overlap
    return largest_overlap


def string_similarity(original_string: str, recovered_string: str) -> float:
    """
    Modified edit distance, normalizing the Levenshtein distance between 0 and 1,
    where 1 indicates a perfect match of the recovered string to the original string

    An empty original string only matches an empty recovered string.
    The result is clamped at 0 when the recovered string needs more edits than
    the original string has characters.
    """
    if not original_string:
        return 1.0 if not recovered_string else 0.0
    distance: int = levenshteinDistance(original_string, recovered_string)
    return max(0.0, 1 - distance/len(original_string))


def levenshteinDistance(s1: str, s2: str) -> int:
    """
    Number of single-character insertions, deletions and substitutions needed to turn s1 into s2
    https://stackoverflow.com/questions/2460177/edit-distance-in-python
    """
    # Keep the shorter string in the inner loop so only one short row is stored
    if len(s1) > len(s2):
        s1, s2 = s2, s1

    distances = range(len(s1) + 1)
    for i2, c2 in enumerate(s2):
        distances_ = [i2+1]
        for i1, c1 in enumerate(s1):
            if c1 == c2:
                distances_.append(distances[i1])
            else:
                distances_.append(1 + min((distances[i1], distances[i1 + 1], distances_[-1])))
        distances = distances_
    return distances[-1]
def generate_character_bounding_boxes(text: str, options: ImageCreationOptions) -> List[CharacterBoundingBox]:
    """
    Calculate the bounding boxes for every character.
    The right edge of a character is taken from the rendered width of the text up to
    and including that character, its width and height from the character's own mask.
    Source: https://github.com/python-pillow/Pillow/issues/3921
    """
    font = options.font
    pad_x, pad_y = options.padding

    boxes: List[CharacterBoundingBox] = []
    for end, char in enumerate(text, start=1):
        single_height = font.getsize(char)[1]
        prefix_width, prefix_height = font.getsize(text[:end])
        mask_width, mask_height = font.getmask(char).size

        right = prefix_width + pad_x
        # Use the smaller of the two reported heights as lower edge
        bottom = min(single_height, prefix_height) + pad_y
        boxes.append(CharacterBoundingBox(
            char=char,
            top=bottom - mask_height,
            bottom=bottom,
            left=right - mask_width,
            right=right
        ))
    return boxes


def draw_character_bounding_boxes(original_image: OriginalImage) -> Image:
    """
    Return a copy of an original image with the character bounding boxes drawn onto it for visualization
    The image stored in original_image is left untouched
    """
    annotated: Image = original_image.img.copy()
    canvas = ImageDraw.Draw(annotated)
    for box in original_image.character_bounding_boxes:
        canvas.rectangle((box.left, box.top, box.right, box.bottom), fill=None, outline=(255, 0, 0))
    return annotated
@dataclass
class PixelizationOptions:
    # Edge length of one tile, in pixels
    block_size: int
    # (x, y) shift of the pixelization grid; pixelize_image reduces it modulo block_size
    offset: Tuple[int, int]


@dataclass
class PixelizedImage:
    # Number of tiles in (x, y) direction
    n_tiles: Tuple[int, int]
    # Edge length of one tile, in pixels
    block_size: int
    # Top-left corner of the pixelized area, in image coordinates
    origin: Tuple[int, int]
    # The image with the pixelized area drawn in
    image: Image


def _tiles_above_baseline(ascent: int, offset_y: int, block_size: int) -> int:
    """
    Number of tile rows needed to cover the part of the text above the baseline
    """
    return math.ceil((ascent - offset_y) / block_size)


def determine_number_of_tiles(text_width, font_metrics, offset: Tuple[int, int], block_size: int) -> Tuple[int, int]:
    """
    Return how many tiles (x, y) are needed to cover the text.
    font_metrics is (ascent, descent); the rows above and below the baseline are
    rounded up separately and then added.
    """
    tiles_y_above_baseline: int = _tiles_above_baseline(font_metrics[0], offset[1], block_size)
    tiles_y_below_baseline: int = math.ceil((font_metrics[1] + offset[1]) / block_size)
    tiles_x = math.ceil((text_width + offset[0]) / block_size)
    return tiles_x, tiles_y_above_baseline + tiles_y_below_baseline


def determine_origin(padding: Tuple[int, int], font_metrics: Tuple[int, int], offset: Tuple[int, int], block_size: int) -> Tuple[int, int]:
    """
    Return the top-left corner (x, y) of the pixelized area in image coordinates.
    The grid is shifted left by offset[0] and placed so that the rows above the
    baseline end offset[1] pixels above the baseline.
    """
    origin_x: int = padding[0] - offset[0]
    tiles_y_above_baseline: int = _tiles_above_baseline(font_metrics[0], offset[1], block_size)
    origin_y: int = padding[1] + font_metrics[0] - (offset[1] + tiles_y_above_baseline*block_size)
    return origin_x, origin_y


def get_average_color(img: Image) -> Tuple[int, int, int]:
    """
    Return the mean color of img, rounded per channel, as a tuple of plain Python ints
    """
    # np.rint yields numpy scalars; convert so the result matches the annotated type
    return tuple(int(channel) for channel in np.rint(np.mean(img, axis=(0, 1))))
class HmmAttributeException(Exception):
    """
    Raised when the probability tables of an HMM do not fit its states or observations
    """


@dataclass
class HMM:
    """
    Hidden Markov model with discrete observations.

    starting_probabilities has one entry per state, transition_probabilities[k, j] is
    the probability of moving from state k to state j and emission_probabilities[j, o]
    is the probability of seeing the observation with index o in state j.
    """
    observations: List[Any]
    states: List[Any]
    starting_probabilities: np.ndarray
    transition_probabilities: np.ndarray
    emission_probabilities: np.ndarray

    @cached_property
    def log_starting_probabilities(self) -> np.ndarray:
        return np.log(self.starting_probabilities)

    @cached_property
    def log_transition_probabilities(self) -> np.ndarray:
        return np.log(self.transition_probabilities)

    @cached_property
    def log_emission_probabilities(self) -> np.ndarray:
        return np.log(self.emission_probabilities)

    def validate_attributes(self) -> None:
        """
        Check that the probability tables have the expected shapes.

        Raises HmmAttributeException on a shape mismatch. Rows that do not sum to 1
        (within floating point tolerance) only trigger a warning.
        """
        n_states: int = len(self.states)

        if len(self.starting_probabilities) != n_states:
            raise HmmAttributeException('Starting probabilities must have one entry for each state!')

        if self.transition_probabilities.shape != (n_states, n_states):
            raise HmmAttributeException('Transition probabilities must have shape (n_states, n_states)')

        # Normalized rows rarely sum to exactly 1.0 in floating point, so compare with a tolerance
        if not np.allclose(np.sum(self.transition_probabilities, axis=1), 1):
            logging.warning('Careful, transition probabilities not properly normalized')

        if self.emission_probabilities.shape != (n_states, len(self.observations)):
            raise HmmAttributeException('Emission probabilities must have shape (n_states, n_observations)')

        if not np.allclose(np.sum(self.emission_probabilities, axis=1), 1):
            logging.warning('Careful, emission probabilities not properly normalized')

    def viterbi(self, sequence: List[Any]):
        """
        Return the most likely state for every entry of sequence (a list of observation indices).
        Works on plain probabilities and therefore underflows for long sequences, see log_viterbi.
        """
        return self._most_likely_path(
            sequence,
            self.starting_probabilities,
            self.transition_probabilities,
            self.emission_probabilities,
            np.multiply
        )

    def log_viterbi(self, sequence: List[Any]):
        """
        Same as viterbi, but adds log-probabilities instead of multiplying probabilities
        """
        return self._most_likely_path(
            sequence,
            self.log_starting_probabilities,
            self.log_transition_probabilities,
            self.log_emission_probabilities,
            np.add
        )

    def _most_likely_path(self, sequence, start: np.ndarray, transition: np.ndarray, emission: np.ndarray, combine):
        """
        Viterbi recursion shared by viterbi and log_viterbi.
        combine joins two scores: np.multiply for probabilities, np.add for log-probabilities.
        """
        n_steps: int = len(sequence)
        if n_steps == 0:
            return []

        # v[j, i]: score of the best path that ends in state j at step i
        v: np.ndarray = np.zeros((len(self.states), n_steps))
        # pointers[j, i]: predecessor of state j on that path
        pointers: np.ndarray = np.zeros(v.shape, dtype=np.intp)

        v[:, 0] = combine(start, emission[:, sequence[0]])

        for i in range(1, n_steps):
            # candidates[j, k]: score of being in state k at step i-1 and moving on to state j
            candidates = combine(v[:, i-1], transition.T)
            pointers[:, i] = np.argmax(candidates, axis=1)
            v[:, i] = np.max(combine(candidates, emission[:, sequence[i], np.newaxis]), axis=1)

        # Backtracking; an unsigned byte array would overflow for more than 256 states
        x: np.ndarray = np.empty(n_steps, dtype=np.intp)
        x[-1] = np.argmax(v[:, -1])
        for i in range(n_steps - 1, 0, -1):
            x[i-1] = pointers[x[i], i]

        return [self.states[i] for i in x]
= np.random.rand(n_possible_states, n_possible_states) 23 | transition_probabilities_normalized = transition_probabilities / np.sum(transition_probabilities, axis=1)[:, np.newaxis] 24 | 25 | emission_probabilities: np.ndarray = np.random.rand(n_possible_states, n_possible_observations) 26 | emission_probabilities_normalized = emission_probabilities / np.sum(emission_probabilities, axis=1)[:, np.newaxis] 27 | 28 | hmm: HMM = HMM( 29 | observations=observations, 30 | states=possible_states, 31 | starting_probabilities=starting_probabilities, 32 | transition_probabilities=transition_probabilities_normalized, 33 | emission_probabilities=emission_probabilities_normalized 34 | ) 35 | 36 | return hmm 37 | 38 | def test_viterbi(self): 39 | # Arrange 40 | hmm: HMM = HMM( 41 | observations=[0, 1, 2], 42 | states=[('A', 'b'), ('b', )], 43 | starting_probabilities=np.array([0.7, 0.3]), 44 | transition_probabilities=np.array([[0.9, 0.1], [0.1, 0.9]]), 45 | emission_probabilities=np.array([[0.1, 0.4, 0.5], [0.3, 0.7, 0.0]]) 46 | ) 47 | 48 | sequence: List[int] = [2, 2, 2, 2, 2, 2, 2] 49 | 50 | # Act 51 | result = hmm.viterbi(sequence) 52 | 53 | # Assert 54 | self.assertEqual(len(result), len(sequence)) 55 | self.assertTrue(all([r in hmm.states for r in result])) 56 | 57 | def test_viterbi_fail_for_numerical_underflow(self): 58 | """ 59 | When the observation sequence gets too long, the regular viterbi will fail due to numerical underflow 60 | """ 61 | 62 | # Parameters 63 | n_possible_observations: int = 100 64 | n_possible_states: int = 25 65 | observation_length: int = 10000 66 | 67 | # Arrange 68 | possible_observations: List[int] = list(range(n_possible_observations)) 69 | possible_states: List[int] = list(range(n_possible_states)) 70 | 71 | observations: List[int] = np.random.choice(possible_observations, size=observation_length) 72 | hmm: HMM = self.create_random_hmm(observations, possible_states, possible_observations) 73 | 74 | # Act 75 | result_viterbi = 
hmm.viterbi(observations) 76 | result_log_viterbi: List[int] = hmm.log_viterbi(observations) 77 | 78 | # Assert 79 | self.assertNotEqual(result_viterbi, result_log_viterbi) 80 | 81 | def test_compare_viterbi_and_log(self): 82 | """ 83 | Regular viterbi and log-viterbi should return the same values (for shorter sequences) 84 | """ 85 | np.random.seed(0) 86 | 87 | # Set parameters 88 | iterations: int = 50 89 | n_possible_observations: int = 100 90 | n_possible_states: int = 25 91 | max_observation_length: int = 100 92 | 93 | # Arrange (1) 94 | possible_observations: List[int] = list(range(n_possible_observations)) 95 | possible_states: List[int] = list(range(n_possible_states)) 96 | 97 | for i in range(iterations): 98 | # Arrange (2) 99 | observation_length: int = np.random.randint(1, max_observation_length) 100 | observations: List[int] = np.random.choice(possible_observations, size=observation_length) 101 | hmm: HMM = self.create_random_hmm(observations, possible_states, possible_observations) 102 | 103 | # Act 104 | result_viterbi = hmm.viterbi(observations) 105 | result_log_viterbi: List[int] = hmm.log_viterbi(observations) 106 | 107 | # Assert 108 | self.assertListEqual(result_viterbi, result_log_viterbi) 109 | -------------------------------------------------------------------------------- /test/HMM/test_hmm_result_reconstructor.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from PIL import ImageFont 4 | 5 | from resources.fonts import DemoFontPaths 6 | from text_depixelizer.HMM.hmm_result_reconstructor import get_overlap, reconstruct_string_from_window_characters 7 | 8 | 9 | class HmmResultReconstructorTests(unittest.TestCase): 10 | 11 | def test_reconstruct_string_from_window_characters_two_duplicates(self): 12 | # Arrange 13 | window_characters = [('8', '1'), ('8', '1'), ('8', '1'), ('8', '1'), ('8', '1'), ('1', '2'), ('1', '2'), ('1', '2'), ('1', '2'), ('1', '2'), ('2', '9'), ('2', '9'), 
('2', '9'), ('2', '9'), ('9', '2'), ('9', '2'), ('9', '2'),('9', '2'), ('9', '2'), ('2', '7'), ('2', '7'), ('2', '7'), ('2', '7'), ('2', '7'), ('7', '7'), ('7', '7'), ('7', '7'), ('7', '7'), ('7', '2'), ('7', '2'), ('7', '2'), ('7', '2'), ('7', '2'), ('2', '0'), ('2', '0'),('2', '0'), ('2', '0'), ('2', '0'), ('0', '2'), ('0', '2'), ('0', '2'), ('0', '2'), ('2',)] 14 | expected_reconstructed_string: str = '8129277202' 15 | font_size: int = 50 16 | block_size: int = 6 17 | font = ImageFont.truetype(str(DemoFontPaths.arial), font_size) 18 | 19 | # Act 20 | reconstructed_string: str = reconstruct_string_from_window_characters( 21 | window_characters=window_characters, 22 | block_size=block_size, 23 | font=font 24 | ) 25 | 26 | # Assert 27 | self.assertEqual(reconstructed_string, expected_reconstructed_string) 28 | 29 | def test_reconstruct_string_from_window_characters_seven_duplicates(self): 30 | # Arrange 31 | window_characters = [('1', '2'), ('1', '2'), ('1', '2'), ('1', '2'), ('1', '2'), ('2', '3'), ('2', '3'), ('2', '3'), ('2', '3'), ('2', '3'), ('3', '4'), ('3', '4'), ('3', '4'), ('3', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '4'), ('4', '5'), ('4', '5'), ('4', '5'), ('4', '5'), ('4', '5'), ('5',)] 32 | expected_reconstructed_string: str = '12344444445' 33 | font_size: int = 50 34 | block_size: int = 6 35 | font = ImageFont.truetype(str(DemoFontPaths.arial), font_size) 36 | 37 | # Act 38 | reconstructed_string: str = reconstruct_string_from_window_characters( 39 | window_characters=window_characters, 40 | block_size=block_size, 41 | font=font 42 | ) 43 | 44 | # Assert 45 | self.assertEqual(reconstructed_string, expected_reconstructed_string) 46 | 47 | def 
test_check_overlap_complete(self): 48 | # Arrange 49 | reconstructed_data = ['1', '2', '3'] 50 | new_characters = ('2', '3') 51 | expected_overlap = 2 52 | 53 | # Act 54 | actual_overlap: int = get_overlap(reconstructed_data, new_characters) 55 | 56 | # Assert 57 | self.assertEqual(actual_overlap, expected_overlap) 58 | 59 | def test_check_overlap_partial(self): 60 | # Arrange 61 | reconstructed_data = ['1', '2', '3'] 62 | new_characters = ('3', '4') 63 | expected_overlap = 1 64 | 65 | # Act 66 | actual_overlap: int = get_overlap(reconstructed_data, new_characters) 67 | 68 | # Assert 69 | self.assertEqual(actual_overlap, expected_overlap) 70 | 71 | def test_check_overlap_no_overlap(self): 72 | # Arrange 73 | reconstructed_data = ['1', '2', '3'] 74 | new_characters = ('4',) 75 | expected_overlap = 0 76 | 77 | # Act 78 | actual_overlap: int = get_overlap(reconstructed_data, new_characters) 79 | 80 | # Assert 81 | self.assertEqual(actual_overlap, expected_overlap) 82 | 83 | def test_check_overlap_empty_reconstructed_data(self): 84 | # Arrange 85 | reconstructed_data = [] 86 | new_characters = ('4',) 87 | expected_overlap = 0 88 | 89 | # Act 90 | actual_overlap: int = get_overlap(reconstructed_data, new_characters) 91 | 92 | # Assert 93 | self.assertEqual(actual_overlap, expected_overlap) 94 | 95 | def test_check_overlap_partially_empty_reconstructed_data(self): 96 | # Arrange 97 | reconstructed_data = ['4'] 98 | new_characters = ('4', '5') 99 | expected_overlap = 1 100 | 101 | # Act 102 | actual_overlap: int = get_overlap(reconstructed_data, new_characters) 103 | 104 | # Assert 105 | self.assertEqual(actual_overlap, expected_overlap) 106 | -------------------------------------------------------------------------------- /test/HMM/test_depix_hmm.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from pathlib import Path 3 | from typing import List 4 | 5 | import numpy as np 6 | from PIL import Image, ImageFont 7 
def test_get_starting_probabilities(self):
    """
    calculate_hmm_properties derives observations, states and the three probability
    matrices from a hand-written list of already clustered windows
    """
    # Arrange: window_index == 0 marks the first window of an image, so this list
    # describes two images. np.array is used for the values: np.ndarray([1, 2, 3])
    # would allocate an uninitialised array of shape (1, 2, 3) instead.
    windows: List[Window] = [
        Window(characters=('A', 'b'), values=np.array([1, 2, 3]), window_index=0, k=0),
        Window(characters=('b',), values=np.array([2, 3, 4]), window_index=1, k=0),
        Window(characters=('b',), values=np.array([3, 4, 5]), window_index=2, k=1),
        Window(characters=('b', 'c'), values=np.array([4, 5, 6]), window_index=3, k=1),
        Window(characters=('d',), values=np.array([5, 6, 7]), window_index=4, k=2),
        Window(characters=('X',), values=np.array([6, 7, 8]), window_index=0, k=3)
    ]
    depix_hmm: DepixHMM = DepixHMM(demo_picture_parameters, demo_training_parameters)

    # Act
    depix_hmm.calculate_hmm_properties(windows_train=windows)

    # Assert: Observations
    self.assertCountEqual(depix_hmm.observations, (0, 1, 2, 3))

    # Assert: States
    self.assertCountEqual(depix_hmm.states, (('A', 'b'), ('b',), ('b', 'c'), ('d',), ('X',)))

    # Assert: Starting probabilities (two images, one of them starts with ('A', 'b'))
    self.assertEqual(depix_hmm.starting_probabilities[depix_hmm.states.index(('A', 'b'))], 0.5)
    self.assertEqual(depix_hmm.starting_probabilities[depix_hmm.states.index(('b',))], 0.0)

    # Assert: Transition Probabilities
    self.assertEqual(depix_hmm.transition_probabilities.shape, (len(depix_hmm.states), len(depix_hmm.states)))
    self.assertNotEqual(depix_hmm.transition_probabilities[depix_hmm.states.index(('b',)), depix_hmm.states.index(('b',))], 0)
    for s in depix_hmm.transition_probabilities.sum(axis=1):
        self.assertAlmostEqual(s, 1.0, places=3)

    # Assert Emission Probabilities
    self.assertEqual(depix_hmm.emission_probabilities.shape, (len(depix_hmm.states), len(depix_hmm.observations)))
    for s in depix_hmm.emission_probabilities.sum(axis=1):
        self.assertAlmostEqual(s, 1.0, places=3)
training_parameters) 103 | depix_hmm.train() 104 | 105 | # Act 106 | with Image.open(img_path) as img: 107 | reconstructed_string: str = depix_hmm.test_image(img) 108 | 109 | # Assert 110 | self.assertIsInstance(reconstructed_string, str) 111 | -------------------------------------------------------------------------------- /text_depixelizer/depix_hmm.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import logging 3 | from pathlib import Path 4 | from typing import Optional 5 | 6 | from PIL import ImageFont, Image 7 | 8 | from resources.fonts import DemoFontPaths 9 | from text_depixelizer.HMM.depix_hmm import DepixHMM 10 | from text_depixelizer.parameters import PictureParameters, TrainingParameters, LoggingParameters, \ 11 | PictureParametersGridSearch, TrainingParametersGridSearch 12 | 13 | 14 | def init_logging(logging_parameters: LoggingParameters): 15 | logging.basicConfig(level=logging_parameters.module_log_level) 16 | time_logger: logging.Logger = logging.getLogger('time_logger') 17 | time_logger.setLevel(logging_parameters.timer_log_level) 18 | 19 | 20 | def depix_hmm(picture_parameters: PictureParameters, 21 | training_parameters: TrainingParameters, 22 | logging_parameters: LoggingParameters = None, 23 | img_path: Path = None) -> Optional[str]: 24 | 25 | if logging_parameters: 26 | init_logging(logging_parameters) 27 | 28 | # Train and evaluate the HMM 29 | hmm: DepixHMM = DepixHMM(picture_parameters, training_parameters) 30 | hmm.train() 31 | accuracy, average_distance = hmm.evaluate() 32 | logging.info(f'Accuracy: {accuracy}, Avg. 
def depix_hmm_grid_search(picture_parameters_grid_search: PictureParametersGridSearch,
                          training_parameters_grid_search: TrainingParametersGridSearch,
                          logging_parameters: LoggingParameters = None,
                          img_path: Path = None) -> Optional[str]:
    """
    Trains and evaluates one HMM per combination of window size, number of clusters,
    number of training images and y-offset, and keeps the one with the highest accuracy.

    If img_path is given, every candidate's reconstruction of that image is logged and the
    reconstruction of the best candidate is returned. Returns None when no image path is
    given or when the grid contains no parameter combination.
    """
    if logging_parameters:
        init_logging(logging_parameters)

    best_hmm: Optional[DepixHMM] = None
    best_accuracy: float = 0.0
    best_avg_distance: float = 1.0

    # Iterate through grid and find best
    for window_size, n_clusters, n_img_train, offset_y in itertools.product(
            picture_parameters_grid_search.window_size,
            training_parameters_grid_search.n_clusters,
            training_parameters_grid_search.n_img_train,
            picture_parameters_grid_search.offset_y):

        picture_parameters: PictureParameters = PictureParameters(
            pattern=picture_parameters_grid_search.pattern,
            font=picture_parameters_grid_search.font,
            block_size=picture_parameters_grid_search.block_size,
            window_size=window_size,
            offset_y=offset_y
        )

        training_parameters: TrainingParameters = TrainingParameters(
            n_img_test=training_parameters_grid_search.n_img_test,
            n_img_train=n_img_train,
            n_clusters=n_clusters
        )

        hmm: DepixHMM = DepixHMM(picture_parameters, training_parameters)
        hmm.train()
        accuracy, average_distance = hmm.evaluate()
        logging.info(f'Window Size: {window_size}, Clusters: {n_clusters}, Training Images: {n_img_train}, Offset Y: {offset_y}')
        logging.info(f'Accuracy: {accuracy}, Avg. Distance: {average_distance} \n')

        if img_path:
            with Image.open(img_path) as img:
                reconstructed_string: str = hmm.test_image(img)
                logging.warning(f'Reconstructed string: {reconstructed_string}')

        # The first candidate is always accepted; otherwise best_hmm would stay None
        # whenever no candidate reaches an accuracy above 0.0.
        if best_hmm is None or accuracy > best_accuracy:
            best_hmm = hmm
            best_accuracy = accuracy
            best_avg_distance = average_distance

    # An empty grid leaves nothing to report or to analyze the image with
    if best_hmm is None:
        logging.warning('Grid search did not contain any parameter combination')
        return None

    # Finalize
    logging.warning(f'Found HMM with accuracy {best_accuracy} and average distance {best_avg_distance}')
    logging.warning(f'Associated parameters: ')
    logging.warning(f' Window Size: {best_hmm.picture_parameters.window_size}')
    logging.warning(f' Clusters: {best_hmm.training_parameters.n_clusters}')
    logging.warning(f' Training Images: {best_hmm.training_parameters.n_img_train}')
    logging.warning(f' Offset Y: {best_hmm.picture_parameters.offset_y}')

    # If a path to an image was given, analyze the image
    if img_path:
        with Image.open(img_path) as img:
            reconstructed_string: str = best_hmm.test_image(img)
            return reconstructed_string

    return None
def create_training_data(n_img: int, picture_parameters: PictureParameters) \
        -> Tuple[List[str], List[OriginalImage], List[PixelizedImage], List[List[Window]]]:
    """
    Runs the full data generation pipeline needed to train the HMM:
    random texts -> rendered images -> pixelized images -> windows.

    Returns the four intermediate results in that order; the windows are grouped per image.
    """
    params = picture_parameters

    # 1) Random strings following the configured regex pattern
    texts: List[str] = generate_texts(n_img, params.pattern)

    # 2) One rendered image per text
    original_images: List[OriginalImage] = generate_original_images(
        texts=texts,
        font=params.font,
        font_color=params.font_color,
        background_color=params.background_color
    )

    # 3) Pixelized counterpart of every rendered image
    pixelized_images: List[PixelizedImage] = generate_pixelized_images(
        original_images,
        params.block_size,
        params.randomize_pixelization_origin_x,
        params.offset_y
    )

    # 4) Windows over every pixelized image
    windows: List[List[Window]] = generate_windows(original_images, pixelized_images, params.window_size)

    return texts, original_images, pixelized_images, windows
def generate_pixelized_images(original_images: List[OriginalImage],
                              block_size: int,
                              randomize_pixelization_origin_x: bool,
                              pixelization_offset_y: int) -> List[PixelizedImage]:
    """
    Pixelizes the original images with the given block_size.
    By default, the pixelization is in line with the baseline of the text and the right edge of the bounding box. This
    can be varied with the other two parameters:
    - randomize_pixelization_origin_x: draw a random x-offset in [0, block_size - 1] for every image
    - pixelization_offset_y: fixed y-offset that is applied to every image
    """
    time_logger: logging.Logger = logging.getLogger('time_logger')
    t = time.perf_counter()

    # randint is inclusive on both ends. An offset of block_size yields the same grid as an
    # offset of 0, so the upper bound is block_size - 1 to draw every distinct offset with
    # equal probability.
    pixelization_options: List[PixelizationOptions] = [PixelizationOptions(
        block_size,
        offset=(
            randint(0, block_size - 1) if randomize_pixelization_origin_x else 0,
            pixelization_offset_y
        )
    ) for _ in original_images]

    pixelized_images: List[PixelizedImage] = [
        pixelize_image(original_image, pix_o)
        for original_image, pix_o in zip(original_images, pixelization_options)
    ]

    # Timing is only reported for larger batches
    if len(original_images) > 100:
        time_logger.info(f'Pixelated images in {time.perf_counter() - t} seconds')

    return pixelized_images
-------------------------------------------------------------------------------- /test/training_pipeline/test_pixelize_image.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, List 2 | from unittest import TestCase 3 | 4 | import numpy as np 5 | from PIL import Image, ImageFont 6 | 7 | from resources.fonts import DemoFontPaths 8 | from test import utils 9 | from test.utils import create_random_mosaic 10 | from text_depixelizer.training_pipeline.original_image import ImageCreationOptions, OriginalImage, generate_image_from_text 11 | from text_depixelizer.training_pipeline.pixelized_image import determine_number_of_tiles, PixelizationOptions, pixelize_image, \ 12 | pixelize_area, PixelizedImage, determine_origin 13 | 14 | 15 | class TestMosaicImage(TestCase): 16 | default_font_size: int = 50 17 | default_font: ImageFont = ImageFont.truetype(str(DemoFontPaths.arial), default_font_size) 18 | default_padding: Tuple[int, int] = (30, 30) 19 | 20 | def test_determine_number_of_tiles_no_offset(self): 21 | # Arrange 22 | text_width: int = 25 23 | font_metrics: Tuple[int, int] = (12, 8) 24 | offset: Tuple[int, int] = (0, 0) 25 | block_size: int = 10 26 | 27 | # Act 28 | n_tiles: Tuple[int, int] = determine_number_of_tiles(text_width, font_metrics, offset, block_size) 29 | 30 | # Assert 31 | self.assertTupleEqual((3, 3), n_tiles) 32 | 33 | def test_determine_number_of_tiles_y_offset_small(self): 34 | # Arrange 35 | text_width: int = 25 36 | font_metrics: Tuple[int, int] = (12, 8) 37 | offset: Tuple[int, int] = (0, 4) 38 | block_size: int = 10 39 | 40 | # Act 41 | n_tiles: Tuple[int, int] = determine_number_of_tiles(text_width, font_metrics, offset, block_size) 42 | 43 | # Assert 44 | self.assertTupleEqual((3, 3), n_tiles) 45 | 46 | def test_determine_number_of_tiles_y_offset_large(self): 47 | # Arrange 48 | text_width: int = 25 49 | font_metrics: Tuple[int, int] = (12, 8) 50 | offset: Tuple[int, int] = (0, 9) 51 | 
block_size: int = 10 52 | 53 | # Act 54 | n_tiles: Tuple[int, int] = determine_number_of_tiles(text_width, font_metrics, offset, block_size) 55 | 56 | # Assert 57 | self.assertTupleEqual((3, 3), n_tiles) 58 | 59 | def test_determine_origin_no_offset(self): 60 | # Arrange 61 | padding: Tuple[int, int] = (20, 20) 62 | font_metrics: Tuple[int, int] = (12, 8) 63 | offset: Tuple[int, int] = (0, 0) 64 | block_size: int = 10 65 | 66 | # Act 67 | origin: Tuple[int, int] = determine_origin(padding, font_metrics, offset, block_size) 68 | 69 | # Assert 70 | self.assertTupleEqual((20, 12), origin) 71 | 72 | def test_determine_origin_y_offset_small(self): 73 | # Arrange 74 | padding: Tuple[int, int] = (20, 20) 75 | font_metrics: Tuple[int, int] = (12, 8) 76 | offset: Tuple[int, int] = (0, 6) 77 | block_size: int = 10 78 | 79 | # Act 80 | origin: Tuple[int, int] = determine_origin(padding, font_metrics, offset, block_size) 81 | 82 | # Assert 83 | self.assertTupleEqual((20, 16), origin) 84 | 85 | def test_pixelize_image(self): 86 | # Arrange 87 | image_creation_options: ImageCreationOptions = ImageCreationOptions(self.default_padding, self.default_font) 88 | original_image: OriginalImage = generate_image_from_text(text='123456789', options=image_creation_options) 89 | block_size: int = 10 90 | offset: Tuple[int, int] = (0, 0) 91 | pixelization_options: PixelizationOptions = PixelizationOptions(block_size, offset) 92 | 93 | # Act 94 | pixelized_image: PixelizedImage = pixelize_image(original_image, pixelization_options) 95 | 96 | # Assert 97 | self.assertTrue(pixelized_image.n_tiles[0] > 0) 98 | self.assertEqual(pixelized_image.block_size, block_size) 99 | self.assertEqual(pixelized_image.origin, (30, 26)) 100 | 101 | def test_pixelize_image_correct_offset(self): 102 | # Arrange 103 | img_size = (120, 120) 104 | block_size = 10 105 | 106 | n_tiles = (int(img_size[0] / block_size), int(img_size[1] / block_size)) 107 | test_image: Image = create_random_mosaic(img_size, block_size) 
def test_randomize_offset_x(self):
    """
    Shifting the pixelization grid by one pixel in x changes the image, shifting it by a
    whole block does not
    """
    # Arrange
    original_image: OriginalImage = utils.create_image(text='123456789')

    block_size: int = 10
    pixelization_options: List[PixelizationOptions] = [
        PixelizationOptions(block_size=block_size, offset=(i, 0)) for i in range(block_size+1)
    ]

    # Act
    pixelized_images: List[PixelizedImage] = [pixelize_image(original_image=original_image, pixelization_options=p) for p in pixelization_options]
    pixel_arrays: List[np.ndarray] = [np.asarray(pixelized_image.image) for pixelized_image in pixelized_images]

    # Assert: There is a difference between images with an offset of one.
    # Arrays are compared element-wise; a summed difference of unsigned pixel values can
    # wrap around and says nothing about equality.
    self.assertFalse(np.array_equal(pixel_arrays[0], pixel_arrays[1]))

    # Assert: There is no difference between images with an offset of block_size
    self.assertTrue(np.array_equal(pixel_arrays[0], pixel_arrays[block_size]))
class DepixHMM(HMM):
    """
    Hidden Markov model for recovering text from pixelized images.

    Hidden states are the character tuples visible in a window, observations are the
    cluster indices that the pixel values of a window are mapped to. The model generates
    its own training and evaluation data from the given picture parameters.
    """
    observations: List[int]
    states: List[Tuple[str, ...]]

    picture_parameters: PictureParameters
    training_parameters: TrainingParameters
    clusterer: Clusterer

    def __init__(self, picture_parameters: PictureParameters, training_parameters: TrainingParameters):
        # Only the configuration is stored here; observations, states, the clusterer and
        # the probability matrices are set by train() / calculate_hmm_properties()
        self.picture_parameters = picture_parameters
        self.training_parameters = training_parameters

    def train(self):
        """
        Generates training images, clusters their windows and derives the HMM properties
        from the clustered windows
        """
        time_logger: logging.Logger = logging.getLogger('time_logger')

        # Generate training data (only the windows are needed here)
        _, _, _, windows_train = create_training_data(
            n_img=self.training_parameters.n_img_train,
            picture_parameters=self.picture_parameters
        )
        windows_train_flattened = [window for windows in windows_train for window in windows]

        t: float = time.perf_counter()
        clusterer: KmeansClusterer = KmeansClusterer(windows_train_flattened, self.training_parameters.n_clusters)
        self.clusterer = clusterer
        windows_train_flattened = clusterer.map_windows_to_cluster(windows_train_flattened)

        time_logger.info(f'Performed clustering in {time.perf_counter() - t} seconds')

        used_clusters_in_training_set: int = len({window.k for window in windows_train_flattened})
        if used_clusters_in_training_set != self.training_parameters.n_clusters:
            logging.error(f'\n Out of possibly {self.training_parameters.n_clusters}, only '
                          f'{used_clusters_in_training_set} are used. This might be the case when using a monospaced '
                          f'font with a font size that is a multiple of the window size.')

        # Generate observations and states
        self.calculate_hmm_properties(windows_train_flattened)

    def calculate_hmm_properties(self, windows_train: List[Window]):
        """
        Takes a flattened list of windows to determine the probability matrices of the hidden markov model
        Note that the windows have to be clustered already!
        """
        time_logger: logging.Logger = logging.getLogger('time_logger')
        t = time.perf_counter()

        # Observations are the distinct cluster indices, states the distinct character tuples
        observations: List[int] = list({window.k for window in windows_train})
        self.observations: List[int] = observations

        states: List[Tuple[str, ...]] = list({window.characters for window in windows_train})
        self.states: List[Tuple[str, ...]] = states

        # Compute the probability matrices
        self.starting_probabilities: np.ndarray = self.get_starting_probabilities(windows_train, states)
        self.transition_probabilities: np.ndarray = self.get_transition_probabilities(windows_train, states)
        self.emission_probabilities: np.ndarray = self.get_emission_probabilities(windows_train, states, observations)

        time_logger.info(f'Calculated HMM Properties in {time.perf_counter() - t} seconds')

    def test_image(self, img: Image) -> str:
        """
        Takes a pixelized image and reconstructs the hidden string
        """

        block_size = self.picture_parameters.block_size
        window_size = self.picture_parameters.window_size
        window_width = block_size * window_size

        n_tiles: Tuple[int, ...] = tuple(int(el/block_size) for el in img.size)

        # Convert the image once instead of once per window
        pixels: np.ndarray = np.asarray(img)
        window_top = 0
        window_bottom = n_tiles[1]*block_size - 1

        pixel_values_of_windows: List[np.ndarray] = []
        for window_index in range(n_tiles[0] - window_size + 1):
            window_left: int = window_index*block_size
            window_right: int = window_left + window_width - 1

            # A step of block_size samples the first pixel of every block inside the window
            values: np.ndarray = pixels[
                window_top:window_bottom:block_size,
                window_left:window_right:block_size,
                :].flatten()
            pixel_values_of_windows.append(values)

        k_values: List[int] = self.clusterer.map_values_to_cluster(pixel_values_of_windows)
        return self.test_cluster_indices(k_values)

    def test_windows(self, windows: List[Window]) -> str:
        """
        Takes a list of windows, maps them to their clusters and returns the most likely sequence of characters
        """
        windows = self.clusterer.map_windows_to_cluster(windows)
        return self.test_cluster_indices([window.k for window in windows])

    def test_cluster_indices(self, indices: List[int]) -> str:
        """
        Decodes a sequence of cluster indices with log_viterbi and merges the resulting
        per-window character tuples into one string
        """
        result: List[Tuple[str, ...]] = self.log_viterbi(indices)
        return reconstruct_string_from_window_characters(result, self.picture_parameters.block_size,
                                                         self.picture_parameters.font)

    def evaluate(self) -> Tuple[float, float]:
        """
        Generates test data and checks it with the already trained model. Returns two values:
        - Accuracy: Percentage of correctly reconstructing the string from the image
        - average_similarity: Average modified edit distance of the reconstructed string to the original text

        Returns (0.0, 0.0) if no test images were generated.
        """

        self.print_states()

        time_logger: logging.Logger = logging.getLogger('time_logger')
        t = time.perf_counter()

        texts_evaluate, _, _, windows_evaluate = create_training_data(
            n_img=self.training_parameters.n_img_test,
            picture_parameters=self.picture_parameters
        )

        similarities: List[float] = []
        for text, windows in zip(texts_evaluate, windows_evaluate):
            reconstructed_text: str = self.test_windows(windows)
            similarity: float = string_similarity(text, reconstructed_text)
            similarities.append(similarity)

            logging.debug(f'Expected: {text}, Actual: {reconstructed_text}, Similarity: {similarity}')

        # Without test images there is nothing to average over
        if not similarities:
            logging.warning('No test images were evaluated')
            return 0.0, 0.0

        accuracy: float = similarities.count(1.0) / len(similarities)
        average_similarity: float = sum(similarities) / len(similarities)
        time_logger.info(f'Performed Evaluation in {time.perf_counter() - t} seconds')

        return accuracy, average_similarity

    @staticmethod
    def _first_index_lookup(items: list) -> dict:
        """
        Maps every item to the index of its first occurrence (same result as list.index, O(1) per lookup)
        """
        lookup: dict = {}
        for index, item in enumerate(items):
            lookup.setdefault(item, index)
        return lookup

    @staticmethod
    def get_starting_probabilities(windows: List[Window], states: List[Tuple[str, ...]]) -> np.ndarray:
        """
        Calculate the probability of starting in state X, based on the windows with window_index == 0.
        If there is no such window, every state is assumed to be equally likely.
        """
        starting_states_unnormalized: Counter = Counter(
            window.characters for window in windows if window.window_index == 0)
        total: int = sum(starting_states_unnormalized.values())

        # No starting window observed: fall back to a uniform distribution instead of dividing by zero
        if total == 0:
            return np.full(len(states), 1.0 / len(states)) if states else np.array([])

        starting_probabilities: List[float] = [starting_states_unnormalized.get(state, 0) / total for state in states]
        return np.array(starting_probabilities)

    @staticmethod
    def get_transition_probabilities(windows: List[Window], states: List[Tuple[str, ...]]) -> np.ndarray:
        """
        From the given windows, count how many times state X is directly followed by state Y and save the
        (row-wise) normalized sum in transition_probabilities[X, Y]
        """
        state_index: dict = DepixHMM._first_index_lookup(states)
        transition_probabilities_unnormalized: np.ndarray = np.zeros((len(states), len(states)))

        for current_window, next_window in zip(windows[:-1], windows[1:]):

            # no transition inbetween images
            if next_window.window_index == 0:
                continue
            index_of_current_state: int = state_index[current_window.characters]
            index_of_next_state: int = state_index[next_window.characters]
            transition_probabilities_unnormalized[index_of_current_state, index_of_next_state] += 1

        # Normalization: If there is 0/0 (no transition leaving state X was observed in the training data), we
        # assume that every transition from that state is equally likely
        row_sums: np.ndarray = transition_probabilities_unnormalized.sum(axis=1)[:, np.newaxis]
        transition_probabilities: np.ndarray = np.divide(
            transition_probabilities_unnormalized,
            row_sums,
            out=np.full(shape=transition_probabilities_unnormalized.shape, fill_value=1.0/len(states), dtype=float),
            where=row_sums != 0
        )
        return transition_probabilities

    @staticmethod
    def get_emission_probabilities(windows: List[Window], states: List[Tuple[str, ...]],
                                   observations: List[int]) -> np.ndarray:
        """
        Calculate the probability that state X emits symbol Y and save the (row-wise) normalized sum
        in emission_probabilities[X, Y]
        """
        state_index: dict = DepixHMM._first_index_lookup(states)
        observation_index: dict = DepixHMM._first_index_lookup(observations)
        emission_probabilities_unnormalized: np.ndarray = np.zeros((len(states), len(observations)))

        for window in windows:
            index_of_state = state_index[window.characters]
            index_of_observation = observation_index[window.k]
            emission_probabilities_unnormalized[index_of_state, index_of_observation] += 1

        emission_probabilities: np.ndarray = emission_probabilities_unnormalized / emission_probabilities_unnormalized.sum(axis=1)[:, np.newaxis]

        return emission_probabilities

    def print_states(self):
        """
        Logs, per state length, how many states were found compared to the number of
        possible combinations of the characters that occur in the states
        """
        unique_characters: Set[str] = {c for char in self.states for c in char}
        # default=0 keeps this a no-op (instead of a ValueError) when there are no states
        max_state_length: int = max((len(state) for state in self.states), default=0)

        for i in range(1, max_state_length+1):
            states_with_length_i = [state for state in self.states if len(state) == i]
            logging.warning(f'Found {len(states_with_length_i)} states with length {i}, expected {math.pow(len(unique_characters), i)}')
7 | I recommend checking out the paper; it is a very insightful read and provides the necessary background that I have neither 8 | the time nor the space to explain in this readme. 9 | 10 | ## Example 11 | Below is an example with a font size of 50 and a pixelization block size of 12: 12 | 13 | | Pixelized | Recovered | 14 | |-----------|-----------| 15 | | ![](resources/images/arial_50/123456789_blocksize-12.png) | ![](resources/images/arial_50/123456789_blocksize-1.png) 16 | 17 | 18 | ## Theory 19 | 20 | ### Hidden Markov Models 21 | Hidden Markov Models (HMMs) are a way to model a large array of problems as graphs. In contrast to the simpler *Markov Chain*, 22 | the so-called *states* aren't accessible in a Hidden Markov Model. Instead, we can only access an *observation* that is 23 | derived from the underlying state. 24 | 25 | In the (simplified) illustration below, the observations are the color values of the pixelized grid. The hidden states are 26 | the respective underlying original characters: 27 | 28 | ![](documentation/hmm.png) 29 | 30 | In addition to $S$ (the set of hidden states of the model) and $V$ (the set of observable symbols), the following three 31 | probabilities are required to fully describe the model: 32 | - $\pi_i$: The probability of starting in state $i$. Two examples: When recovering passwords without any additional information, these probabilities are identical for each state. For recovering English language text, the probability of a word starting with `s`, `c`, `p` or `d` is higher than the probability of a word starting with `x`, `q` or `z`. See this [link](https://www.wolfram.com/language/11/text-and-language-processing/frequencies-of-letters-vs-first-letters.html?product=language) for more information. 33 | - $a_{ij}$: The probability of going from state *i* to state *j*. Again, if we don't have additional information about the text to be recovered (as is the case with passwords), all of these probabilities will be equal. 
However, in the English language, the letter `q` is followed by the letter `u` with a very high probability. 34 | - $b_i(k)$: The probability of an observation *k* being generated from state *i*. Finding meaningful probabilities for this matrix is the hardest part of the whole endeavor and is explained in the next section. 35 | 36 | Note: 37 | - For the sake of brevity, a few simplifications have been made in this explanation. For example, it is very unlikely that the hidden state only contains one letter (e.g. `H`). 38 | We always look at *windows* of the pixelized image, and these usually contain more than one character, e.g. `(H,E)`. 39 | - In order to keep the space of possible observations small enough, clustering is performed. So the observed symbol is not the raw pixel values of e.g. ![](documentation/H_pixelized.PNG), but the index of the cluster these values fall into. 40 | 41 | 42 | ### Generating Training Data 43 | To estimate values for the three probability matrices defining the Hidden Markov Model, training data is generated. 44 | 45 | First, a list of texts is generated that mimics the text to be pixelized. 46 | This could be passwords containing digits, upper- and lowercase letters with a length between 6 and 9, matching the regex `[a-zA-Z\d]{6,9}`. 47 | Or it could be a dump of real-life email addresses or words from English Wikipedia articles. 48 | 49 | Next, the texts are rendered using the same font and font size as the pixelized image with the unknown text. These images are then 50 | pixelized. Since we generated these images ourselves, we can then cut the pixelized images into windows with *known* hidden state 51 | (the characters at the position of the windows). 52 | From there, it is trivial to calculate the starting probabilities $\pi_i$ and 53 | the transition probabilities $a_{ij}$. 54 | 55 | As mentioned before, generating the emission probabilities $b_i(k)$ requires an extra step. 
To reduce the number of possible observable 56 | symbols, the windows are clustered according to their pixel values. Taking the example from the illustration below, 57 | we can see that a window that falls into the left-most cluster encodes either the tuple `(1,2)` or the tuple `(2,3)`, each with a probability of 50%. 58 | A window falling into the top-right cluster always encodes the tuple `(1)`. 59 | 60 | ### Recovering Hidden Text 61 | The pixelized image with the unknown text is then fed into the trained Hidden Markov Model. 62 | It will be cut into windows the same way the training data was. Each window is then assigned to the nearest cluster and the Viterbi Algorithm is 63 | used to recover the most likely sequence of hidden states. 64 | 65 | This could result in the tuples `(1), (1,2), (1,2), (2), (2,3), (3,4), ...` from which the original text can be reconstructed to be `1234...` 66 | 67 | ![](documentation/training_data.png) 68 | 69 | ## Installation and Usage 70 | Download the repository. 71 | 72 | Set up and activate your virtual environment. On Windows, for example, type into your console: 73 | ``` 74 | python -m venv venv 75 | .\venv\Scripts\activate.bat 76 | ``` 77 | 78 | Install the packages from the `requirements.txt` file: 79 | ``` 80 | pip install -r requirements.txt 81 | ``` 82 | 83 | Use the code samples from the `examples` and `experiments` folders to get started. If you want to use the code to recover 84 | text from your own images, read the sections below. Providing an easy-to-use tool with a good user experience honestly 85 | wasn't the goal of this project. Reach out to me, though; I'm more than happy to help you out. 86 | 87 | ### Correct Cropping of Images 88 | 89 | ##### Font Metrics 90 | To start out with, font metrics can be quite complex. As a quick overview, see the image below. 91 | The red line is the so-called *baseline*, on top of which the letters 'sit'. 
While the *ascent* measures the maximum height 92 | a letter of this font can have, most letters will only reach the smaller *cap height*. 93 | 94 | ![](documentation/original_image_text_ascent_descent.png) 95 | 96 | 97 | ##### How to Crop Your Image 98 | My implementation of the algorithm is a bit *dumb*. For example, when we care about images containing only 99 | digits, it is obvious that black pixels can only be found between the baseline and the cap height, and not above or below. 100 | However, the algorithm creates its training data over the full height (ascent + descent) that the font could possibly have, and 101 | so you have to crop accordingly! 102 | 103 | See the image below: Intuitively, one would have cropped at the red border, however the green border is correct! With 104 | the font size and y-offset of the pixelization, there is one "invisible" grid-row on top, and two on the bottom. 105 | 106 | It is relatively easy to crop the images in the training pipeline, I just haven't come around to doing it. 107 | 108 | ![](documentation/correct_crop.png) 109 | 110 | ### Explanation of Parameters 111 | Configuring the code is done by setting two sets of parameters. The first set are the `PictureParameters`. They contain the following values: 112 | - `pattern`: A regex pattern to generate sample text from. For passwords with a length between 6 and 9 characters containing digits, 113 | lower- and uppercase letters, this would be `r'[a-zA-Z\d]{6,9}'`. See the `rstr` package on [github](https://github.com/leapfrogonline/rstr) 114 | for more information. One possible improvement would be to feed a corpus of natural language data into the training pipeline. 115 | - `font`: The font that is most likely used. Often, we want to decode information taken from a partially pixelized screenshot. 116 | In this case, it is possible to infer the used font from the unobscured text that is still visible. Note that the availability 117 | of fonts varies by operating system. 
Arial and the MICR Encoding Font are included in the `/resources/fonts` folder. 118 | - `block_size`: The size of one pixelized block in px. See visualization below. 119 | - `randomize_pixelization_origin_x`: If set to `false`, the pixelization always starts at the leftmost pixel of the rendered font. 120 | However, this is not very realistic, as the person performing the pixelization most likely did not pay attention to where they set their origin. 121 | It is recommended to always leave this at `true`. This is especially important for monospaced fonts, where the constant character width 122 | is a multiple of the window size. 123 | - `window_size`: The width (in blocks) of one window. There is a tradeoff - a wide window will contain more information, 124 | but it is also more likely to contain multiple characters. Choose the value in such a way that no 3-Tuples will be generated in the training 125 | data, since this bloats the search space by one order of magnitude. Hints will be given in the terminal output. 126 | - `offset_y`: As explained in the previous section about Font Metrics, the font "rests" on the so-called baseline. This parameter 127 | measures the offset between the baseline and the beginning of the next pixel grid in px. The original paper has shown that 128 | the algorithm is somewhat robust against small errors in estimating this parameter (see Figure 14 in the original paper). 129 | However, when testing I found this the trickiest thing to get right. Might require some experimentation. 130 | 131 | The second set is the `TrainingParameters`: 132 | - `n_img_train`: Number of images used to estimate the parameters of the HMM. Usually on the order of 10,000. 133 | - `n_img_test`: Number of images used to evaluate the estimated parameters. Note that this will NOT show you whether you have 134 | estimated the parameters from the image you want to decode correctly (`pattern`, `font`, `block_size`, `offset_y`). 
135 | It will only tell you how the model performs on the synthetic data. 136 | - `n_clusters`: Number of clusters for k-means clustering. Should be high enough so that every tuple can possibly have its 'own' 137 | cluster. As an example, if you train the HMM on images containing only digits, and your `window_size` is chosen in a way that only 138 | 1- and 2-Tuples are created (`(0), (1), (2), ... (0,0), (0,1), ... (9,9)`), this should be at least 110. Don't go much higher than that; 139 | otherwise some clusters will be empty, which will result in a cryptic error message. Fixing this is on the roadmap. 140 | 141 | ![](documentation/picture_parameters.png) 142 | 143 | When using `PictureParametersGridSearch` and `TrainingParametersGridSearch`, some of these parameters can be turned into lists. 144 | A grid search will then be performed. See the `parameters.py` file for further information. Also remember the information given 145 | above under the `n_img_test` bullet point when doing a grid search. 146 | 147 | There is an additional `LoggingParameters`, which is self-explanatory. 148 | 149 | 150 | ## Final Thoughts 151 | I replicated the authors' simplest experiment (p. 409ff) with US bank account numbers (see the appropriately 152 | named `experiment_bank_account_numbers.py` for more details) and can confirm their findings. 153 | 154 | As mentioned above, the repository in its current state provides a working, but very proof-of-concept-y implementation of the 155 | original paper. Be ready for some frustration when you let it loose on your own images. I'm happy to help out if you have 156 | an interesting use case! 157 | 158 | For a more refined experience, I would recommend the highly popular [Depix](https://github.com/beurtschipper/Depix) tool. 
-------------------------------------------------------------------------------- /documentation/training_data.svg: -------------------------------------------------------------------------------- 1 | 2 | 21 | 23 | 31 | 36 | 37 | 45 | 50 | 51 | 59 | 64 | 65 | 74 | 79 | 80 | 88 | 93 | 94 | 102 | 107 | 108 | 117 | 122 | 123 | 131 | 136 | 137 | 145 | 150 | 151 | 159 | 164 | 165 | 166 | 185 | 187 | 188 | 190 | image/svg+xml 191 | 193 | 194 | 195 | 196 | 197 | 201 | 210 | Original Image: 221 | Pixelized Image: 232 | 240 | 243 | 250 | 258 | 265 | 272 | 279 | 280 | 283 | 290 | 299 | 306 | 313 | 314 | (1,2) 325 | (2,3) 336 | 340 | 343 | 351 | 358 | 359 | 366 | 370 | ... 381 | Windows: 392 | 396 | 400 | 405 | 408 | 416 | 423 | (2,3) 434 | 435 | 438 | 446 | 453 | 454 | 506 | Cluster 517 | 520 | 572 | 579 | (1) 590 | 591 | 645 | 652 | 659 | (1,2) 670 | (1) 681 | ... 692 | ... 703 | (7) 714 | ... 725 | 726 | 727 | -------------------------------------------------------------------------------- /documentation/hmm.svg: -------------------------------------------------------------------------------- 1 | 2 | 22 | 24 | 32 | 37 | 38 | 46 | 51 | 52 | 60 | 65 | 66 | 74 | 79 | 80 | 88 | 93 | 94 | 102 | 107 | 108 | 116 | 121 | 122 | 130 | 135 | 136 | 145 | 150 | 151 | 159 | 164 | 165 | 173 | 178 | 179 | 187 | 192 | 193 | 201 | 206 | 207 | 215 | 220 | 221 | 222 | 241 | 243 | 244 | 246 | image/svg+xml 247 | 249 | 250 | 251 | 252 | 253 | 257 | 264 | Observations 275 | 283 | 291 | 299 | 307 | 315 | Hidden States 326 | H 337 | 343 | 349 | 355 | 361 | 367 | E 378 | L 389 | L 400 | O 411 | 417 | 423 | 429 | 435 | 441 | 446 | 451 | 456 | 461 | 466 | 471 | 476 | 481 | 486 | 491 | 504 | 506 | 508 | 511 | 515 | 516 | 519 | 523 | 524 | 527 | 531 | 532 | 535 | 539 | 540 | 541 | 542 | 545 | 548 | 551 | 555 | 556 | 557 | 560 | 563 | 567 | 568 | 569 | 570 | 571 | 584 | 586 | 588 | 591 | 595 | 596 | 599 | 603 | 604 | 607 | 611 | 612 | 615 | 619 | 620 | 621 | 622 | 625 | 628 | 631 | 635 | 636 | 637 | 640 
| 643 | 647 | 648 | 649 | 650 | 651 | 659 | 660 | 661 | --------------------------------------------------------------------------------