├── src
    └── ulozto_captcha_breaker
    │   ├── dataset
    │       ├── __init__.py
    │       ├── preprocessing
    │       │   ├── __init__.py
    │       │   ├── label_pipeline.py
    │       │   ├── image_pipeline.py
    │       │   ├── image_preprocessors.py
    │       │   └── label_preprocessors.py
    │       ├── captcha_dataset.py
    │       └── annotations_generator.py
    │   ├── utils.py
    │   ├── metrics.py
    │   └── captcha_network.py
├── .vscode
    └── settings.json
├── docs
    ├── examples.png
    └── abfd_ba574f47-92d8-407d-9b34-d5f6fa8a74c3.png
├── requirements.in
├── Makefile
├── scripts
    └── init.sh
├── bin
    ├── create_tflite.py
    ├── captcha_annotate.py
    ├── predict.py
    ├── simple_captcha_generate.py
    ├── train.py
    └── test.py
├── setup.py
├── .devcontainer
    ├── devcontainer.json
    └── Dockerfile
├── LICENSE
├── .gitignore
├── requirements.txt
└── README.md


/src/ulozto_captcha_breaker/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/ulozto_captcha_breaker/dataset/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "python.defaultInterpreterPath": "/usr/local/bin/python"
3 | }


--------------------------------------------------------------------------------
/docs/examples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JanPalasek/ulozto-captcha-breaker/HEAD/docs/examples.png


--------------------------------------------------------------------------------
/requirements.in:
--------------------------------------------------------------------------------
1 | tensorflow
2 | matplotlib
3 | graphviz
4 | numpy
5 | pydot
6 | faker
7 | captcha
8 | scikit-image


--------------------------------------------------------------------------------
/docs/abfd_ba574f47-92d8-407d-9b34-d5f6fa8a74c3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JanPalasek/ulozto-captcha-breaker/HEAD/docs/abfd_ba574f47-92d8-407d-9b34-d5f6fa8a74c3.png


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL := /bin/bash
 2 | PYTHON ?= python
 3 | 
 4 | init:
 5 | 	sh scripts/init.sh
 6 | 
 7 | clean:
 8 | 	rm -rf .quarto _site/
 9 | 
10 | install_dev:
11 | 	$(PYTHON) -m pip install -r requirements.txt -e .
12 | 
13 | 
14 | .PHONY: init clean install_dev


--------------------------------------------------------------------------------
/scripts/init.sh:
--------------------------------------------------------------------------------
 1 | if ! grep -q "# DEVCONTAINER INIT" ~/.bashrc; then
 2 |     echo "Initializing..."
 3 |     echo "# DEVCONTAINER INIT #" >> ~/.bashrc;
 4 |     echo "export USER_NAME=$(id -un)" >> ~/.bashrc;
 5 |     echo "export USER_ID=$(id -u)" >> ~/.bashrc;
 6 |     echo "export USER_GID=$(id -g)" >> ~/.bashrc;
 7 |     echo "export USER_GNAME=$(id -gn)" >> ~/.bashrc;
 8 |     echo "export DOCKER_GID=$(getent group docker | cut -d: -f3)" >> ~/.bashrc;
 9 | fi
10 | echo "Initialization complete..."


--------------------------------------------------------------------------------
/src/ulozto_captcha_breaker/dataset/preprocessing/label_pipeline.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from ulozto_captcha_breaker.dataset.preprocessing.label_preprocessors import StringEncoder
 4 | 
 5 | 
 6 | class LabelPreprocessPipeline:
 7 |     def __init__(self, encoder):
 8 |         self._encoder = encoder
 9 | 
10 |     def __call__(self, labels):
11 |         result = []
12 |         for label in labels:
13 |             result.append(self._encoder.encode(label))
14 | 
15 |         return np.array(result)


--------------------------------------------------------------------------------
/bin/create_tflite.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | import tensorflow as tf
 4 | import argparse
 5 | import os
 6 | 
 7 | if __name__ == "__main__":
 8 |     parser = argparse.ArgumentParser()
 9 |     parser.add_argument("--out_dir", default="out", type=str, help="Out dir")
10 |     parser.add_argument("--pretrained_model", type=str, required=True)
11 |     args = parser.parse_args()
12 | 
13 |     converter = tf.lite.TFLiteConverter.from_saved_model(args.pretrained_model)
14 |     tflite_model = converter.convert()
15 | 
16 |     # Save the model.
17 |     with open(os.path.join(args.out_dir, 'model.tflite'), 'wb') as f:
18 |         f.write(tflite_model)
19 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import setuptools
 2 | 
 3 | 
 4 | with open("README.md", "r") as fh:
 5 |     long_description = fh.read()
 6 | 
 7 | setuptools.setup(
 8 |     name='ulozto-captcha-breaker',
 9 |     version='3.0a',
10 |     description="",
11 |     long_description=long_description,
12 |     long_description_content_type="text/markdown",
13 |     package_dir={
14 |         "": "src"
15 |     },
16 |     install_requires=[
17 |         "tensorflow>=2.0.0",
18 |         "matplotlib>=3.5.0"
19 |     ],
20 |     packages=setuptools.find_packages("src"),
21 |     scripts=["bin/train.py", "bin/test.py", "bin/predict.py"],
22 |     python_requires=">=3.8"
23 | )


--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the
 2 | // README at: https://github.com/devcontainers/templates/tree/main/src/anaconda
 3 | {
 4 | 	"name": "quarto-templates",
 5 | 	"build": { 
 6 | 		"context": "..",
 7 | 		"dockerfile": "Dockerfile",
 8 | 		"args": {
 9 | 			"USER_ID": "${localEnv:USER_ID}",
10 | 			"USER_NAME": "${localEnv:USER_NAME}",
11 | 			"USER_GID": "${localEnv:USER_GID}",
12 | 			"USER_GNAME": "${localEnv:USER_GNAME}"
13 | 		}
14 | 	},
15 | 
16 | 	// Use 'postCreateCommand' to run commands after the container is created.
17 | 	"postCreateCommand": "make install_dev",
18 | 
19 | 	// Configure tool-specific properties.
20 | 	"customizations": {
21 | 		"vscode": {
22 | 			"extensions": [
23 | 				"ms-python.python",
24 | 				"ms-toolsai.jupyter",
25 | 				"quarto.quarto",
26 | 				"ritwickdey.LiveServer"
27 | 			]
28 | 		}
29 | 	}
30 | }


--------------------------------------------------------------------------------
/src/ulozto_captcha_breaker/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import datetime
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | import logging
 6 | 
 7 | import numpy as np
 8 | 
 9 | 
class FileWriter:
    """
    Writes images as PNG files into one directory, which is created on construction if missing.
    """
    def __init__(self, path):
        """
        :param path: Directory the images are written to.
        """
        self._path = path
        # Appended to auto-generated names so two images saved within the same timestamp differ.
        self._internal_counter = 0

        # exist_ok avoids the check-then-create race of "if not exists: makedirs".
        os.makedirs(path, exist_ok=True)

    @staticmethod
    def _to_uint8(img: np.ndarray) -> np.ndarray:
        """Returns float images scaled by 255 as uint8; other dtypes are returned unchanged."""
        if img.dtype.kind == "f":
            # Clip before the cast: out-of-range values would otherwise wrap around in uint8.
            return np.clip(img * 255, 0, 255).astype(np.uint8)
        return img

    def save_image(self, img: np.ndarray, name=None, category="debug"):
        """
        Saves *img* as ``<category>_<name>.png`` inside the writer's directory.
        :param img: Image to save. Float images are multiplied by 255 and stored as uint8
            (presumably they are expected to be in [0, 1]).
        :param name: File name stem; when None, a timestamp plus a running counter is used.
        :param category: Prefix of the file name.
        """
        if name is None:
            # timestamp
            name = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S.%f') + str(self._internal_counter)
            self._internal_counter += 1

        dest = os.path.join(self._path, "{}_{}.png".format(category, name))

        plt.imsave(dest, self._to_uint8(img))
34 | 


--------------------------------------------------------------------------------
/src/ulozto_captcha_breaker/dataset/captcha_dataset.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | 
 3 | 
 4 | class CaptchaDataset:
 5 |     def __init__(self, annotations_path: str, classes: int):
 6 |         self._annotations_path = annotations_path
 7 |         self._classes = classes
 8 | 
 9 |         self._data = self._get_items()
10 | 
11 |     def get_image_shape(self):
12 |         return self._data[0][0].shape
13 | 
14 |     @property
15 |     def classes(self):
16 |         return self._classes
17 | 
18 |     def _get_items(self):
19 |         result = []
20 |         with open(self._annotations_path, "r") as file:
21 |             for line in file:
22 |                 image_path, image_label = line.rsplit(maxsplit=1)
23 |                 image_label = list(image_label)
24 | 
25 |                 image = plt.imread(image_path)
26 |                 result.append((image, image_label))
27 | 
28 |         return result
29 | 
30 |     def get_data(self):
31 |         return list(zip(*self._data))
32 | 


--------------------------------------------------------------------------------
/.devcontainer/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.10.12
 2 | 
 3 | ARG USER_ID
 4 | ARG USER_NAME
 5 | ARG USER_GID
 6 | ARG USER_GNAME
 7 | 
 8 | ARG QUARTO_VERSION=1.3.450
 9 | 
10 | # Update packages
11 | RUN apt-get update && apt-get install -y librsvg2-bin && \
12 |     rm -rf /var/lib/apt/lists/*
13 | 
14 | # Download and install Quarto
15 | RUN mkdir -p /opt/quarto/${QUARTO_VERSION}
16 | RUN curl -o quarto.tar.gz -L \
17 |     "https://github.com/quarto-dev/quarto-cli/releases/download/v${QUARTO_VERSION}/quarto-${QUARTO_VERSION}-linux-amd64.tar.gz"
18 | RUN tar -zxvf quarto.tar.gz \
19 |     -C "/opt/quarto/${QUARTO_VERSION}" \
20 |     --strip-components=1
21 | RUN rm quarto.tar.gz
22 | RUN ln -s /opt/quarto/${QUARTO_VERSION}/bin/quarto /usr/local/bin/quarto
23 | 
24 | # INSTALL THE REST AS A USER
25 | # Create user and group
26 | RUN groupadd -g ${USER_GID} ${USER_GNAME}
27 | RUN useradd ${USER_NAME} -u ${USER_ID} -g ${USER_GNAME} -m -s /bin/bash
28 | USER ${USER_NAME}
29 | 
30 | # Install Quarto extension for PDF generation
31 | RUN quarto install tinytex
32 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Jan Palasek
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/src/ulozto_captcha_breaker/dataset/preprocessing/image_pipeline.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from ulozto_captcha_breaker.utils import FileWriter
 4 | 
 5 | 
 6 | class ImagePreprocessorPipeline:
 7 |     """
 8 |     ImagePreprocessorPipeline specifies list of operations that are performed on each image in specified order. These
 9 |     operations are invoked using *__call__*.
10 |     """
11 |     def __init__(self, preprocessors, out_writer: FileWriter = None, debug_writer: FileWriter = None):
12 |         self._preprocessors = preprocessors
13 | 
14 |         self._out_writer = out_writer
15 |         self._debug_writer = debug_writer
16 | 
17 |     def __call__(self, images):
18 |         """
19 |         Performs specified operations on images.
20 |         :param images: List of images.
21 |         :return: Modified list of images.
22 |         """
23 |         result = []
24 |         for image in images:
25 |             modified_image = np.copy(image)
26 | 
27 |             for p in self._preprocessors:
28 |                 modified_image = p(modified_image)
29 |                 if self._debug_writer is not None:
30 |                     self._debug_writer.save_image(modified_image, category=str(type(p).__name__).lower())
31 | 
32 |             result.append(modified_image)
33 | 
34 |             if self._out_writer is not None:
35 |                 self._out_writer.save_image(modified_image, category="final")
36 | 
37 |         return np.array(result)
38 | 


--------------------------------------------------------------------------------
/src/ulozto_captcha_breaker/metrics.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def all_correct_acc(y_true: tf.Tensor, y_pred: tf.Tensor):
 5 |     """
 6 |     Computes accuracy between y_true and y_pred in the following manner:
 7 | 
 8 |     - If i-th sample has all values on y_pred same as on y_true, then 1.
 9 |     - Otherwise 0.
10 | 
11 |     It is hence more restricting then a typical accuracy.
12 | 
13 |     Args:
14 |         y_true (tf.Tensor): 2D tensor of shape (N, L), where N is the number of samples and L is length of the vector (number of characters).
15 |         y_pred: 2D tensor of shape (N, L), where N is the number of samples and L is length of the vector (number of characters)
16 | 
17 |     Returns:
18 |         Accuracy: number between [0, 1] denoting how many codes were predicted correctly.
19 |     """
20 |     if y_true.shape[0] is None and y_true.shape[1] is None and y_true.shape[2] is None:
21 |         return tf.convert_to_tensor(0)
22 | 
23 |     # cast to int64 so we can compare it
24 |     y_true = tf.cast(y_true, tf.dtypes.int64)
25 | 
26 |     if len(y_pred.shape) <= 2:
27 |         y_pred = tf.expand_dims(y_pred, axis=1)
28 |     if len(y_true.shape) <= 1:
29 |         y_true = tf.expand_dims(y_true, axis=1)
30 |     y_pred = tf.argmax(y_pred, axis=2)
31 |     correct = y_true == y_pred
32 |     # tf.print(f"Pred shape: {y_true.shape}", output_stream=sys.stdout)
33 | 
34 |     all_correct = tf.reduce_all(correct, axis=1)
35 |     all_correct = tf.cast(all_correct, tf.dtypes.float32)
36 | 
37 |     return tf.reduce_mean(all_correct)
38 | 
39 | 


--------------------------------------------------------------------------------
/src/ulozto_captcha_breaker/dataset/preprocessing/image_preprocessors.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from skimage.transform import resize
 3 | 
 4 | 
 5 | class ConvertToGrayscalePreprocessor:
 6 |     """
 7 |     Converts image to grayscale.
 8 |     """
 9 |     def __call__(self, img: np.ndarray):
10 |         r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
11 |         output = 0.299 * r + 0.587 * g + 0.114 * b
12 |         return output
13 | 
14 | 
class ImageCutPreprocessor:
    """
    Cuts an image along axis 1 into a fixed number of equally sized pieces.
    """
    def __init__(self, pieces_count: int):
        self._pieces_count = pieces_count

    def __call__(self, image: np.ndarray):
        """
        :param image: Array to cut; its size along axis 1 must be divisible by the piece count
            (``np.split`` raises otherwise).
        :return: Array whose first axis enumerates the pieces.
        """
        return np.array(np.split(image, self._pieces_count, axis=1))
23 | 
24 | 
class NormalizeImagePreprocessor:
    """
    Converts image to float32, divides its values by 255 (so byte values {0, ..., 255} end up
    in the interval [0, 1]) and appends a trailing axis of size 1.
    """
    def __call__(self, image):
        scaled = image.astype(np.float32) / 255
        # Same as expanding at axis == ndim: the new axis becomes the last one.
        return scaled[..., np.newaxis]
37 | 
38 | 
class ResizePreprocessor:
    """
    Resizes image to a fixed target height and width using ``skimage.transform.resize``.
    """
    def __init__(self, target_height, target_width):
        self._target_height = target_height
        self._target_width = target_width

    def __call__(self, img: np.ndarray):
        # Output shape is given as (height, width).
        output_shape = (self._target_height, self._target_width)
        return resize(img, output_shape)
49 | 


--------------------------------------------------------------------------------
/bin/captcha_annotate.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | import argparse
 4 | import os
 5 | import random
 6 | 
 7 | from ulozto_captcha_breaker.dataset.annotations_generator import AnnotationsGenerator
 8 | 
 9 | if __name__ == "__main__":
10 | 
11 |     parser = argparse.ArgumentParser()
12 | 
13 |     parser.add_argument("--test_split", default=0.1, type=float, help="Specifies how large part of all data are used for "
14 |                                                                       "test. E.g. if 0.1, then 10% of all data are used "
15 |                                                                       "for test.")
16 |     parser.add_argument("--val_split", default=0.1, type=float, help="Specifies how large part of all data are used for "
17 |                                                                       "validation. E.g. if 0.1, then 10% of all data are used "
18 |                                                                       "for validation.")
19 |     parser.add_argument("--seed", default=42, type=int)
20 |     parser.add_argument("--out_dir", type=str, default="out")
21 |     parser.add_argument("--case_sensitive", action="store_true", default=False, help="Boolean switch that is true when "
22 |                                                                                      "captcha label should be case sensitive.")
23 | 
24 |     args = parser.parse_args()
25 | 
26 |     random.seed(args.seed)
27 | 
28 |     out_dir = os.path.abspath(args.out_dir)
29 |     data_dir = os.path.join(out_dir, "data")
30 | 
31 |     generator = AnnotationsGenerator(data_dir, out_dir, args.val_split, args.test_split, not args.case_sensitive)
32 |     generator.save_annotations()
33 | 


--------------------------------------------------------------------------------
/bin/predict.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | from ulozto_captcha_breaker.dataset.preprocessing.image_pipeline import ImagePreprocessorPipeline
 4 | from ulozto_captcha_breaker.dataset.preprocessing.image_preprocessors import ConvertToGrayscalePreprocessor, NormalizeImagePreprocessor, ResizePreprocessor
 5 | from ulozto_captcha_breaker.dataset.preprocessing.label_preprocessors import StringEncoder
 6 | import tensorflow as tf
 7 | 
 8 | import numpy as np
 9 | import matplotlib.pyplot as plt
10 | 
11 | 
def main(args):
    """
    Predicts the captcha code of a single image with a TF Lite model and prints it.

    Reads ``args.image_path``, ``args.model_path`` and ``args.available_chars``.
    """
    image = plt.imread(args.image_path)
    height, width = image.shape[0], image.shape[1]
    preprocess = ImagePreprocessorPipeline([
        ConvertToGrayscalePreprocessor(),
        # Target size equals the size of the image that was just read.
        ResizePreprocessor(height, width),
        NormalizeImagePreprocessor()
    ])

    # create interpreter
    interpreter = tf.lite.Interpreter(args.model_path)
    interpreter.allocate_tensors()

    # The pipeline works on batches, hence the one-element list.
    batch = preprocess([image])
    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']
    interpreter.set_tensor(input_index, batch)
    interpreter.invoke()

    # predict and get the output: argmax over axis 2 picks the class per position,
    # [0] selects the only sample of the batch
    scores = interpreter.get_tensor(output_index)
    predicted_indices = np.argmax(scores, axis=2)[0]

    # now get labels
    decoder = StringEncoder(available_chars=args.available_chars)
    decoded_label = decoder.decode(predicted_indices)

    print("Decoded label is the following:")
    print(decoded_label)
40 | 
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Both paths are mandatory: with the former default of None the script only failed later,
    # deep inside the image reader / interpreter, with a far less helpful error.
    parser.add_argument("--image_path", required=True, type=str, help="Path to the input image.")
    parser.add_argument("--model_path", required=True, type=str, help="Path to a pretrained model TF Lite.")
    parser.add_argument("--available_chars", default="abcdefghijklmnopqrstuvwxyz", type=str, help="Characters")
    args = parser.parse_args()

    main(args)


--------------------------------------------------------------------------------
/src/ulozto_captcha_breaker/dataset/preprocessing/label_preprocessors.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | from typing import List
 4 | 
 5 | 
 6 | class OneCharEncoder:
 7 |     """
 8 |     Encodes chars into integers.
 9 |     """
10 |     def __init__(self, available_chars):
11 |         self._available_chars = available_chars
12 | 
13 |     def encode_char(self, char: str):
14 |         return self._available_chars.index(char)
15 | 
16 |     def encode(self, string):
17 |         result = []
18 |         result.append(self.encode_char(string[1]))
19 |         return np.array(result)
20 | 
21 |     # def encode(self, input):
22 |     #     result = []
23 |     #     for x in input:
24 |     #         result.append(self.encode_str(x))
25 |     #     return np.array(result)
26 | 
27 | 
28 | 
class OneHotEncoder:
    """
    Converts characters to one-hot vectors over the available characters and back.
    """
    def __init__(self, available_chars):
        self._available_chars = available_chars

    def encode_char(self, char: str):
        """Returns a one-hot vector with a one at the index of *char*."""
        position = self._available_chars.index(char)
        depth = len(self._available_chars)
        return tf.one_hot(position, depth)

    def decode_char(self, one_hot_vector):
        """Returns the available character at the argmax (over axis 0) of the vector."""
        return self._available_chars[tf.argmax(one_hot_vector, axis=0)]
39 | 
40 | 
class StringEncoder:
    """
    Maps strings to arrays of character indices (positions within the available characters)
    and index sequences back to strings.
    """
    def __init__(self, available_chars):
        self._available_chars = available_chars

    def encode_char(self, char: str):
        """Returns the index of *char* in the available characters (``index`` raises if absent)."""
        return self._available_chars.index(char)

    def encode(self, string):
        """Returns an array holding the index of every character of *string*, in order."""
        return np.array([self.encode_char(symbol) for symbol in string])

    def decode_char(self, char_idx: int):
        """Returns the available character stored at *char_idx*."""
        return self._available_chars[char_idx]

    def decode(self, li):
        """Returns the string formed by decoding every index of *li*, in order."""
        return "".join(self.decode_char(position) for position in li)


--------------------------------------------------------------------------------
/bin/simple_captcha_generate.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import itertools
 3 | import os
 4 | import random
 5 | 
 6 | from captcha.image import ImageCaptcha
 7 | 
 8 | from faker import Faker
 9 | 
10 | 
def generate_randomly(available_chars: str, dataset_size: int, captcha_length: int):
    """
    Yields ``dataset_size`` codes, each made of ``captcha_length`` characters drawn one by one
    from ``available_chars`` through the global ``random`` module.
    """
    last_index = len(available_chars) - 1
    for _ in range(dataset_size):
        picked = [available_chars[random.randint(0, last_index)] for _ in range(captcha_length)]
        yield "".join(picked)
19 | 
20 | 
def generate_systematically(available_chars: str, dataset_size: int, captcha_length: int):
    """
    Yields at most ``dataset_size`` codes of ``captcha_length`` characters, enumerating the
    combinations of ``available_chars`` in ``itertools.product`` order.
    """
    combinations = itertools.product(available_chars, repeat=captcha_length)
    yield from map("".join, itertools.islice(combinations, dataset_size))
28 | 
29 | 
if __name__ == "__main__":
    # Generates captcha images named "<code>_<uuid>.png" into <out_dir>/data.

    parser = argparse.ArgumentParser()

    parser.add_argument("--dataset_size", default=10000, type=int)
    parser.add_argument("--seed", default=42, type=int)
    parser.add_argument("--captcha_length", default=4, type=int)
    parser.add_argument("--available_chars", default="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", type=str)
    # ``choices`` rejects typos that previously fell back to random generation silently.
    parser.add_argument("--generation_type", type=str, choices=["randomly", "systematically"],
                        help="Either 'randomly' or 'systematically'", default="randomly")
    parser.add_argument("--out_dir", type=str, default="out")
    parser.add_argument("--width", type=int, default=175, help="Width of generated captcha code image.")
    parser.add_argument("--height", type=int, default=70, help="Height of generated captcha code image.")

    args = parser.parse_args()

    random.seed(args.seed)

    out_dir = os.path.abspath(args.out_dir)
    data_dir = os.path.join(out_dir, "data")

    # exist_ok avoids the check-then-create race of "if not exists: makedirs".
    os.makedirs(data_dir, exist_ok=True)

    image = ImageCaptcha(width=args.width, height=args.height)

    # generate fake uuid4 (seeded, so the file names are reproducible as well)
    fake = Faker()
    Faker.seed(args.seed)

    generate = generate_systematically if args.generation_type == "systematically" else generate_randomly
    for captcha_code in generate(args.available_chars, args.dataset_size, args.captcha_length):
        # The uuid suffix keeps files apart when the same code is generated more than once.
        image.write(captcha_code, os.path.join(data_dir, f'{captcha_code}_{fake.uuid4()}.png'))


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | cover/
 54 | 
 55 | # Translations
 56 | *.mo
 57 | *.pot
 58 | 
 59 | # Django stuff:
 60 | *.log
 61 | local_settings.py
 62 | db.sqlite3
 63 | db.sqlite3-journal
 64 | 
 65 | # Flask stuff:
 66 | instance/
 67 | .webassets-cache
 68 | 
 69 | # Scrapy stuff:
 70 | .scrapy
 71 | 
 72 | # Sphinx documentation
 73 | docs/_build/
 74 | 
 75 | # PyBuilder
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 98 | __pypackages__/
 99 | 
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 | 
104 | # SageMath parsed files
105 | *.sage.py
106 | 
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 | 
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 | 
120 | # Rope project settings
121 | .ropeproject
122 | 
123 | # mkdocs documentation
124 | /site
125 | 
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 | 
131 | # Pyre type checker
132 | .pyre/
133 | 
134 | # pytype static type analyzer
135 | .pytype/
136 | 
137 | # idea
138 | *.iws
139 | **/.idea/workspace.xml
140 | **/.idea/tasks.xml
141 | 
142 | #vscode
143 | .vscode/*
144 | !.vscode/settings.json
145 | 
146 | out/
147 | venv/
148 | model/
149 | 


--------------------------------------------------------------------------------
/src/ulozto_captcha_breaker/dataset/annotations_generator.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import random
 3 | import numpy as np
 4 | 
 5 | 
 6 | class AnnotationsGenerator:
 7 |     def __init__(self, dir_path: str, annotations_out_dir: str,
 8 |                  validation_ratio: float, test_ratio: float, ignore_case: bool):
 9 |         self._dir_path = dir_path
10 |         self._validation_ratio = validation_ratio
11 |         self._test_ratio = test_ratio
12 |         self._ignore_case = ignore_case
13 |         self._annotations_out_dir = annotations_out_dir
14 | 
15 |     def get_annotations(self):
16 |         for item in os.listdir(self._dir_path):
17 |             item_path = os.path.join(self._dir_path, item)
18 | 
19 |             item_label = os.path.splitext(item)[0]
20 |             item_label = item_label.split("_")[0]
21 | 
22 |             yield item_path, item_label
23 | 
24 |     def save_annotations(self):
25 |         val_annotations_path = os.path.join(self._annotations_out_dir, "annotations-validation.txt")
26 |         test_annotations_path = os.path.join(self._annotations_out_dir, "annotations-test.txt")
27 |         train_annotations_path = os.path.join(self._annotations_out_dir, "annotations-train.txt")
28 |         annotations_path = os.path.join(self._annotations_out_dir, "annotations.txt")
29 | 
30 |         annotations = np.array(list(self.get_annotations()))
31 |         indices = list(range(len(annotations)))
32 |         random.shuffle(indices)
33 | 
34 |         test_samples_count = int(len(indices) * self._test_ratio)
35 |         validation_samples_count = int(len(indices) * self._validation_ratio)
36 |         test_indices = indices[:test_samples_count]
37 |         validation_indices = indices[test_samples_count:test_samples_count + validation_samples_count]
38 |         train_indices = indices[test_samples_count + validation_samples_count:]
39 | 
40 |         test_annotations = annotations[test_indices]
41 |         train_annotations = annotations[train_indices]
42 |         validation_annotations = annotations[validation_indices]
43 | 
44 |         with open(annotations_path, "w") as annotations_file:
45 |             with open(test_annotations_path, "w") as file:
46 |                 for image_path, label in test_annotations:
47 |                     result_label = label.lower() if self._ignore_case else label
48 |                     annotation = f"{image_path} {result_label}\n"
49 | 
50 |                     file.write(annotation)
51 |                     annotations_file.write(annotation)
52 | 
53 |             with open(val_annotations_path, "w") as file:
54 |                 for image_path, label in validation_annotations:
55 |                     result_label = label.lower() if self._ignore_case else label
56 |                     annotation = f"{image_path} {result_label}\n"
57 | 
58 |                     file.write(annotation)
59 |                     annotations_file.write(annotation)
60 | 
61 |             with open(train_annotations_path, "w") as file:
62 |                 for image_path, label in train_annotations:
63 |                     annotation = f"{image_path} {label.lower()}\n"
64 | 
65 |                     file.write(annotation)
66 |                     annotations_file.write(annotation)


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
  1 | #
  2 | # This file is autogenerated by pip-compile with python 3.9
  3 | # To update, run:
  4 | #
  5 | #    pip-compile
  6 | #
  7 | absl-py==1.3.0
  8 |     # via
  9 |     #   tensorboard
 10 |     #   tensorflow
 11 | astunparse==1.6.3
 12 |     # via tensorflow
 13 | cachetools==5.2.0
 14 |     # via google-auth
 15 | captcha==0.4
 16 |     # via -r requirements.in
 17 | certifi==2022.9.24
 18 |     # via requests
 19 | charset-normalizer==2.1.1
 20 |     # via requests
 21 | contourpy==1.0.6
 22 |     # via matplotlib
 23 | cycler==0.11.0
 24 |     # via matplotlib
 25 | faker==15.2.0
 26 |     # via -r requirements.in
 27 | flatbuffers==22.10.26
 28 |     # via tensorflow
 29 | fonttools==4.38.0
 30 |     # via matplotlib
 31 | gast==0.4.0
 32 |     # via tensorflow
 33 | google-auth==2.14.0
 34 |     # via
 35 |     #   google-auth-oauthlib
 36 |     #   tensorboard
 37 | google-auth-oauthlib==0.4.6
 38 |     # via tensorboard
 39 | google-pasta==0.2.0
 40 |     # via tensorflow
 41 | graphviz==0.20.1
 42 |     # via -r requirements.in
 43 | grpcio==1.50.0
 44 |     # via
 45 |     #   tensorboard
 46 |     #   tensorflow
 47 | h5py==3.7.0
 48 |     # via tensorflow
 49 | idna==3.4
 50 |     # via requests
 51 | imageio==2.22.3
 52 |     # via scikit-image
 53 | importlib-metadata==5.0.0
 54 |     # via markdown
 55 | keras==2.10.0
 56 |     # via tensorflow
 57 | keras-preprocessing==1.1.2
 58 |     # via tensorflow
 59 | kiwisolver==1.4.4
 60 |     # via matplotlib
 61 | libclang==14.0.6
 62 |     # via tensorflow
 63 | markdown==3.4.1
 64 |     # via tensorboard
 65 | markupsafe==2.1.1
 66 |     # via werkzeug
 67 | matplotlib==3.6.2
 68 |     # via -r requirements.in
 69 | networkx==2.8.8
 70 |     # via scikit-image
 71 | numpy==1.23.4
 72 |     # via
 73 |     #   -r requirements.in
 74 |     #   contourpy
 75 |     #   h5py
 76 |     #   imageio
 77 |     #   keras-preprocessing
 78 |     #   matplotlib
 79 |     #   opt-einsum
 80 |     #   pywavelets
 81 |     #   scikit-image
 82 |     #   scipy
 83 |     #   tensorboard
 84 |     #   tensorflow
 85 |     #   tifffile
 86 | oauthlib==3.2.2
 87 |     # via requests-oauthlib
 88 | opt-einsum==3.3.0
 89 |     # via tensorflow
 90 | packaging==21.3
 91 |     # via
 92 |     #   matplotlib
 93 |     #   scikit-image
 94 |     #   tensorflow
 95 | pillow==9.3.0
 96 |     # via
 97 |     #   captcha
 98 |     #   imageio
 99 |     #   matplotlib
100 |     #   scikit-image
101 | protobuf==3.19.6
102 |     # via
103 |     #   tensorboard
104 |     #   tensorflow
105 | pyasn1==0.4.8
106 |     # via
107 |     #   pyasn1-modules
108 |     #   rsa
109 | pyasn1-modules==0.2.8
110 |     # via google-auth
111 | pydot==1.4.2
112 |     # via -r requirements.in
113 | pyparsing==3.0.9
114 |     # via
115 |     #   matplotlib
116 |     #   packaging
117 |     #   pydot
118 | python-dateutil==2.8.2
119 |     # via
120 |     #   faker
121 |     #   matplotlib
122 | pywavelets==1.4.1
123 |     # via scikit-image
124 | requests==2.28.1
125 |     # via
126 |     #   requests-oauthlib
127 |     #   tensorboard
128 | requests-oauthlib==1.3.1
129 |     # via google-auth-oauthlib
130 | rsa==4.9
131 |     # via google-auth
132 | scikit-image==0.19.3
133 |     # via -r requirements.in
134 | scipy==1.9.3
135 |     # via scikit-image
136 | six==1.16.0
137 |     # via
138 |     #   astunparse
139 |     #   google-auth
140 |     #   google-pasta
141 |     #   grpcio
142 |     #   keras-preprocessing
143 |     #   python-dateutil
144 |     #   tensorflow
145 | tensorboard==2.10.1
146 |     # via tensorflow
147 | tensorboard-data-server==0.6.1
148 |     # via tensorboard
149 | tensorboard-plugin-wit==1.8.1
150 |     # via tensorboard
151 | tensorflow==2.10.0
152 |     # via -r requirements.in
153 | tensorflow-estimator==2.10.0
154 |     # via tensorflow
155 | tensorflow-io-gcs-filesystem==0.27.0
156 |     # via tensorflow
157 | termcolor==2.1.0
158 |     # via tensorflow
159 | tifffile==2022.10.10
160 |     # via scikit-image
161 | typing-extensions==4.4.0
162 |     # via tensorflow
163 | urllib3==1.26.12
164 |     # via requests
165 | werkzeug==2.2.2
166 |     # via tensorboard
167 | wheel==0.38.2
168 |     # via
169 |     #   astunparse
170 |     #   tensorboard
171 | wrapt==1.14.1
172 |     # via tensorflow
173 | zipp==3.10.0
174 |     # via importlib-metadata
175 | 
176 | # The following packages are considered to be unsafe in a requirements file:
177 | # setuptools
178 | 


--------------------------------------------------------------------------------
/bin/train.py:
--------------------------------------------------------------------------------
 1 | from ulozto_captcha_breaker.dataset.preprocessing.image_preprocessors import ConvertToGrayscalePreprocessor, ResizePreprocessor, NormalizeImagePreprocessor
 2 | from ulozto_captcha_breaker.dataset.preprocessing.image_pipeline import ImagePreprocessorPipeline
 3 | 
 4 | from ulozto_captcha_breaker.dataset.preprocessing.label_preprocessors import StringEncoder
 5 | from ulozto_captcha_breaker.dataset.preprocessing.label_pipeline import LabelPreprocessPipeline
 6 | 
 7 | import numpy as np
 8 | import random
 9 | 
10 | from ulozto_captcha_breaker.captcha_network import CaptchaNetwork
11 | from ulozto_captcha_breaker.dataset.captcha_dataset import CaptchaDataset
12 | 
13 | import argparse
14 | import datetime
15 | import os
16 | import re
17 | import tensorflow as tf
18 | 
if __name__ == "__main__":
    # Training entry point: reads the train/validation annotation files from
    # --out_dir, builds the preprocessing pipelines and the network, and trains it.
    parser = argparse.ArgumentParser()
    parser.add_argument("--weights_file", default=None, type=str, help="Path to file that contains pre-trained weights.")
    parser.add_argument("--pretrained_model", default=None, type=str)
    parser.add_argument("--freeze_layers", default=0, type=int, help="How many layers should be frozen for the training. "
                                                                     "Counts from the beginning.")
    parser.add_argument("--remove_layers",
                        action="store_true")
    parser.add_argument("--batch_size", default=32, type=int, help="Batch size.")
    parser.add_argument("--epochs", default=1500, type=int, help="Number of epochs.")
    parser.add_argument("--out_dir", default="out", type=str, help="Out dir")
    parser.add_argument("--seed", default=42, type=int)
    parser.add_argument("--captcha_length", default=4, type=int)
    parser.add_argument("--available_chars", default="abcdefghijklmnopqrstuvwxyz", type=str, help="Labels")
    parser.add_argument("--transformed_img_width", default=None, type=int)
    parser.add_argument("--transformed_img_height", default=None, type=int)
    parser.add_argument("--l2", default=0.00001, type=float)

    args = parser.parse_args()

    # Set before the log directory name is built so it stays part of that name.
    # A falsy value means the network does not export a saved model.
    args.save_model_path = None

    # The target size is only usable when both dimensions are given. Report the
    # problem through argparse instead of an assert, which `python -O` strips.
    if (args.transformed_img_width is None) != (args.transformed_img_height is None):
        parser.error("--transformed_img_width and --transformed_img_height must be passed together.")

    # Fix random seeds
    np.random.seed(args.seed)
    tf.random.set_seed(args.seed)
    random.seed(args.seed)

    out_dir = os.path.abspath(args.out_dir)
    train_annotations_path = os.path.join(out_dir, "annotations-train.txt")
    val_annotations_path = os.path.join(out_dir, "annotations-validation.txt")

    # Log directory name: script name, timestamp and every argument, with each
    # argument name abbreviated to the first letters of its underscore-separated parts.
    args.logdir = os.path.join(out_dir, "logs", "{}-{}-{}".format(
        os.path.basename(__file__),
        datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
        ",".join(("{}={}".format(re.sub(r"(.)[^_]*_?", r"\1", key), value) for key, value in sorted(vars(args).items())))
    ))

    train_dataset = CaptchaDataset(train_annotations_path, len(args.available_chars))
    val_dataset = CaptchaDataset(val_annotations_path, len(args.available_chars))

    # Network input size: the explicitly requested size, otherwise the size
    # reported by the training dataset.
    if args.transformed_img_width is not None and args.transformed_img_height is not None:
        input_shape = (args.transformed_img_height, args.transformed_img_width)
    else:
        image_shape = train_dataset.get_image_shape()
        input_shape = (image_shape[0], image_shape[1])

    image_preprocess_pipeline = ImagePreprocessorPipeline([
        ConvertToGrayscalePreprocessor(),
        ResizePreprocessor(input_shape[0], input_shape[1]),
        NormalizeImagePreprocessor()
    ])
    label_preprocess_pipeline = LabelPreprocessPipeline(
        StringEncoder(available_chars=args.available_chars)
    )

    train_x, train_y = train_dataset.get_data()
    val_x, val_y = val_dataset.get_data()

    network = CaptchaNetwork(image_shape=input_shape,
                             classes=train_dataset.classes,
                             image_preprocess_pipeline=image_preprocess_pipeline,
                             label_preprocess_pipeline=label_preprocess_pipeline,
                             args=args)

    network.train(train_x, train_y, val_x, val_y, args)
89 | 


--------------------------------------------------------------------------------
/bin/test.py:
--------------------------------------------------------------------------------
  1 | from ulozto_captcha_breaker.dataset.preprocessing.image_preprocessors import ConvertToGrayscalePreprocessor, ResizePreprocessor, NormalizeImagePreprocessor
  2 | from ulozto_captcha_breaker.dataset.preprocessing.image_pipeline import ImagePreprocessorPipeline
  3 | 
  4 | from ulozto_captcha_breaker.dataset.preprocessing.label_preprocessors import StringEncoder
  5 | from ulozto_captcha_breaker.dataset.preprocessing.label_pipeline import LabelPreprocessPipeline
  6 | 
  7 | import numpy as np
  8 | import random
  9 | 
 10 | from ulozto_captcha_breaker.captcha_network import CaptchaNetwork
 11 | from ulozto_captcha_breaker.dataset.captcha_dataset import CaptchaDataset
 12 | 
 13 | import argparse
 14 | import datetime
 15 | import os
 16 | import re
 17 | import tensorflow as tf
 18 | 
 19 | if __name__ == "__main__":
 20 |     # Parse arguments
 21 |     parser = argparse.ArgumentParser()
 22 |     parser.add_argument("--weights_file", default=None, type=str,
 23 |                         help="Path to file that contains pre-trained weights.")
 24 |     parser.add_argument("--pretrained_model", default=None, type=str)
 25 |     parser.add_argument("--freeze_layers", default=0, type=int,
 26 |                         help="How many layers should be frozen for the training."
 27 |                              "Counts from the beginning.")
 28 |     parser.add_argument("--remove_layers",
 29 |                         action="store_true")
 30 |     parser.add_argument("--batch_size", default=32, type=int, help="Batch size.")
 31 |     parser.add_argument("--epochs", default=1500, type=int, help="Number of epochs.")
 32 |     parser.add_argument("--out_dir", default="out", type=str, help="Out dir")
 33 |     parser.add_argument("--seed", default=42, type=int)
 34 |     parser.add_argument("--captcha_length", default=4, type=int)
 35 |     parser.add_argument("--available_chars", default="abcdefghijklmnopqrstuvwxyz", type=str, help="Labels")
 36 |     parser.add_argument("--transformed_img_width", default=None, type=int)
 37 |     parser.add_argument("--transformed_img_height", default=None, type=int)
 38 |     parser.add_argument("--l2", default=0.0001, type=float)
 39 | 
 40 |     args = parser.parse_args()
 41 | 
 42 |     # Fix random seeds and number of threads
 43 |     np.random.seed(args.seed)
 44 |     tf.random.set_seed(args.seed)
 45 |     random.seed(args.seed)
 46 | 
 47 |     out_dir = os.path.abspath(args.out_dir)
 48 |     data_dir = os.path.join(out_dir, "data")
 49 |     annotations_path = os.path.join(out_dir, "annotations-test.txt")
 50 | 
 51 |     args.logdir = os.path.join(out_dir, "logs", "{}-{}-{}".format(
 52 |         os.path.basename(__file__),
 53 |         datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
 54 |         ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", key), value) for key, value in sorted(vars(args).items())))
 55 |     ))
 56 |     args.save_model_path = os.path.join(out_dir, "model")
 57 | 
 58 |     assert args.weights_file is not None or args.pretrained_model is not None, "Weights file or pretrained model must " \
 59 |                                                                                "be passed in order to test " \
 60 |                                                                                "it."
 61 | 
 62 |     dataset = CaptchaDataset(annotations_path, len(args.available_chars))
 63 |     inputs, labels = dataset.get_data()
 64 | 
 65 |     if args.transformed_img_width is not None and args.transformed_img_height is not None:
 66 |         input_shape = (args.transformed_img_height, args.transformed_img_width)
 67 |     else:
 68 |         image_shape = dataset.get_image_shape()
 69 |         input_shape = (image_shape[0], image_shape[1])
 70 | 
 71 |     image_preprocess_pipeline = ImagePreprocessorPipeline([
 72 |         ConvertToGrayscalePreprocessor(),
 73 |         ResizePreprocessor(input_shape[0], input_shape[1]),
 74 |         NormalizeImagePreprocessor()
 75 |     ])
 76 |     label_preprocess_pipeline = LabelPreprocessPipeline(
 77 |         StringEncoder(available_chars=args.available_chars)
 78 |     )
 79 | 
 80 |     network = CaptchaNetwork(image_shape=input_shape,
 81 |                              classes=dataset.classes,
 82 |                              image_preprocess_pipeline=image_preprocess_pipeline,
 83 |                              label_preprocess_pipeline=label_preprocess_pipeline,
 84 |                              args=args)
 85 | 
 86 |     labels = label_preprocess_pipeline(labels)
 87 | 
 88 |     pred_labels = network.predict(inputs, args)
 89 | 
 90 |     correct = labels == pred_labels
 91 | 
 92 |     all_correct = tf.reduce_all(correct, axis=1)
 93 |     all_correct = tf.cast(all_correct, tf.dtypes.float32)
 94 |     acc = tf.reduce_mean(all_correct)
 95 | 
 96 |     dec = StringEncoder(available_chars=args.available_chars)
 97 |     with open(os.path.join(out_dir, "out_test.csv"), "w") as file:
 98 |         for i in range(len(pred_labels)):
 99 |             decoded_label = dec.decode(labels[i])
100 |             decoded_pred_label = dec.decode(pred_labels[i])
101 |             file.write(f"{all_correct[i]};{decoded_label};{decoded_pred_label}\n")
102 | 
103 |     print(f"Test acc: {acc:.2f}")
104 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # ulozto-captcha-breaker
  2 | Deep learning model using Tensorflow that breaks ulozto captcha codes.
  3 | 
  4 | ![examples](docs/examples.png)
  5 | 
  6 | Algorithm used will be described in a standalone document.
  7 | 
  8 | ## How to use pretrained model in your project
  9 | ### Prerequisites
 10 | Packages
 11 | - *numpy~=1.18.3*
 12 | - *tflite_runtime~=2.5.0*
 13 | 
 14 | You need to install Tensorflow Lite Runtime with the correct version depending on your operating system and instruction set. 
 15 | It can be found here: https://www.tensorflow.org/lite/guide/python.
 16 | 
 17 | ### Model specification
 18 | - Input shape: (batch_size, height, width, 1), where height = 70, width = 175
 19 | - Output shape: (batch_size, number_of_letters, number_of_classes), where number_of_letters = 4 and number_of_classes = 26
 20 | 
 21 | Note that it takes **grayscale images** as the input. RGB images therefore have to be converted.
 22 | 
 23 | ### Steps
 24 | 1. Go to latest release and download binary files
 25 | 2. Instantiate the tflite interpreter. For that you're going to need TFLite model. You can find it in the release binary files.
 26 |     - PATH_TO_TFLITE_MODEL is the path to the pretrained TFLite model file
 27 |     ```python
 28 |    import tflite_runtime.interpreter as tflite
 29 |    interpreter = tflite.Interpreter(model_path=PATH_TO_TFLITE_MODEL)
 30 |    ```
 31 | 
 32 | 3. Normalize image to 0..1 interval. If it already is, skip this step.
 33 |     ```python
 34 |     img = (img / 255).astype(np.float32)
 35 |     ```
 36 | 4. Predict using following code
 37 |     ```python
 38 |     # convert to grayscale
 39 |     r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
 40 |     input = 0.299 * r + 0.587 * g + 0.114 * b
 41 |     
 42 |     # input now has shape (70, 175)
 43 |     # we modify dimensions to match model's input
 44 |     input = np.expand_dims(input, 0)
 45 |     input = np.expand_dims(input, -1)
 46 |     # input is now of shape (batch_size, 70, 175, 1)
 47 |     # output will have shape (batch_size, 4, 26)
 48 |    
 49 |     interpreter.allocate_tensors()
 50 |     input_details = interpreter.get_input_details()
 51 |     output_details = interpreter.get_output_details()
 52 |     interpreter.set_tensor(input_details[0]['index'], input)
 53 |     interpreter.invoke()
 54 | 
 55 |     # predict and get the output
 56 |     output = interpreter.get_tensor(output_details[0]['index'])
 57 |     # now get labels
 58 |     labels_indices = np.argmax(output, axis=2)
 59 | 
 60 |     available_chars = "abcdefghijklmnopqrstuvwxyz"
 61 | 
 62 |     def decode(li):
 63 |         result = []
 64 |         for char in li:
 65 |             result.append(available_chars[char])
 66 |         return "".join(result)
 67 | 
 68 |     decoded_label = [decode(x) for x in labels_indices][0]
 69 |     ```
 70 |     - *np* for numpy
 71 | 
 72 | ## How to train your own model
 73 | 1. Install environment
 74 |     Following script creates new virtual environment. You can of course use global environment instead.
 75 |     All following section's scripts are expected to be executed from repository's root directory.
 76 |     ```shell script
 77 |     git clone https://github.com/JanPalasek/ulozto-captcha-breaker
 78 |     cd "ulozto-captcha-breaker"
 79 |     
 80 |     # create virtual environment
 81 |     python -m venv "venv"
 82 |     
 83 |     source venv/bin/activate # or .\venv\Scripts\activate.ps1 in windows Powershell
 84 |     python -m pip install -r "requirements.txt" -e .
 85 |     ```
 86 | 2. Obtain dataset of captcha images and store it to directory *out/data*. Images are expected to be named according
 87 | to captcha displayed in the image.
 88 | 
 89 |     E.g.
 90 |     
 91 |     ![captcha image](docs/abfd_ba574f47-92d8-407d-9b34-d5f6fa8a74c3.png)
 92 |     
 93 |     This captcha image is expected to be named e.g. *ABFD.png*, *abfd.png* (if we don't care about case sensitivity)
 94 |     or e.g. *ABFD_{UUID4 CODE}.png* (to distinguish different images for same captcha letters).
 95 |     
 96 |     This project contains a way to generate captchas yourself using *captcha* Python package using script *bin/simple_captcha_generate.py*.
 97 |     You can run it in a following manner
 98 |     ```shell script
 99 |     python bin/simple_captcha_generate.py --height=70 --width=175 --available_chars="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" --captcha_length=6 --dataset_size=10000
100 |     ```
101 |     
102 |     Some of notable parameters are:
103 |     - *available_chars* - list of characters that will be generated
104 |     - *captcha_length* - how long generated captcha is going to be
105 |     - *dataset_size* - how large dataset is going to be generated
106 |     - *height* - height of generated captcha
107 |     - *width* - width of generated captcha
108 | 
109 | 3. Generate *annotations* files using *bin/captcha_annotate.py* script. You can call it for example
110 |     ```shell script
111 |     python bin/captcha_annotate.py --val_split=0.1 --test_split=0.1 --case_sensitive
112 |     ```
113 |     This will shuffle and split data into train/validation/test according to following parameters:
114 |     - *val_split* - how large part of data is going to be used for validation, e.g. 0.1 means 10%
115 |     - *test_split* - how large part of data is going to be used for testing
116 |     - *case_sensitive* - switch denoting that labels that are created will be case sensitive
117 |         - if such parameter is not passed, then for example if *aBcD* is in the image (and image is named accordingly),
118 |         resulting label will be *abcd*
119 |         - if it is passed, resulting label will be *aBcD*
120 |     
121 |     This script will create *annotations.txt*, *annotations-train.txt*, *annotations-validation.txt* and *annotations-test.txt*.
122 | 
123 | 4. Run training script *bin/train.py* for example like this:
124 |     ```shell script
125 |     python bin/train.py --available_chars="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" --captcha_length=6 
126 |     ```
127 |    The training script saves the model weights after each epoch into the *logs/train.py-{START TIMESTAMP}-{parameters etc.}* directory inside the output directory (*out* by default).
128 | 


--------------------------------------------------------------------------------
/src/ulozto_captcha_breaker/captcha_network.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | import tensorflow.keras as tf_keras
  6 | # from tensorflow.python.keras.utils.vis_utils import plot_model
  7 | 
  8 | from ulozto_captcha_breaker.metrics import all_correct_acc
  9 | 
 10 | 
class CaptchaNetwork:
    """
    Wraps a Keras model that classifies every character position of a captcha image.

    The instance owns the model, the preprocessing pipelines applied to images and
    labels, and the callbacks used while training.
    """

    def __init__(self, image_shape, classes: int, image_preprocess_pipeline, label_preprocess_pipeline, args):
        """
        Initializes CaptchaNetwork instance: builds or loads the model, compiles it and prepares callbacks.
        :param image_shape: Shape of image; only the first two items (height, width) are used, a single
        channel is appended for the model input.
        :param classes: Number of classes that model recognizes, i.e. the size of the softmax output
        for each character. E.g. if we want it to detect abcdefghijklmnopqrstuvwxyz, then classes must be 26.
        :param image_preprocess_pipeline: Specifies pipeline that is applied to images before they are put
        as input to neural network (used by both train and predict).
        :param label_preprocess_pipeline: Specifies pipeline that is applied to the labels passed to train
        before they are given to the model as targets.
        :param args: Parsed arguments. Attributes read here: weights_file, pretrained_model, l2,
        captcha_length, remove_layers, freeze_layers, save_model_path and logdir.
        """

        assert args.weights_file is None or args.pretrained_model is None, "Cannot load pretrained model and weights file at the same time"

        self._image_preprocess_pipeline = image_preprocess_pipeline
        self._label_preprocess_pipeline = label_preprocess_pipeline

        self._classes = classes
        input_shape = (image_shape[0], image_shape[1], 1)

        input = tf_keras.layers.Input(shape=input_shape)

        layer = input

        if not args.pretrained_model:
            # Build the network from scratch: a strided 7x7 convolution stem followed by
            # four stages (32, 64, 128, 256 filters) of two residual blocks each.
            # to normalize input
            layer = tf_keras.layers.BatchNormalization()(layer)
            layer = tf_keras.layers.Convolution2D(
                filters=32, kernel_size=7, strides=2, padding="same", use_bias=False,
                kernel_regularizer=tf_keras.regularizers.l2(args.l2))(layer)
            layer = tf_keras.layers.BatchNormalization()(layer)
            layer = tf_keras.layers.ReLU()(layer)
            layer = tf_keras.layers.MaxPooling2D(strides=2)(layer)

            layer = self._create_residual_block(layer, filters=32, l2=args.l2)
            layer = self._create_residual_block(layer, filters=32, l2=args.l2)

            # stride-2 convolution halves the spatial size and doubles the filters
            layer = tf_keras.layers.BatchNormalization()(layer)
            layer = tf_keras.layers.ReLU()(layer)
            layer = tf_keras.layers.Convolution2D(
                filters=64, kernel_size=3, strides=2, padding="same", use_bias=False,
                kernel_regularizer=tf_keras.regularizers.l2(args.l2))(layer)
            layer = self._create_residual_block(layer, filters=64, l2=args.l2)
            layer = self._create_residual_block(layer, filters=64, l2=args.l2)

            layer = tf_keras.layers.BatchNormalization()(layer)
            layer = tf_keras.layers.ReLU()(layer)
            layer = tf_keras.layers.Convolution2D(
                filters=128, kernel_size=3, strides=2, padding="same", use_bias=False,
                kernel_regularizer=tf_keras.regularizers.l2(args.l2))(layer)
            layer = self._create_residual_block(layer, filters=128, l2=args.l2)
            layer = self._create_residual_block(layer, filters=128, l2=args.l2)

            layer = tf_keras.layers.BatchNormalization()(layer)
            layer = tf_keras.layers.ReLU()(layer)
            layer = tf_keras.layers.Convolution2D(
                filters=256, kernel_size=3, strides=2, padding="same", use_bias=False,
                kernel_regularizer=tf_keras.regularizers.l2(args.l2))(layer)
            layer = self._create_residual_block(layer, filters=256, l2=args.l2)
            layer = self._create_residual_block(layer, filters=256, l2=args.l2)

            layer = tf_keras.layers.GlobalAveragePooling2D()(layer)

            # one feature vector of size `classes` per captcha character
            layer = tf_keras.layers.Dense(units=args.captcha_length * classes,
                                          kernel_regularizer=tf_keras.regularizers.l2(args.l2))(layer)
            # # reshape into (batch, letters_count, rest)
            target_shape = (args.captcha_length, classes)
            layer = tf_keras.layers.Reshape(target_shape=target_shape)(layer)

            # layer = tf_keras.layers.Dense(units=100, activation="relu", kernel_regularizer=tf_keras.regularizers.l2(0.01))(layer)
            # layer = tf_keras.layers.Dropout(0.5)(layer)
            # classification head; output has shape (batch, captcha_length, classes)
            output = tf_keras.layers.Dense(units=classes, activation="softmax")(layer)

            self._model = tf_keras.Model(inputs=input, outputs=output)
        else:
            # use the stored model as is instead of building a new one
            self._model = tf_keras.models.load_model(args.pretrained_model)

        if args.weights_file is not None:
            self._model.load_weights(args.weights_file)

        print(f"Total layers: {len(self._model.layers)}")
        if args.remove_layers:
            # remove classification header and add new one
            input = self._model.layers[0].input
            layer = self._model.layers[-1].input
            output = tf_keras.layers.Dense(units=classes, activation="softmax")(layer)

            self._model = tf_keras.Model(inputs=input, outputs=output)

        if args.freeze_layers > 0:
            # freeze the first `freeze_layers` layers, counted from the input
            for i in range(args.freeze_layers):
                self._model.layers[i].trainable = False

        metrics = [tf_keras.metrics.sparse_categorical_accuracy]
        # The project metric is only added when the model is not going to be saved.
        # NOTE(review): presumably to keep the exported model free of custom objects - confirm.
        if not args.save_model_path:
            metrics.append(all_correct_acc)
        self._model.compile(optimizer=tf_keras.optimizers.Adam(),
                            loss=tf_keras.losses.SparseCategoricalCrossentropy(),
                            metrics=metrics)

        self._model.summary()
        # plot_model(self._model, to_file=os.path.join(args.out_dir, "model.png"), show_shapes=True)

        self._tb_callback = tf_keras.callbacks.TensorBoard(args.logdir, update_freq=1000, profile_batch=1)
        # Replace the callback's on_train_end hook with a no-op.
        # NOTE(review): the reason is not visible in this file - confirm before removing.
        self._tb_callback.on_train_end = lambda *_: None
        # weights-only checkpoints written into the log directory, file name contains the epoch number
        checkpoint_path = os.path.join(args.logdir, 'cp-{epoch:02d}.h5')
        self._check_callback = tf_keras.callbacks.ModelCheckpoint(
            checkpoint_path, save_weights_only=True)

        if args.save_model_path:
            self.save_model(args.save_model_path)

    def _create_residual_block(self, layer: tf_keras.layers.Layer, filters: int, l2: float):
        """
        Appends a residual block: two (BatchNormalization, ReLU, 3x3 convolution) groups whose
        result is added to the block's input.
        :param layer: Output of the previous layer. It is added to the block's result, so it has to have
        `filters` channels.
        :param filters: Number of filters of both convolutions.
        :param l2: L2 regularization factor of the convolution kernels.
        :return: Output of the block.
        """
        prev_layer = layer
        layer = tf_keras.layers.BatchNormalization()(layer)
        layer = tf_keras.layers.ReLU()(layer)
        layer = tf_keras.layers.Convolution2D(
            filters=filters, kernel_size=3, strides=1, padding="same", use_bias=False,
            kernel_regularizer=tf_keras.regularizers.l2(l2))(layer)

        layer = tf_keras.layers.BatchNormalization()(layer)
        layer = tf_keras.layers.ReLU()(layer)
        layer = tf_keras.layers.Convolution2D(
            filters=filters, kernel_size=3, strides=1, padding="same", use_bias=False,
            kernel_regularizer=tf_keras.regularizers.l2(l2))(layer)
        # skip connection
        layer = tf_keras.layers.Add()([prev_layer, layer])

        return layer
    
    def train(self, train_x, train_y, val_x, val_y, args):
        """
        Train the model. Images and labels are first run through the preprocessing pipelines.
        :param train_x: Numpy array with train captcha images.
        :param train_y: Numpy array with train captcha image labels (e.g. "abxz").
        :param val_x: Numpy array with validation captcha images.
        :param val_y: Numpy array with validation captcha image labels (e.g. "abxz").
        :param args: Parsed arguments; batch_size and epochs are read here.
        """
        train_inputs, train_labels = self._image_preprocess_pipeline(train_x), self._label_preprocess_pipeline(train_y)
        dev_inputs, dev_labels = self._image_preprocess_pipeline(val_x), self._label_preprocess_pipeline(
            val_y)

        # drop the local references to the raw data, only the preprocessed copies are used below
        del train_x
        del train_y
        del val_x
        del val_y

        self._model.fit(x=train_inputs, y=train_labels, batch_size=args.batch_size, epochs=args.epochs,
                        validation_data=(dev_inputs, dev_labels),
                        callbacks=[self._check_callback, self._tb_callback])

    def save_model(self, out_path):
        """
        Exports the model using tf.saved_model.save.
        :param out_path: Path the model is saved to.
        """
        tf.saved_model.save(self._model, out_path)

    def predict(self, inputs, args):
        """
        Predicts labels from input images. Returns list of indices that denote characters.
        :param inputs: List of captcha images; they are run through the image preprocessing pipeline first.
        :param args: Parsed arguments; batch_size is read here.
        :return: Array of indices that denote characters, one row per image.
        """
        inputs = self._image_preprocess_pipeline(inputs)

        y_pred = self._model.predict(inputs, args.batch_size)
        # a model with a single output position has no character axis; add it so argmax can use axis 2
        if len(y_pred.shape) <= 2:
            y_pred = np.expand_dims(y_pred, axis=1)
        # most probable class for every character position
        y_pred = np.argmax(y_pred, axis=2)
        return y_pred


--------------------------------------------------------------------------------