├── src └── ulozto_captcha_breaker │ ├── dataset │ ├── __init__.py │ ├── preprocessing │ │ ├── __init__.py │ │ ├── label_pipeline.py │ │ ├── image_pipeline.py │ │ ├── image_preprocessors.py │ │ └── label_preprocessors.py │ ├── captcha_dataset.py │ └── annotations_generator.py │ ├── utils.py │ ├── metrics.py │ └── captcha_network.py ├── .vscode └── settings.json ├── docs ├── examples.png └── abfd_ba574f47-92d8-407d-9b34-d5f6fa8a74c3.png ├── requirements.in ├── Makefile ├── scripts └── init.sh ├── bin ├── create_tflite.py ├── captcha_annotate.py ├── predict.py ├── simple_captcha_generate.py ├── train.py └── test.py ├── setup.py ├── .devcontainer ├── devcontainer.json └── Dockerfile ├── LICENSE ├── .gitignore ├── requirements.txt └── README.md /src/ulozto_captcha_breaker/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ulozto_captcha_breaker/dataset/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.defaultInterpreterPath": "/usr/local/bin/python" 3 | } -------------------------------------------------------------------------------- /docs/examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JanPalasek/ulozto-captcha-breaker/HEAD/docs/examples.png -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | tensorflow 2 | matplotlib 3 | graphviz 4 | numpy 5 | pydot 6 | faker 7 | captcha 8 | scikit-image 
-------------------------------------------------------------------------------- /docs/abfd_ba574f47-92d8-407d-9b34-d5f6fa8a74c3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JanPalasek/ulozto-captcha-breaker/HEAD/docs/abfd_ba574f47-92d8-407d-9b34-d5f6fa8a74c3.png -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SHELL := /bin/bash 2 | PYTHON ?= python 3 | 4 | init: 5 | sh scripts/init.sh 6 | 7 | clean: 8 | rm -rf .quarto _site/ 9 | 10 | install_dev: 11 | $(PYTHON) -m pip install -r requirements.txt -e . 12 | 13 | 14 | .PHONY: init clean install_dev -------------------------------------------------------------------------------- /scripts/init.sh: -------------------------------------------------------------------------------- 1 | if ! grep -q "# DEVCONTAINER INIT" ~/.bashrc; then 2 | echo "Initializing..." 3 | echo "# DEVCONTAINER INIT #" >> ~/.bashrc; 4 | echo "export USER_NAME=$(id -un)" >> ~/.bashrc; 5 | echo "export USER_ID=$(id -u)" >> ~/.bashrc; 6 | echo "export USER_GID=$(id -g)" >> ~/.bashrc; 7 | echo "export USER_GNAME=$(id -gn)" >> ~/.bashrc; 8 | echo "export DOCKER_GID=$(getent group docker | cut -d: -f3)" >> ~/.bashrc; 9 | fi 10 | echo "Initialization complete..." 
class LabelPreprocessPipeline:
    """
    Encodes a batch of labels with the supplied encoder and returns them as one numpy array.
    """
    def __init__(self, encoder):
        # encoder is any object exposing ``encode(label)``
        self._encoder = encoder

    def __call__(self, labels):
        """
        Encodes every label of the batch.
        :param labels: Iterable of labels.
        :return: numpy array built from the encoded labels, in input order.
        """
        return np.array([self._encoder.encode(label) for label in labels])
17 | with open(os.path.join(args.out_dir, 'model.tflite'), 'wb') as f: 18 | f.write(tflite_model) 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | 4 | with open("README.md", "r") as fh: 5 | long_description = fh.read() 6 | 7 | setuptools.setup( 8 | name='ulozto-captcha-breaker', 9 | version='3.0a', 10 | description="", 11 | long_description=long_description, 12 | long_description_content_type="text/markdown", 13 | package_dir={ 14 | "": "src" 15 | }, 16 | install_requires=[ 17 | "tensorflow>=2.0.0", 18 | "matplotlib>=3.5.0" 19 | ], 20 | packages=setuptools.find_packages("src"), 21 | scripts=["bin/train.py", "bin/test.py", "bin/predict.py"], 22 | python_requires=">=3.8" 23 | ) -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the 2 | // README at: https://github.com/devcontainers/templates/tree/main/src/anaconda 3 | { 4 | "name": "quarto-templates", 5 | "build": { 6 | "context": "..", 7 | "dockerfile": "Dockerfile", 8 | "args": { 9 | "USER_ID": "${localEnv:USER_ID}", 10 | "USER_NAME": "${localEnv:USER_NAME}", 11 | "USER_GID": "${localEnv:USER_GID}", 12 | "USER_GNAME": "${localEnv:USER_GNAME}" 13 | } 14 | }, 15 | 16 | // Use 'postCreateCommand' to run commands after the container is created. 17 | "postCreateCommand": "make install_dev", 18 | 19 | // Configure tool-specific properties. 
class FileWriter:
    """
    Saves images as PNG files into a single target directory.
    """
    def __init__(self, path):
        """
        :param path: Directory the images are written to. It is created (including parents) when missing.
        """
        self._path = path
        self._internal_counter = 0

        # exist_ok avoids the race between a separate existence check and the creation
        os.makedirs(path, exist_ok=True)

    def save_image(self, img: np.ndarray, name=None, category="debug"):
        """
        Saves a copy of the image as ``<category>_<name>.png`` in the target directory.
        :param img: Image to save. float32/float64 images are multiplied by 255 and cast to uint8 first.
        :param name: File name without extension. When None, the current timestamp followed by the
        internal counter is used.
        :param category: Prefix of the file name.
        """
        img = np.copy(img)
        if name is None:
            # timestamp + counter keeps generated names distinct within one writer
            name = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S.%f') + str(self._internal_counter)

        self._internal_counter += 1

        # os.path.join also accepts path-like objects, unlike string concatenation
        dest = os.path.join(self._path, "{}_{}.png".format(category, name))

        if img.dtype in [np.float32, np.float64]:
            img = img * 255
            img = img.astype(np.uint8)

        plt.imsave(dest, img)
list(image_label) 24 | 25 | image = plt.imread(image_path) 26 | result.append((image, image_label)) 27 | 28 | return result 29 | 30 | def get_data(self): 31 | return list(zip(*self._data)) 32 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10.12 2 | 3 | ARG USER_ID 4 | ARG USER_NAME 5 | ARG USER_GID 6 | ARG USER_GNAME 7 | 8 | ARG QUARTO_VERSION=1.3.450 9 | 10 | # Update packages 11 | RUN apt-get update && apt-get install -y librsvg2-bin && \ 12 | rm -rf /var/lib/apt/lists/* 13 | 14 | # Download and install Quarto 15 | RUN mkdir -p /opt/quarto/${QUARTO_VERSION} 16 | RUN curl -o quarto.tar.gz -L \ 17 | "https://github.com/quarto-dev/quarto-cli/releases/download/v${QUARTO_VERSION}/quarto-${QUARTO_VERSION}-linux-amd64.tar.gz" 18 | RUN tar -zxvf quarto.tar.gz \ 19 | -C "/opt/quarto/${QUARTO_VERSION}" \ 20 | --strip-components=1 21 | RUN rm quarto.tar.gz 22 | RUN ln -s /opt/quarto/${QUARTO_VERSION}/bin/quarto /usr/local/bin/quarto 23 | 24 | # INSTALL THE REST AS A USER 25 | # Create user and group 26 | RUN groupadd -g ${USER_GID} ${USER_GNAME} 27 | RUN useradd ${USER_NAME} -u ${USER_ID} -g ${USER_GNAME} -m -s /bin/bash 28 | USER ${USER_NAME} 29 | 30 | # Install Quarto extension for PDF generation 31 | RUN quarto install tinytex 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Jan Palasek 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to 
class ImagePreprocessorPipeline:
    """
    Applies a fixed sequence of preprocessors to every image of a batch. The pipeline is run by calling
    the instance (*__call__*).
    """
    def __init__(self, preprocessors, out_writer: FileWriter = None, debug_writer: FileWriter = None):
        self._out_writer = out_writer
        self._debug_writer = debug_writer

        self._preprocessors = preprocessors

    def _run_preprocessors(self, image):
        """
        Runs all preprocessors, in order, on a copy of a single image.
        :param image: Input image; it is not modified.
        :return: Image after the last preprocessor.
        """
        current = np.copy(image)
        for preprocessor in self._preprocessors:
            current = preprocessor(current)
            if self._debug_writer is None:
                continue
            # every intermediate result is stored under the lower-cased preprocessor class name
            self._debug_writer.save_image(current, category=str(type(preprocessor).__name__).lower())

        return current

    def __call__(self, images):
        """
        Runs the pipeline on every image.
        :param images: List of images.
        :return: numpy array of the processed images, in input order.
        """
        processed = []
        for image in images:
            final_image = self._run_preprocessors(image)
            processed.append(final_image)

            if self._out_writer is not None:
                self._out_writer.save_image(final_image, category="final")

        return np.array(processed)
class ConvertToGrayscalePreprocessor:
    """
    Converts an image to grayscale as a weighted sum of its first three channels.
    """
    def __call__(self, img: np.ndarray):
        red = img[:, :, 0]
        green = img[:, :, 1]
        blue = img[:, :, 2]

        # channel weights 0.299 / 0.587 / 0.114, summed left to right
        gray = 0.299 * red
        gray = gray + 0.587 * green
        gray = gray + 0.114 * blue
        return gray


class ImageCutPreprocessor:
    """
    Cuts an image into equally sized pieces along axis 1.
    """
    def __init__(self, pieces_count: int):
        self._pieces_count = pieces_count

    def __call__(self, image: np.ndarray):
        return np.array(np.split(image, self._pieces_count, axis=1))


class NormalizeImagePreprocessor:
    """
    Casts the image to float32, divides it by 255 and appends a trailing axis of size 1.
    """
    def __call__(self, image):
        scaled = image.astype(np.float32)
        scaled = scaled / 255
        # axis=-1 appends the new axis after the last existing one
        return np.expand_dims(scaled, axis=-1)


class ResizePreprocessor:
    """
    Resizes image to target height and width.
    """
    def __init__(self, target_height, target_width):
        self._target_width = target_width
        self._target_height = target_height

    def __call__(self, img: np.ndarray):
        target_shape = (self._target_height, self._target_width)
        return resize(img, target_shape)
if 0.1, then 10% of all data are used " 18 | "for validation.") 19 | parser.add_argument("--seed", default=42, type=int) 20 | parser.add_argument("--out_dir", type=str, default="out") 21 | parser.add_argument("--case_sensitive", action="store_true", default=False, help="Boolean switch that is true when " 22 | "captcha label should be case sensitive.") 23 | 24 | args = parser.parse_args() 25 | 26 | random.seed(args.seed) 27 | 28 | out_dir = os.path.abspath(args.out_dir) 29 | data_dir = os.path.join(out_dir, "data") 30 | 31 | generator = AnnotationsGenerator(data_dir, out_dir, args.val_split, args.test_split, not args.case_sensitive) 32 | generator.save_annotations() 33 | -------------------------------------------------------------------------------- /bin/predict.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from ulozto_captcha_breaker.dataset.preprocessing.image_pipeline import ImagePreprocessorPipeline 4 | from ulozto_captcha_breaker.dataset.preprocessing.image_preprocessors import ConvertToGrayscalePreprocessor, NormalizeImagePreprocessor, ResizePreprocessor 5 | from ulozto_captcha_breaker.dataset.preprocessing.label_preprocessors import StringEncoder 6 | import tensorflow as tf 7 | 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | 11 | 12 | def main(args): 13 | image = plt.imread(args.image_path) 14 | image_preprocess_pipeline = ImagePreprocessorPipeline([ 15 | ConvertToGrayscalePreprocessor(), 16 | ResizePreprocessor(image.shape[0], image.shape[1]), 17 | NormalizeImagePreprocessor() 18 | ]) 19 | 20 | # create interpreter 21 | interpreter = tf.lite.Interpreter(args.model_path) 22 | interpreter.allocate_tensors() 23 | 24 | input_ = image_preprocess_pipeline([image]) 25 | input_details = interpreter.get_input_details() 26 | output_details = interpreter.get_output_details() 27 | interpreter.set_tensor(input_details[0]['index'], input_) 28 | interpreter.invoke() 29 | 30 | # predict and 
class OneCharEncoder:
    """
    Encodes a single character of a string into its index in the available characters.
    """
    def __init__(self, available_chars):
        self._available_chars = available_chars

    def encode_char(self, char: str):
        return self._available_chars.index(char)

    def encode(self, string):
        # NOTE(review): only the character at index 1 is encoded - confirm this is intended
        return np.array([self.encode_char(string[1])])


class OneHotEncoder:
    """
    Converts characters to one-hot vectors over the available characters and back.
    """
    def __init__(self, available_chars):
        self._available_chars = available_chars

    def encode_char(self, char: str):
        position = self._available_chars.index(char)
        depth = len(self._available_chars)
        return tf.one_hot(position, depth)

    def decode_char(self, one_hot_vector):
        return self._available_chars[tf.argmax(one_hot_vector, axis=0)]


class StringEncoder:
    """
    Encodes strings into arrays of character indices and decodes such indices back into strings.
    """
    def __init__(self, available_chars):
        self._available_chars = available_chars

    def encode_char(self, char: str):
        return self._available_chars.index(char)

    def encode(self, string):
        return np.array([self.encode_char(char) for char in string])

    def decode_char(self, char_idx: int):
        return self._available_chars[char_idx]

    def decode(self, li):
        return "".join(self.decode_char(char_idx) for char_idx in li)
def generate_randomly(available_chars: str, dataset_size: int, captcha_length: int):
    """
    Lazily yields ``dataset_size`` captcha codes of ``captcha_length`` characters, each character drawn
    with ``random.randint`` from ``available_chars``.
    """
    last_index = len(available_chars) - 1
    for _ in range(dataset_size):
        yield "".join(
            available_chars[random.randint(0, last_index)] for _ in range(captcha_length)
        )


def generate_systematically(available_chars: str, dataset_size: int, captcha_length: int):
    """
    Lazily yields the first ``dataset_size`` codes of the cartesian product of ``available_chars``
    taken ``captcha_length`` times, in ``itertools.product`` order.
    """
    combinations = itertools.product(available_chars, repeat=captcha_length)
    yield from map("".join, itertools.islice(combinations, dataset_size))
== "systematically" 61 | else generate_randomly(args.available_chars, args.dataset_size, args.captcha_length)) 62 | for captcha_code in generated_captchas: 63 | image.write(f'{captcha_code}', f'{data_dir}/{captcha_code}_{fake.uuid4()}.png') -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
class AnnotationsGenerator:
    """
    Builds annotation files for a directory of captcha images and splits them into test, validation and
    train subsets. Each annotation is one line of the form ``<image path> <label>``.

    The label of an image is the part of its file name (without extension) before the first underscore.
    """
    def __init__(self, dir_path: str, annotations_out_dir: str,
                 validation_ratio: float, test_ratio: float, ignore_case: bool):
        """
        :param dir_path: Directory whose entries are annotated.
        :param annotations_out_dir: Directory the annotation files are written to.
        :param validation_ratio: Fraction of all items put into the validation subset.
        :param test_ratio: Fraction of all items put into the test subset.
        :param ignore_case: When True, labels are lower-cased in every written annotation.
        """
        self._dir_path = dir_path
        self._validation_ratio = validation_ratio
        self._test_ratio = test_ratio
        self._ignore_case = ignore_case
        self._annotations_out_dir = annotations_out_dir

    def get_annotations(self):
        """
        Yields ``(item_path, item_label)`` for every entry of the data directory, in ``os.listdir`` order.
        """
        for item in os.listdir(self._dir_path):
            item_path = os.path.join(self._dir_path, item)
            item_label = os.path.splitext(item)[0].split("_")[0]

            yield item_path, item_label

    def _format_annotation(self, image_path, label):
        """Returns one annotation line, lower-casing the label when case is ignored."""
        result_label = label.lower() if self._ignore_case else label
        return f"{image_path} {result_label}\n"

    def save_annotations(self):
        """
        Shuffles the annotations with the ``random`` module and writes ``annotations-test.txt``,
        ``annotations-validation.txt`` and ``annotations-train.txt``, plus ``annotations.txt`` holding
        all of them in that order.

        All three subsets honour ``ignore_case``; previously the train subset was always lower-cased.
        """
        out_dir = self._annotations_out_dir

        annotations = list(self.get_annotations())
        indices = list(range(len(annotations)))
        random.shuffle(indices)

        test_samples_count = int(len(indices) * self._test_ratio)
        validation_samples_count = int(len(indices) * self._validation_ratio)
        validation_end = test_samples_count + validation_samples_count

        # order matters: annotations.txt lists test, then validation, then train items
        splits = [
            ("annotations-test.txt", indices[:test_samples_count]),
            ("annotations-validation.txt", indices[test_samples_count:validation_end]),
            ("annotations-train.txt", indices[validation_end:]),
        ]

        with open(os.path.join(out_dir, "annotations.txt"), "w") as annotations_file:
            for file_name, split_indices in splits:
                with open(os.path.join(out_dir, file_name), "w") as file:
                    for index in split_indices:
                        image_path, label = annotations[index]
                        annotation = self._format_annotation(image_path, label)

                        file.write(annotation)
                        annotations_file.write(annotation)
update, run: 4 | # 5 | # pip-compile 6 | # 7 | absl-py==1.3.0 8 | # via 9 | # tensorboard 10 | # tensorflow 11 | astunparse==1.6.3 12 | # via tensorflow 13 | cachetools==5.2.0 14 | # via google-auth 15 | captcha==0.4 16 | # via -r requirements.in 17 | certifi==2022.9.24 18 | # via requests 19 | charset-normalizer==2.1.1 20 | # via requests 21 | contourpy==1.0.6 22 | # via matplotlib 23 | cycler==0.11.0 24 | # via matplotlib 25 | faker==15.2.0 26 | # via -r requirements.in 27 | flatbuffers==22.10.26 28 | # via tensorflow 29 | fonttools==4.38.0 30 | # via matplotlib 31 | gast==0.4.0 32 | # via tensorflow 33 | google-auth==2.14.0 34 | # via 35 | # google-auth-oauthlib 36 | # tensorboard 37 | google-auth-oauthlib==0.4.6 38 | # via tensorboard 39 | google-pasta==0.2.0 40 | # via tensorflow 41 | graphviz==0.20.1 42 | # via -r requirements.in 43 | grpcio==1.50.0 44 | # via 45 | # tensorboard 46 | # tensorflow 47 | h5py==3.7.0 48 | # via tensorflow 49 | idna==3.4 50 | # via requests 51 | imageio==2.22.3 52 | # via scikit-image 53 | importlib-metadata==5.0.0 54 | # via markdown 55 | keras==2.10.0 56 | # via tensorflow 57 | keras-preprocessing==1.1.2 58 | # via tensorflow 59 | kiwisolver==1.4.4 60 | # via matplotlib 61 | libclang==14.0.6 62 | # via tensorflow 63 | markdown==3.4.1 64 | # via tensorboard 65 | markupsafe==2.1.1 66 | # via werkzeug 67 | matplotlib==3.6.2 68 | # via -r requirements.in 69 | networkx==2.8.8 70 | # via scikit-image 71 | numpy==1.23.4 72 | # via 73 | # -r requirements.in 74 | # contourpy 75 | # h5py 76 | # imageio 77 | # keras-preprocessing 78 | # matplotlib 79 | # opt-einsum 80 | # pywavelets 81 | # scikit-image 82 | # scipy 83 | # tensorboard 84 | # tensorflow 85 | # tifffile 86 | oauthlib==3.2.2 87 | # via requests-oauthlib 88 | opt-einsum==3.3.0 89 | # via tensorflow 90 | packaging==21.3 91 | # via 92 | # matplotlib 93 | # scikit-image 94 | # tensorflow 95 | pillow==9.3.0 96 | # via 97 | # captcha 98 | # imageio 99 | # matplotlib 100 | # 
scikit-image 101 | protobuf==3.19.6 102 | # via 103 | # tensorboard 104 | # tensorflow 105 | pyasn1==0.4.8 106 | # via 107 | # pyasn1-modules 108 | # rsa 109 | pyasn1-modules==0.2.8 110 | # via google-auth 111 | pydot==1.4.2 112 | # via -r requirements.in 113 | pyparsing==3.0.9 114 | # via 115 | # matplotlib 116 | # packaging 117 | # pydot 118 | python-dateutil==2.8.2 119 | # via 120 | # faker 121 | # matplotlib 122 | pywavelets==1.4.1 123 | # via scikit-image 124 | requests==2.28.1 125 | # via 126 | # requests-oauthlib 127 | # tensorboard 128 | requests-oauthlib==1.3.1 129 | # via google-auth-oauthlib 130 | rsa==4.9 131 | # via google-auth 132 | scikit-image==0.19.3 133 | # via -r requirements.in 134 | scipy==1.9.3 135 | # via scikit-image 136 | six==1.16.0 137 | # via 138 | # astunparse 139 | # google-auth 140 | # google-pasta 141 | # grpcio 142 | # keras-preprocessing 143 | # python-dateutil 144 | # tensorflow 145 | tensorboard==2.10.1 146 | # via tensorflow 147 | tensorboard-data-server==0.6.1 148 | # via tensorboard 149 | tensorboard-plugin-wit==1.8.1 150 | # via tensorboard 151 | tensorflow==2.10.0 152 | # via -r requirements.in 153 | tensorflow-estimator==2.10.0 154 | # via tensorflow 155 | tensorflow-io-gcs-filesystem==0.27.0 156 | # via tensorflow 157 | termcolor==2.1.0 158 | # via tensorflow 159 | tifffile==2022.10.10 160 | # via scikit-image 161 | typing-extensions==4.4.0 162 | # via tensorflow 163 | urllib3==1.26.12 164 | # via requests 165 | werkzeug==2.2.2 166 | # via tensorboard 167 | wheel==0.38.2 168 | # via 169 | # astunparse 170 | # tensorboard 171 | wrapt==1.14.1 172 | # via tensorflow 173 | zipp==3.10.0 174 | # via importlib-metadata 175 | 176 | # The following packages are considered to be unsafe in a requirements file: 177 | # setuptools 178 | -------------------------------------------------------------------------------- /bin/train.py: -------------------------------------------------------------------------------- 1 | from 
from ulozto_captcha_breaker.dataset.preprocessing.image_preprocessors import ConvertToGrayscalePreprocessor, ResizePreprocessor, NormalizeImagePreprocessor
from ulozto_captcha_breaker.dataset.preprocessing.image_pipeline import ImagePreprocessorPipeline

from ulozto_captcha_breaker.dataset.preprocessing.label_preprocessors import StringEncoder
from ulozto_captcha_breaker.dataset.preprocessing.label_pipeline import LabelPreprocessPipeline

import numpy as np
import random

from ulozto_captcha_breaker.captcha_network import CaptchaNetwork
from ulozto_captcha_breaker.dataset.captcha_dataset import CaptchaDataset

import argparse
import datetime
import os
import re
import tensorflow as tf


def _build_parser():
    """Create the command-line parser of the training script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--weights_file", default=None, type=str,
                        help="Path to file that contains pre-trained weights.")
    parser.add_argument("--pretrained_model", default=None, type=str)
    parser.add_argument("--freeze_layers", default=0, type=int,
                        help="How many layers should be frozen for the training. "
                             "Counts from the beginning.")
    parser.add_argument("--remove_layers", action="store_true")
    parser.add_argument("--batch_size", default=32, type=int, help="Batch size.")
    parser.add_argument("--epochs", default=1500, type=int, help="Number of epochs.")
    parser.add_argument("--out_dir", default="out", type=str, help="Out dir")
    parser.add_argument("--seed", default=42, type=int)
    parser.add_argument("--captcha_length", default=4, type=int)
    parser.add_argument("--available_chars", default="abcdefghijklmnopqrstuvwxyz", type=str, help="Labels")
    parser.add_argument("--transformed_img_width", default=None, type=int)
    parser.add_argument("--transformed_img_height", default=None, type=int)
    parser.add_argument("--l2", default=0.00001, type=float)
    return parser


def main():
    """
    Train a CaptchaNetwork on the train/validation annotation files found in ``--out_dir``.

    Reads ``annotations-train.txt`` and ``annotations-validation.txt`` from the output directory,
    builds the image and label preprocessing pipelines and runs the training.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # Set before the log directory name is derived so that the name keeps containing this entry.
    # CaptchaNetwork only exports the model when this attribute is truthy.
    args.save_model_path = None

    # Resizing needs both dimensions; report a usage error instead of relying on `assert`,
    # which is skipped when Python runs with -O.
    if (args.transformed_img_width is None) != (args.transformed_img_height is None):
        parser.error("--transformed_img_width and --transformed_img_height must be passed together.")

    # Fix random seeds
    np.random.seed(args.seed)
    tf.random.set_seed(args.seed)
    random.seed(args.seed)

    out_dir = os.path.abspath(args.out_dir)
    train_annotations_path = os.path.join(out_dir, "annotations-train.txt")
    val_annotations_path = os.path.join(out_dir, "annotations-validation.txt")

    # Log directory name: script name, start timestamp and every argument with its key
    # abbreviated to the first letters of its underscore-separated words.
    args.logdir = os.path.join(out_dir, "logs", "{}-{}-{}".format(
        os.path.basename(__file__),
        datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
        ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", key), value) for key, value in sorted(vars(args).items())))
    ))

    train_dataset = CaptchaDataset(train_annotations_path, len(args.available_chars))
    val_dataset = CaptchaDataset(val_annotations_path, len(args.available_chars))

    if args.transformed_img_width is not None and args.transformed_img_height is not None:
        input_shape = (args.transformed_img_height, args.transformed_img_width)
    else:
        # No explicit size requested: fall back to the shape reported by the training dataset.
        image_shape = train_dataset.get_image_shape()
        input_shape = (image_shape[0], image_shape[1])

    image_preprocess_pipeline = ImagePreprocessorPipeline([
        ConvertToGrayscalePreprocessor(),
        ResizePreprocessor(input_shape[0], input_shape[1]),
        NormalizeImagePreprocessor()
    ])
    label_preprocess_pipeline = LabelPreprocessPipeline(
        StringEncoder(available_chars=args.available_chars)
    )

    train_x, train_y = train_dataset.get_data()
    val_x, val_y = val_dataset.get_data()

    network = CaptchaNetwork(image_shape=input_shape,
                             classes=train_dataset.classes,
                             image_preprocess_pipeline=image_preprocess_pipeline,
                             label_preprocess_pipeline=label_preprocess_pipeline,
                             args=args)

    network.train(train_x, train_y, val_x, val_y, args)


if __name__ == "__main__":
    main()
from ulozto_captcha_breaker.dataset.preprocessing.image_preprocessors import ConvertToGrayscalePreprocessor, ResizePreprocessor, NormalizeImagePreprocessor
from ulozto_captcha_breaker.dataset.preprocessing.image_pipeline import ImagePreprocessorPipeline

from ulozto_captcha_breaker.dataset.preprocessing.label_preprocessors import StringEncoder
from ulozto_captcha_breaker.dataset.preprocessing.label_pipeline import LabelPreprocessPipeline

import numpy as np
import random

from ulozto_captcha_breaker.captcha_network import CaptchaNetwork
from ulozto_captcha_breaker.dataset.captcha_dataset import CaptchaDataset

import argparse
import datetime
import os
import re
import tensorflow as tf


def _build_parser():
    """Create the command-line parser of the evaluation script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--weights_file", default=None, type=str,
                        help="Path to file that contains pre-trained weights.")
    parser.add_argument("--pretrained_model", default=None, type=str)
    parser.add_argument("--freeze_layers", default=0, type=int,
                        help="How many layers should be frozen for the training. "
                             "Counts from the beginning.")
    parser.add_argument("--remove_layers", action="store_true")
    parser.add_argument("--batch_size", default=32, type=int, help="Batch size.")
    parser.add_argument("--epochs", default=1500, type=int, help="Number of epochs.")
    parser.add_argument("--out_dir", default="out", type=str, help="Out dir")
    parser.add_argument("--seed", default=42, type=int)
    parser.add_argument("--captcha_length", default=4, type=int)
    parser.add_argument("--available_chars", default="abcdefghijklmnopqrstuvwxyz", type=str, help="Labels")
    parser.add_argument("--transformed_img_width", default=None, type=int)
    parser.add_argument("--transformed_img_height", default=None, type=int)
    parser.add_argument("--l2", default=0.0001, type=float)
    return parser


def main():
    """
    Evaluate a trained CaptchaNetwork on ``annotations-test.txt`` found in ``--out_dir``.

    Writes one ``{all_correct};{label};{prediction}`` row per sample into ``out_test.csv`` in the
    output directory and prints the share of captchas whose every character was predicted correctly.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # There is nothing to evaluate without trained parameters. Reported as a usage error rather
    # than an `assert`, which is skipped when Python runs with -O.
    if args.weights_file is None and args.pretrained_model is None:
        parser.error("Weights file or pretrained model must be passed in order to test it.")

    # Fix random seeds
    np.random.seed(args.seed)
    tf.random.set_seed(args.seed)
    random.seed(args.seed)

    out_dir = os.path.abspath(args.out_dir)
    annotations_path = os.path.join(out_dir, "annotations-test.txt")

    # Log directory name: script name, start timestamp and every argument with its key
    # abbreviated to the first letters of its underscore-separated words.
    args.logdir = os.path.join(out_dir, "logs", "{}-{}-{}".format(
        os.path.basename(__file__),
        datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
        ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", key), value) for key, value in sorted(vars(args).items())))
    ))
    # Assigned after the log directory name is derived, so the name does not contain this entry.
    args.save_model_path = os.path.join(out_dir, "model")

    dataset = CaptchaDataset(annotations_path, len(args.available_chars))
    inputs, labels = dataset.get_data()

    if args.transformed_img_width is not None and args.transformed_img_height is not None:
        input_shape = (args.transformed_img_height, args.transformed_img_width)
    else:
        # No explicit size requested: fall back to the shape reported by the dataset.
        image_shape = dataset.get_image_shape()
        input_shape = (image_shape[0], image_shape[1])

    image_preprocess_pipeline = ImagePreprocessorPipeline([
        ConvertToGrayscalePreprocessor(),
        ResizePreprocessor(input_shape[0], input_shape[1]),
        NormalizeImagePreprocessor()
    ])
    label_preprocess_pipeline = LabelPreprocessPipeline(
        StringEncoder(available_chars=args.available_chars)
    )

    network = CaptchaNetwork(image_shape=input_shape,
                             classes=dataset.classes,
                             image_preprocess_pipeline=image_preprocess_pipeline,
                             label_preprocess_pipeline=label_preprocess_pipeline,
                             args=args)

    labels = label_preprocess_pipeline(labels)
    pred_labels = network.predict(inputs, args)

    # A captcha counts as correct only when every one of its characters matches.
    correct = labels == pred_labels
    all_correct = tf.cast(tf.reduce_all(correct, axis=1), tf.dtypes.float32)
    acc = tf.reduce_mean(all_correct)

    decoder = StringEncoder(available_chars=args.available_chars)
    with open(os.path.join(out_dir, "out_test.csv"), "w") as file:
        for is_correct, label, pred_label in zip(all_correct, labels, pred_labels):
            file.write(f"{is_correct};{decoder.decode(label)};{decoder.decode(pred_label)}\n")

    print(f"Test acc: {acc:.2f}")


if __name__ == "__main__":
    main()
3 | 4 | ![examples](docs/examples.png) 5 | 6 | The algorithm used will be described in a standalone document. 7 | 8 | ## How to use pretrained model in your project 9 | ### Prerequisites 10 | Packages 11 | - *numpy~=1.18.3* 12 | - *tflite_runtime~=2.5.0* 13 | 14 | You need to install Tensorflow Lite Runtime with the correct version depending on your operating system and instruction set. 15 | It can be found here: https://www.tensorflow.org/lite/guide/python. 16 | 17 | ### Model specification 18 | - Input shape: (batch_size, height, width, 1), where height = 70, width = 175 19 | - Output shape: (batch_size, number_of_letters, number_of_classes), where number_of_letters = 4 and number_of_classes = 26 20 | 21 | Note that it takes **grayscale images** as the input. RGB images therefore have to be converted. 22 | 23 | ### Steps 24 | 1. Go to the latest release and download the binary files 25 | 2. Instantiate the tflite interpreter. For that you're going to need the TFLite model. You can find it in the release binary files. 26 | - PATH_TO_TFLITE_MODEL is the path to the directory containing the neural network pretrained model 27 | ```python 28 | import tflite_runtime.interpreter as tflite 29 | interpreter = tflite.Interpreter(model_path=PATH_TO_TFLITE_MODEL) 30 | ``` 31 | 32 | 3. Normalize the image to the 0..1 interval. If it already is, skip this step. 33 | ```python 34 | img = (img / 255).astype(np.float32) 35 | ``` 36 | 4. 
Predict using the following code 37 | ```python 38 | # convert to grayscale 39 | r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2] 40 | input = 0.299 * r + 0.587 * g + 0.114 * b 41 | 42 | # input now has shape (70, 175) 43 | # we modify dimensions to match model's input 44 | input = np.expand_dims(input, 0) 45 | input = np.expand_dims(input, -1) 46 | # input is now of shape (batch_size, 70, 175, 1) 47 | # output will have shape (batch_size, 4, 26) 48 | 49 | interpreter.allocate_tensors() 50 | input_details = interpreter.get_input_details() 51 | output_details = interpreter.get_output_details() 52 | interpreter.set_tensor(input_details[0]['index'], input) 53 | interpreter.invoke() 54 | 55 | # predict and get the output 56 | output = interpreter.get_tensor(output_details[0]['index']) 57 | # now get labels 58 | labels_indices = np.argmax(output, axis=2) 59 | 60 | available_chars = "abcdefghijklmnopqrstuvwxyz" 61 | 62 | def decode(li): 63 | result = [] 64 | for char in li: 65 | result.append(available_chars[char]) 66 | return "".join(result) 67 | 68 | decoded_label = [decode(x) for x in labels_indices][0] 69 | ``` 70 | - *np* for numpy 71 | 72 | ## How to train your own model 73 | 1. Install environment 74 | The following script creates a new virtual environment. You can of course use the global environment instead. 75 | All scripts in the following sections are expected to be executed from the repository's root directory. 76 | ```shell script 77 | git clone https://github.com/JanPalasek/ulozto-captcha-breaker 78 | cd "ulozto-captcha-breaker" 79 | 80 | # create virtual environment 81 | python -m venv "venv" 82 | 83 | source venv/bin/activate # or .\venv\Scripts\activate.ps1 in Windows PowerShell 84 | python -m pip install -r "requirements.txt" -e . 85 | ``` 86 | 2. Obtain a dataset of captcha images and store it in the directory *out/data*. Images are expected to be named according 87 | to the captcha displayed in the image. 88 | 89 | E.g. 
90 | 91 | ![captcha image](docs/abfd_ba574f47-92d8-407d-9b34-d5f6fa8a74c3.png) 92 | 93 | This captcha image is expected to be named e.g. *ABFD.png*, *abfd.png* (if we don't care about case sensitivity) 94 | or e.g. *ABFD_{UUID4 CODE}.png* (to distinguish different images for the same captcha letters). 95 | 96 | This project contains a way to generate captchas yourself with the *captcha* Python package, using the script *bin/simple_captcha_generate.py*. 97 | You can run it in the following manner: 98 | ```shell script 99 | python bin/simple_captcha_generate.py --height=70 --width=175 --available_chars="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" --captcha_length=6 --dataset_size=10000 100 | ``` 101 | 102 | Some notable parameters are: 103 | - *available_chars* - list of characters that will be generated 104 | - *captcha_length* - how long the generated captcha is going to be 105 | - *dataset_size* - how large the generated dataset is going to be 106 | - *height* - height of the generated captcha 107 | - *width* - width of the generated captcha 108 | 109 | 3. Generate *annotations* files using the *bin/captcha_annotate.py* script. You can call it for example 110 | ```shell script 111 | python bin/captcha_annotate.py --val_split=0.1 --test_split=0.1 --case_sensitive 112 | ``` 113 | This will shuffle and split data into train/validation/test according to the following parameters: 114 | - *val_split* - how large a part of the data is going to be used for validation, e.g. 0.1 means 10% 115 | - *test_split* - how large a part of the data is going to be used for testing 116 | - *case_sensitive* - switch denoting that the labels that are created will be case sensitive 117 | - if this parameter is not passed, then for example if *aBcD* is in the image (and the image is named accordingly), 118 | the resulting label will be *abcd* 119 | - if it is passed, the resulting label will be *aBcD* 120 | 121 | This script will create *annotations.txt*, *annotations-train.txt*, *annotations-validation.txt* and *annotations-test.txt*. 
import os

import numpy as np
import tensorflow as tf
import tensorflow.keras as tf_keras
# from tensorflow.python.keras.utils.vis_utils import plot_model

from ulozto_captcha_breaker.metrics import all_correct_acc


class CaptchaNetwork:
    """
    Residual convolutional network that classifies every character position of a captcha image.

    The model output has one softmax distribution over ``classes`` per captcha position.
    """

    def __init__(self, image_shape, classes: int, image_preprocess_pipeline, label_preprocess_pipeline, args):
        """
        Initializes CaptchaNetwork instance.
        :param image_shape: Shape of image; only the first two entries (height, width) are used.
        :param classes: Number of classes that model recognizes. E.g. if we want it to detect
        abcdefghijklmnopqrstuvwxyz, then classes must be 26.
        :param image_preprocess_pipeline: Pipeline applied to images before they are fed to the neural network.
        :param label_preprocess_pipeline: Pipeline applied to the raw labels before they are used as training targets.
        :param args: Parsed arguments. Attributes read here: weights_file, pretrained_model, l2, captcha_length,
        remove_layers, freeze_layers, save_model_path and logdir.
        :raises ValueError: If both a weights file and a pretrained model are passed.
        """
        # Explicit exception instead of `assert`, which is skipped when Python runs with -O.
        if args.weights_file is not None and args.pretrained_model is not None:
            raise ValueError("Cannot load pretrained model and weights file at the same time")

        self._image_preprocess_pipeline = image_preprocess_pipeline
        self._label_preprocess_pipeline = label_preprocess_pipeline
        self._classes = classes

        if not args.pretrained_model:
            self._model = self._build_model(image_shape, classes, args)
        else:
            self._model = tf_keras.models.load_model(args.pretrained_model)

        if args.weights_file is not None:
            self._model.load_weights(args.weights_file)

        print(f"Total layers: {len(self._model.layers)}")
        if args.remove_layers:
            # remove classification header and add new one
            model_input = self._model.layers[0].input
            features = self._model.layers[-1].input
            output = tf_keras.layers.Dense(units=classes, activation="softmax")(features)

            self._model = tf_keras.Model(inputs=model_input, outputs=output)

        if args.freeze_layers > 0:
            # Slicing freezes at most all layers instead of indexing past the end of the list.
            for frozen_layer in self._model.layers[:args.freeze_layers]:
                frozen_layer.trainable = False

        metrics = [tf_keras.metrics.sparse_categorical_accuracy]
        if not args.save_model_path:
            # NOTE(review): presumably skipped when exporting so that the saved model does not
            # depend on the project-specific metric - confirm.
            metrics.append(all_correct_acc)
        self._model.compile(optimizer=tf_keras.optimizers.Adam(),
                            loss=tf_keras.losses.SparseCategoricalCrossentropy(),
                            metrics=metrics)

        self._model.summary()

        self._tb_callback = tf_keras.callbacks.TensorBoard(args.logdir, update_freq=1000, profile_batch=1)
        # The callback's own end-of-training hook is replaced by a no-op.
        self._tb_callback.on_train_end = lambda *_: None
        checkpoint_path = os.path.join(args.logdir, 'cp-{epoch:02d}.h5')
        self._check_callback = tf_keras.callbacks.ModelCheckpoint(
            checkpoint_path, save_weights_only=True)

        if args.save_model_path:
            self.save_model(args.save_model_path)

    @staticmethod
    def _conv(filters: int, kernel_size: int, strides: int, l2: float):
        """Create a bias-free, same-padded, L2-regularized 2D convolution layer."""
        return tf_keras.layers.Convolution2D(
            filters=filters, kernel_size=kernel_size, strides=strides, padding="same", use_bias=False,
            kernel_regularizer=tf_keras.regularizers.l2(l2))

    def _build_model(self, image_shape, classes: int, args):
        """
        Build a fresh, untrained model for single-channel images of size image_shape[0] x image_shape[1].
        :param image_shape: Shape of image; only the first two entries are used.
        :param classes: Number of classes predicted for every captcha position.
        :param args: Parsed arguments; ``l2`` and ``captcha_length`` are read.
        :return: Keras model with output shape (batch, captcha_length, classes).
        """
        model_input = tf_keras.layers.Input(shape=(image_shape[0], image_shape[1], 1))

        # to normalize input
        layer = tf_keras.layers.BatchNormalization()(model_input)
        layer = self._conv(filters=32, kernel_size=7, strides=2, l2=args.l2)(layer)
        layer = tf_keras.layers.BatchNormalization()(layer)
        layer = tf_keras.layers.ReLU()(layer)
        layer = tf_keras.layers.MaxPooling2D(strides=2)(layer)

        layer = self._create_residual_block(layer, filters=32, l2=args.l2)
        layer = self._create_residual_block(layer, filters=32, l2=args.l2)

        # Every stage halves the spatial resolution with a strided convolution and then
        # applies two residual blocks at the new width.
        for filters in (64, 128, 256):
            layer = tf_keras.layers.BatchNormalization()(layer)
            layer = tf_keras.layers.ReLU()(layer)
            layer = self._conv(filters=filters, kernel_size=3, strides=2, l2=args.l2)(layer)
            layer = self._create_residual_block(layer, filters=filters, l2=args.l2)
            layer = self._create_residual_block(layer, filters=filters, l2=args.l2)

        layer = tf_keras.layers.GlobalAveragePooling2D()(layer)

        layer = tf_keras.layers.Dense(units=args.captcha_length * classes,
                                      kernel_regularizer=tf_keras.regularizers.l2(args.l2))(layer)
        # reshape into (batch, letters_count, rest)
        layer = tf_keras.layers.Reshape(target_shape=(args.captcha_length, classes))(layer)

        output = tf_keras.layers.Dense(units=classes, activation="softmax")(layer)

        return tf_keras.Model(inputs=model_input, outputs=output)

    def _create_residual_block(self, layer: tf_keras.layers.Layer, filters: int, l2: float):
        """
        Append two BatchNormalization -> ReLU -> 3x3 convolution steps and add the block input to the result.
        :param layer: Output of the preceding layer; it is added to the block output, so it must
        already have ``filters`` channels.
        :param filters: Number of filters of both convolutions.
        :param l2: L2 regularization factor of the convolution kernels.
        :return: Output of the block.
        """
        shortcut = layer
        for _ in range(2):
            layer = tf_keras.layers.BatchNormalization()(layer)
            layer = tf_keras.layers.ReLU()(layer)
            layer = self._conv(filters=filters, kernel_size=3, strides=1, l2=l2)(layer)

        return tf_keras.layers.Add()([shortcut, layer])

    def train(self, train_x, train_y, val_x, val_y, args):
        """
        Train the model.
        :param train_x: Numpy array with train captcha images.
        :param train_y: Numpy array with train captcha image labels (e.g. "abxz").
        :param val_x: Numpy array with validation captcha images.
        :param val_y: Numpy array with validation captcha image labels (e.g. "abxz").
        :param args: Parsed arguments; ``batch_size`` and ``epochs`` are read.
        """
        train_inputs = self._image_preprocess_pipeline(train_x)
        train_labels = self._label_preprocess_pipeline(train_y)
        dev_inputs = self._image_preprocess_pipeline(val_x)
        dev_labels = self._label_preprocess_pipeline(val_y)

        # Drops the local references to the raw data; the memory is only released if the
        # caller does not hold the arrays either.
        del train_x, train_y, val_x, val_y

        self._model.fit(x=train_inputs, y=train_labels, batch_size=args.batch_size, epochs=args.epochs,
                        validation_data=(dev_inputs, dev_labels),
                        callbacks=[self._check_callback, self._tb_callback])

    def save_model(self, out_path):
        """
        Export the model in the TensorFlow SavedModel format.
        :param out_path: Directory the model is saved to.
        """
        tf.saved_model.save(self._model, out_path)

    def predict(self, inputs, args):
        """
        Predicts labels from input images. Returns list of indices that denote characters.
        :param inputs: List of captcha images.
        :param args: Parsed arguments; ``batch_size`` is read.
        :return: Array with the index of the most probable class for every captcha position.
        """
        inputs = self._image_preprocess_pipeline(inputs)

        y_pred = self._model.predict(inputs, args.batch_size)
        if y_pred.ndim <= 2:
            # Model without a position axis: treat the output as a single-character captcha.
            y_pred = np.expand_dims(y_pred, axis=1)
        return np.argmax(y_pred, axis=2)