├── .github └── FUNDING.yml ├── LICENSE ├── MODEL_CREATION.py ├── README.md ├── captcha ├── captcha.7z └── xlqg.png ├── generator.js ├── main.py ├── package.json ├── pre_processing.py ├── process_images.py ├── requirements.txt └── temp └── xIqg.webp /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | ko_fi: enderty 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, ender 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /MODEL_CREATION.py: -------------------------------------------------------------------------------- 1 | # %% [markdown] 2 | # # OCR model for reading Captchas 3 | # 4 | # **Author:** [A_K_Nain](https://twitter.com/A_K_Nain)
5 | # **Date created:** 2020/06/14
6 | # **Last modified:** 2020/06/26
7 | # **Description:** How to implement an OCR model using CNNs, RNNs and CTC loss. 8 | # %% 9 | import os 10 | 11 | os.environ["KERAS_BACKEND"] = "tensorflow" 12 | import os 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | from pathlib import Path 16 | from PIL import Image 17 | import tensorflow as tf 18 | import keras 19 | from keras import layers 20 | gpus = tf.config.experimental.list_physical_devices('GPU') 21 | for gpu in gpus: 22 | print(gpu) 23 | print("Num GPUs Available: ", len(gpus),"IsBuild with cuda: ", tf.test.is_built_with_cuda()) 24 | # %% 25 | 26 | data_dir = Path("./captcha/") 27 | images = sorted(list(map(str, list(data_dir.glob("*.png"))))) 28 | labels = [img.split(os.path.sep)[-1].split(".png")[0] for img in images] 29 | characters = set(char for label in labels for char in label) 30 | characters = sorted(list(characters)) 31 | 32 | print("Number of images found: ", len(images)) 33 | print("Number of labels found: ", len(labels)) 34 | print("Number of unique characters: ", len(characters)) 35 | print("Characters present: ", characters) 36 | batch_size = 16 37 | img_width, img_height = Image.open(images[0]).size 38 | input(f"img_width: {img_width}, img_height: {img_height}, Press Enter to continue...") 39 | 40 | # Factor by which the image is going to be downsampled 41 | # by the convolutional blocks. We will be using two 42 | # convolution blocks and each block will have 43 | # a pooling layer which downsample the features by a factor of 2. 44 | # Hence total downsampling factor would be 4. 45 | downsample_factor = 4 46 | # Maximum length of any captcha in the dataset 47 | max_length = max([len(label) for label in labels]) 48 | # %% 49 | 50 | # Mapping characters to integers 51 | char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None) 52 | # Mapping integers back to original characters 53 | num_to_char = layers.StringLookup( 54 | vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True 55 | ) 56 | 57 | def split_data(images, labels, train_size=0.9, shuffle=True): 58 | # 1. Get the total size of the dataset 59 | size = len(images) 60 | # 2. Make an indices array and shuffle it, if required 61 | indices = np.arange(size) 62 | if shuffle: 63 | np.random.shuffle(indices) 64 | # 3. Get the size of training samples 65 | train_samples = int(size * train_size) 66 | # 4. Split data into training and validation sets 67 | x_train, y_train = images[indices[:train_samples]], labels[indices[:train_samples]] 68 | x_valid, y_valid = images[indices[train_samples:]], labels[indices[train_samples:]] 69 | return x_train, x_valid, y_train, y_valid 70 | 71 | # Splitting data into training and validation sets 72 | x_train, x_valid, y_train, y_valid = split_data(np.array(images), np.array(labels)) 73 | 74 | def encode_single_sample(img_path, label): 75 | # 1. Read image 76 | img = tf.io.read_file(img_path) 77 | # 2. Decode and convert to grayscale 78 | img = tf.io.decode_png(img, channels=1) 79 | # 3. Convert to float32 in [0, 1] range 80 | img = tf.image.convert_image_dtype(img, tf.float32) 81 | # 4. Resize to the desired size 82 | img = tf.image.resize(img, [img_height, img_width]) 83 | # 5. Transpose the image because we want the time 84 | # dimension to correspond to the width of the image. 85 | img = tf.transpose(img, perm=[1, 0, 2]) 86 | # 6. Map the characters in label to numbers 87 | label = char_to_num(tf.strings.unicode_split(label, input_encoding="UTF-8")) 88 | # 7. 
Return a dict as our model is expecting two inputs 89 | return {"image": img, "label": label} 90 | 91 | # %% 92 | 93 | train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) 94 | train_dataset = ( 95 | train_dataset.map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE) 96 | .batch(batch_size) 97 | .prefetch(buffer_size=tf.data.AUTOTUNE) 98 | ) 99 | 100 | validation_dataset = tf.data.Dataset.from_tensor_slices((x_valid, y_valid)) 101 | validation_dataset = ( 102 | validation_dataset.map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE) 103 | .batch(batch_size) 104 | .prefetch(buffer_size=tf.data.AUTOTUNE) 105 | ) 106 | 107 | # %% [markdown] 108 | # ## Visualize the data 109 | 110 | _, ax = plt.subplots(4, 4, figsize=(10, 5)) 111 | for batch in train_dataset.take(1): 112 | images = batch["image"] 113 | labels = batch["label"] 114 | for i in range(16): 115 | img = (images[i] * 255).numpy().astype("uint8") 116 | label = tf.strings.reduce_join(num_to_char(labels[i])).numpy().decode("utf-8") 117 | ax[i // 4, i % 4].imshow(img[:, :, 0].T, cmap="gray") 118 | ax[i // 4, i % 4].set_title(label) 119 | ax[i // 4, i % 4].axis("off") 120 | plt.show() 121 | 122 | # %% [markdown] 123 | # ## Model 124 | 125 | def ctc_batch_cost(y_true, y_pred, input_length, label_length): 126 | label_length = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32) 127 | input_length = tf.cast(tf.squeeze(input_length, axis=-1), tf.int32) 128 | sparse_labels = tf.cast(ctc_label_dense_to_sparse(y_true, label_length), tf.int32) 129 | 130 | y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon()) 131 | 132 | return tf.expand_dims( 133 | tf.compat.v1.nn.ctc_loss( 134 | inputs=y_pred, labels=sparse_labels, sequence_length=input_length 135 | ), 136 | 1, 137 | ) 138 | 139 | 140 | def ctc_label_dense_to_sparse(labels, label_lengths): 141 | label_shape = tf.shape(labels) 142 | num_batches_tns = tf.stack([label_shape[0]]) 143 | max_num_labels_tns = tf.stack([label_shape[1]]) 144 | 145 | def range_less_than(old_input, current_input): 146 | return tf.expand_dims(tf.range(tf.shape(old_input)[1]), 0) < tf.fill( 147 | max_num_labels_tns, current_input 148 | ) 149 | 150 | init = tf.cast(tf.fill([1, label_shape[1]], 0), tf.bool) 151 | dense_mask = tf.compat.v1.scan( 152 | range_less_than, label_lengths, initializer=init, parallel_iterations=1 153 | ) 154 | dense_mask = dense_mask[:, 0, :] 155 | 156 | label_array = tf.reshape( 157 | tf.tile(tf.range(0, label_shape[1]), num_batches_tns), label_shape 158 | ) 159 | label_ind = tf.compat.v1.boolean_mask(label_array, dense_mask) 160 | 161 | batch_array = tf.transpose( 162 | tf.reshape( 163 | tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns), 164 | tf.reverse(label_shape, [0]), 165 | ) 166 | ) 167 | batch_ind = tf.compat.v1.boolean_mask(batch_array, dense_mask) 168 | indices = tf.transpose( 169 | tf.reshape(tf.concat([batch_ind, label_ind], axis=0), [2, -1]) 170 | ) 171 | 172 | vals_sparse = tf.compat.v1.gather_nd(labels, indices) 173 | 174 | return tf.SparseTensor( 175 | tf.cast(indices, tf.int64), vals_sparse, tf.cast(label_shape, tf.int64) 176 | ) 177 | 178 | 179 | class CTCLayer(layers.Layer): 180 | def __init__(self, name=None): 181 | super().__init__(name=name) 182 | self.loss_fn = ctc_batch_cost 183 | 184 | def call(self, y_true, y_pred): 185 | # Compute the training-time loss value and add it 186 | # to the layer using `self.add_loss()`. 
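        # ctc_batch_cost expects a per-sample sequence length for both the predictions and the labels,
        # so the scalar time-step count and label length computed below are tiled across the whole batch.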
187 | batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64") 188 | input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64") 189 | label_length = tf.cast(tf.shape(y_true)[1], dtype="int64") 190 | 191 | input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64") 192 | label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64") 193 | 194 | loss = self.loss_fn(y_true, y_pred, input_length, label_length) 195 | self.add_loss(loss) 196 | 197 | # At test time, just return the computed predictions 198 | return y_pred 199 | 200 | 201 | def build_model(): 202 | # Inputs to the model 203 | input_img = layers.Input( 204 | shape=(img_width, img_height, 1), name="image", dtype="float32" 205 | ) 206 | labels = layers.Input(name="label", shape=(None,), dtype="float32") 207 | # First conv block 208 | x = layers.Conv2D( 209 | 32, 210 | (3, 3), 211 | activation="relu", 212 | kernel_initializer="he_normal", 213 | padding="same", 214 | name="Conv1", 215 | )(input_img) 216 | x = layers.MaxPooling2D((2, 2), name="pool1")(x) 217 | # Second conv block 218 | x = layers.Conv2D( 219 | 64, 220 | (3, 3), 221 | activation="relu", 222 | kernel_initializer="he_normal", 223 | padding="same", 224 | name="Conv2", 225 | )(x) 226 | x = layers.MaxPooling2D((2, 2), name="pool2")(x) 227 | # We have used two max pool with pool size and strides 2. 228 | # Hence, downsampled feature maps are 4x smaller. The number of 229 | # filters in the last layer is 64. Reshape accordingly before 230 | # passing the output to the RNN part of the model 231 | new_shape = ((img_width // 4), (img_height // 4) * 64) 232 | x = layers.Reshape(target_shape=new_shape, name="reshape")(x) 233 | x = layers.Dense(64, activation="relu", name="dense1")(x) 234 | x = layers.Dropout(0.2)(x) 235 | # RNNs 236 | x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.25))(x) 237 | x = layers.Bidirectional(layers.LSTM(64, return_sequences=True, dropout=0.25))(x) 238 | # Output layer 239 | x = layers.Dense( 240 | len(char_to_num.get_vocabulary()) + 1, activation="softmax", name="dense2" 241 | )(x) 242 | # Add CTC layer for calculating CTC loss at each step 243 | output = CTCLayer(name="ctc_loss")(labels, x) 244 | # Define the model 245 | model = keras.models.Model( 246 | inputs=[input_img, labels], outputs=output, name="ocr_model_v1" 247 | ) 248 | # Optimizer 249 | opt = keras.optimizers.Adam() 250 | # Compile the model and return 251 | model.compile(optimizer=opt) 252 | return model 253 | 254 | 255 | # Get the model 256 | model = build_model() 257 | model.summary() 258 | 259 | # %% [markdown] 260 | # ## Training 261 | 262 | # TODO restore epoch count. 
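# Note: with the EarlyStopping callback configured below (patience=10, restore_best_weights=True),
# training usually stops well before the epoch limit and keeps the best validation weights.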
263 | epochs = 100 264 | early_stopping_patience = 10 265 | # Add early stopping 266 | early_stopping = keras.callbacks.EarlyStopping( 267 | monitor="val_loss", patience=early_stopping_patience, restore_best_weights=True 268 | ) 269 | input("Train, Press Enter to continue...") 270 | # Train the model 271 | history = model.fit( 272 | train_dataset, 273 | validation_data=validation_dataset, 274 | epochs=epochs, 275 | callbacks=[early_stopping], 276 | ) 277 | # %% [markdown] 278 | # ## Inference 279 | 280 | def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1): 281 | input_shape = tf.shape(y_pred) 282 | num_samples, num_steps = input_shape[0], input_shape[1] 283 | y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon()) 284 | input_length = tf.cast(input_length, tf.int32) 285 | 286 | if greedy: 287 | (decoded, log_prob) = tf.nn.ctc_greedy_decoder( 288 | inputs=y_pred, sequence_length=input_length 289 | ) 290 | else: 291 | (decoded, log_prob) = tf.compat.v1.nn.ctc_beam_search_decoder( 292 | inputs=y_pred, 293 | sequence_length=input_length, 294 | beam_width=beam_width, 295 | top_paths=top_paths, 296 | ) 297 | decoded_dense = [] 298 | for st in decoded: 299 | st = tf.SparseTensor(st.indices, st.values, (num_samples, num_steps)) 300 | decoded_dense.append(tf.sparse.to_dense(sp_input=st, default_value=-1)) 301 | return (decoded_dense, log_prob) 302 | 303 | 304 | # Get the prediction model by extracting layers till the output layer 305 | prediction_model = keras.models.Model( 306 | model.input[0], model.get_layer(name="dense2").output 307 | ) 308 | prediction_model.save("predi_model.h5")#don't save as keras or it'll crash when loading 309 | prediction_model.summary() 310 | 311 | 312 | # A utility function to decode the output of the network 313 | def decode_batch_predictions(pred): 314 | input_len = np.ones(pred.shape[0]) * pred.shape[1] 315 | # Use greedy search. For complex tasks, you can use beam search 316 | results = ctc_decode(pred, input_length=input_len, greedy=True)[0][0][ 317 | :, :max_length 318 | ] 319 | # Iterate over the results and get back the text 320 | output_text = [] 321 | for res in results: 322 | res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8") 323 | output_text.append(res) 324 | return output_text 325 | 326 | 327 | # Let's check results on some validation samples 328 | for batch in validation_dataset.take(1): 329 | batch_images = batch["image"] 330 | batch_labels = batch["label"] 331 | 332 | preds = prediction_model.predict(batch_images) 333 | pred_texts = decode_batch_predictions(preds) 334 | 335 | orig_texts = [] 336 | for label in batch_labels: 337 | label = tf.strings.reduce_join(num_to_char(label)).numpy().decode("utf-8") 338 | orig_texts.append(label) 339 | 340 | _, ax = plt.subplots(4, 4, figsize=(15, 5)) 341 | for i in range(len(pred_texts)): 342 | img = (batch_images[i, :, :, 0] * 255).numpy().astype(np.uint8) 343 | img = img.T 344 | title = f"Prediction: {pred_texts[i]}" 345 | ax[i // 4, i % 4].imshow(img, cmap="gray") 346 | ax[i // 4, i % 4].set_title(title) 347 | ax[i // 4, i % 4].axis("off") 348 | plt.show() 349 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Captcha_solving 2 | All about creating a dataset, preprocessing images, and creating an actual model to solve captcha 3 |
4 |
5 |
6 | # STEP 1 - creating a dataset 7 | Two methods: 8 | - unzip the captcha.7z archive and put all the images under the `captcha` folder 9 | - OR use generator.js to create your own captchas 10 | 11 | ## generator.js usage 12 | - install the generator.js dependencies by running `npm install` in the same directory as package.json 13 | - change `out = './temp'` to whatever temporary folder you want, change `SIZE = [720,360]` to the size you want (width, height), and change `FORMAT = "webp"` to the format you want. 14 | - run it with `node generator.js` and wait until it has finished generating captchas. 15 |
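If you want to sanity-check the generated dataset before preprocessing, a minimal sketch like the one below (assuming the default `./temp` output folder and that Pillow can read your chosen `FORMAT`) prints how many captchas were produced and their dimensions:

```python
# Quick dataset sanity check -- a sketch, not one of the repository scripts.
import os
from PIL import Image

files = [f for f in os.listdir("temp") if not f.startswith(".")]
print(f"{len(files)} captchas generated")
with Image.open(os.path.join("temp", files[0])) as img:
    # width/height should match SIZE in generator.js, e.g. (720, 360)
    print("size (width, height):", img.size)
```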
16 | 17 | # STEP 2 - pre-processing images for training 18 | - install `opencv-python` using pip, then run the preprocessing with the `python process_images.py` command. 19 |
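Before converting the whole dataset, you can preview what the preprocessing does to a single image. A minimal sketch (assuming `pre_processing.py` is importable from the current directory and `./temp` contains at least one generated captcha):

```python
# Preview the preprocessing on one captcha -- a sketch, not one of the repository scripts.
import os
from pre_processing import preprocess_image

sample = next(f for f in os.listdir("temp") if f.lower().endswith((".png", ".webp")))
processed = preprocess_image(os.path.join("temp", sample))  # returns a PIL Image
processed.show()  # expected: black text centered on a white 529x120 canvas
```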
20 | 21 | # STEP 3 - creating a model 22 | I based my model on the keras [documentation](https://keras.io/examples/vision/captcha_ocr/) 23 | ## configuration stuff: 24 | - first install the requirements: `pip install -r requirements.txt` 25 | - second, try to run it and see if it detects any GPU devices (if you have one); if it tells you that you have 0 available GPUs and you are on Windows, 26 | I strongly recommend you use WSL 2 by following the tensorflow [tutorial](https://www.tensorflow.org/install/pip)
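A quick way to check GPU visibility on its own is to run the same startup check that MODEL_CREATION.py performs:

```python
# Standalone GPU sanity check -- mirrors the check at the top of MODEL_CREATION.py.
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices("GPU")
for gpu in gpus:
    print(gpu)
print("Num GPUs Available:", len(gpus), "Is built with CUDA:", tf.test.is_built_with_cuda())
```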

![image](https://github.com/NotTrueFalse/Captcha_solving/assets/122208389/f238564d-583d-47a3-a698-38221c7d2ca5) 27 | If you are on another platform and don't see any GPUs, follow the tensorflow tutorial as well; I'm not an expert in this kind of situation 28 | 29 | now we are going to talk about actually running the model: 30 | - run `python MODEL_CREATION.py` and wait 31 | - check whether there is any problem with the size: it'll tell you `img_width: [width], img_height: [height], Press Enter to continue...` 32 | if it's the right size (I made it so it resizes the image to be about 2 times smaller and does some cropping, so a 720x360 input becomes 529x120), press enter. 33 |
if it doesn't fit, try creating an issue and ask me why. 34 | - if everything is fine, you'll get a popup with images and their labels; if the text doesn't correspond, create an issue. 35 | - after a while you'll see the text `Train, Press Enter to continue...`; this is the good part, where all the magic happens. After pressing enter it will train your model, and all you have to do is wait until it's finished. 36 | - then it'll automatically save the model and show you a panel of images with their corresponding predictions. At this point you're pretty much done 37 | 38 | # STEP 4 - Re-use the model 39 | - put unprocessed test captchas (raw, not preprocessed) in a directory named `test`, then simply run `python main.py` and voilà, you should see the prediction and the label of each image 40 | - if the predictions are bad, try adding more captchas to your dataset 41 | 42 | That's all, if you have any problem or question, create an issue! 43 | -------------------------------------------------------------------------------- /captcha/captcha.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NotTrueFalse/Captcha_solving/89db6fa8a0d33596db16276d4bda35f9e842e06c/captcha/captcha.7z -------------------------------------------------------------------------------- /captcha/xlqg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NotTrueFalse/Captcha_solving/89db6fa8a0d33596db16276d4bda35f9e842e06c/captcha/xlqg.png -------------------------------------------------------------------------------- /generator.js: -------------------------------------------------------------------------------- 1 | var svgCaptcha = require('svg-captcha'); 2 | var fs = require('fs'); 3 | var svg2img = require('svg2img'); 4 | var num = 10000;//desired amount of captcha (10k is very good) 5 | const FORMAT = 'webp'; 6 | const SIZE = [720, 360]; 7 | const out = './temp'; 8 | async function generate(){ 9 | // process.stdout.write(`\r${num} left `); 10 | var captcha = svgCaptcha.create({ 11 | size: 4, 12 | noise: 1, 13 | color: true, 14 | background: '#2E3137', 15 | width: SIZE[0], 16 | height: SIZE[1], 17 | fontSize: 160 18 | }); 19 | svg2img(captcha.data,{ format: FORMAT }, function(error, buffer) { 20 | fs.writeFileSync(`${out}/${captcha.text}.${FORMAT}`, buffer); 21 | console.log(`${out}/${captcha.text}.${FORMAT}`); 22 | }); 23 | if (num > 1){ 24 | num--; 25 | await generate(); 26 | } 27 | } 28 | async function main(){ 29 | await generate(); 30 | } 31 | main(); 32 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from pre_processing import preprocess_image 2 | from prediction import predict 3 | import keras 4 | import os 5 | model = keras.saving.load_model('predi_model.h5') 6 | test_directory = './test' 7 | 8 | for image_path in os.listdir(test_directory): 9 | img = preprocess_image(f"{test_directory}/{image_path}") 10 | prediction = predict(img, model) 11 | label = image_path.split('.')[0].split('/')[-1] 12 | print("Prediction: ", prediction, "Label: ", label) 13 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "generator", 3 | "version": "1.0.0", 4 | "description": "generate captcha to train a model", 5 | "main": "generator.js", 6 | 
"scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "author": "ender", 10 | "license": "ISC", 11 | "dependencies": { 12 | "fs": "^0.0.1-security", 13 | "sharp": "^0.33.2", 14 | "svg-captcha": "^1.4.0", 15 | "svg2img": "^1.0.0-beta.2" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /pre_processing.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import cv2 3 | import numpy as np 4 | # import os 5 | 6 | def preprocess_image(image_path:str,ctype:str='svgcaptcha')->Image.Image: 7 | """Preprocess the image to make it easier to read by the OCR""" 8 | result = cv2.imread(image_path, 0) 9 | if ctype == 'svgcaptcha': 10 | # crop a margin (about 28% of the image height) from each side 11 | p25 = result.shape[0]/100*28 12 | h,w = result.shape 13 | #precrop 14 | result = result[int(p25*1.43):h-int(p25*0.95), int(p25):w-int(p25*0.9)]# rows (top:bottom), cols (left:right) 15 | # canny = cv2.Canny(result, 50,50) 16 | mask = np.full(result.shape, 255, dtype=np.uint8) 17 | result_not = cv2.bitwise_not(result) 18 | result_not = cv2.threshold(result_not, 202, 255, cv2.THRESH_BINARY)[1] 19 | baw = cv2.erode(result_not, np.ones((3,3), np.uint8), iterations=1) 20 | contours, hierarchy = cv2.findContours(baw, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)#find the contours 21 | for contour in contours: 22 | if cv2.contourArea(contour) < 2100:#mask out contours with a small area (noise such as thin lines); the large letter blobs are kept 23 | cv2.drawContours(mask, [contour], -1, 0, -1) 24 | baw = cv2.bitwise_and(baw, baw, mask=mask) 25 | kernel = np.ones((1,1), np.uint8) 26 | dilatation = cv2.dilate(mask, kernel, iterations=1) 27 | masked_img = cv2.threshold(dilatation, 240, 255, cv2.THRESH_BINARY)[1] 28 | r = 11#radius of the circle 29 | kernel = np.zeros((r*2,r*2), np.uint8)#create a circle 30 | cv2.circle(kernel, (r-2,r-2), r, 255, -1)#-2 => shift toward the bottom-right of the letters 31 | #grow the text mask (eroding white expands the black regions) 32 | masked_img = cv2.erode(masked_img, kernel, iterations=2)#erode to expand on white 33 | #apply the mask on the original image 34 | masked_img = cv2.bitwise_not(masked_img) 35 | result_not[masked_img == 0] = 255#apply the mask and replace the black pixels by white pixels 36 | result = result_not 37 | #crop to the bounding box of the black (0) pixels 38 | x, y = np.where(result == 0) 39 | result = result[x.min():x.max(), y.min():y.max()] 40 | new_img = np.zeros((120, 529), dtype=np.uint8) 41 | new_img.fill(255) 42 | center_left = (529 - result.shape[1])//2 43 | center_top = (120 - result.shape[0])//2#center the image and embed it in a 529x120 image 44 | new_img[center_top:center_top+result.shape[0], center_left:center_left+result.shape[1]] = result 45 | return Image.fromarray(new_img) 46 | elif ctype == 'pythoncaptcha': 47 | result = cv2.bitwise_not(result) 48 | result = cv2.threshold(result, 35, 255, cv2.THRESH_BINARY)[1] 49 | result = cv2.erode(result, np.ones((2,2), np.uint8), iterations=1) 50 | x,y = np.where(result == 0) 51 | result = result[x.min():x.max(), y.min():y.max()] 52 | #embed it in a 122 by 56 image 53 | new_img = np.zeros((56,122), dtype=np.uint8) 54 | new_img.fill(255) 55 | center_left = (122 - result.shape[1])//2 56 | center_top = (56 - result.shape[0])//2 57 | new_img[center_top:center_top+result.shape[0], center_left:center_left+result.shape[1]] = result 58 | return Image.fromarray(new_img) 59 | # start_top = int(result.shape[1]/100) 60 | # start_left = int(result.shape[0]/100*15) 61 | # result = 
result[int(start_left):result.shape[0]-int(start_left), int(start_top):result.shape[1]-int(start_top)] 62 | -------------------------------------------------------------------------------- /process_images.py: -------------------------------------------------------------------------------- 1 | from pre_processing import preprocess_image 2 | import os 3 | i = 0 4 | l = len(os.listdir("temp")) 5 | if not os.path.exists('captcha'):os.mkdir('captcha') 6 | for img in os.listdir("temp"): 7 | i += 1 8 | if img.endswith(".png"): 9 | #preprocess the captcha and save the cleaned version 10 | result = preprocess_image('temp/'+img) 11 | result.save('captcha/'+img) 12 | try: 13 | os.remove('temp/'+img) 14 | except OSError:pass 15 | print(f"[*] {i}/{l} images processed, {round(i/l*100, 2)}%"," "*20, end="\r") 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow 2 | keras 3 | pillow 4 | matplotlib 5 | numpy 6 | pathlib 7 | opencv-python 8 | -------------------------------------------------------------------------------- /temp/xIqg.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NotTrueFalse/Captcha_solving/89db6fa8a0d33596db16276d4bda35f9e842e06c/temp/xIqg.webp --------------------------------------------------------------------------------
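Note that main.py imports `predict` from a `prediction` module that is not included in the listing above, so the real implementation may differ. A hypothetical sketch of what such a helper could look like, reusing the greedy CTC decoding approach from MODEL_CREATION.py, is shown below; `CHARACTERS` and `MAX_LENGTH` are assumptions and must match the vocabulary and label length the model was trained with:

```python
# prediction.py -- hypothetical sketch only; the actual module is not part of this listing.
import numpy as np
import tensorflow as tf
from keras import layers

CHARACTERS = list("abcdefghijklmnopqrstuvwxyz")  # assumption: replace with the training character set
MAX_LENGTH = 4                                   # assumption: label length used during training

# Inverse lookup: integer class indices -> characters (mirrors num_to_char in MODEL_CREATION.py)
num_to_char = layers.StringLookup(vocabulary=CHARACTERS, mask_token=None, invert=True)

def predict(pil_img, model) -> str:
    """Run the saved prediction model on one preprocessed (grayscale PIL) captcha image."""
    img = np.array(pil_img, dtype=np.float32) / 255.0   # (height, width) in [0, 1]
    img = img.T[np.newaxis, :, :, np.newaxis]            # (1, width, height, 1), as during training
    pred = model.predict(img, verbose=0)                 # (1, time_steps, vocab_size + 1)
    log_probs = tf.math.log(tf.transpose(pred, perm=[1, 0, 2]) + 1e-7)
    (decoded,), _ = tf.nn.ctc_greedy_decoder(
        inputs=log_probs, sequence_length=[pred.shape[1]]
    )
    dense = tf.sparse.to_dense(decoded, default_value=-1)[0][:MAX_LENGTH]
    return tf.strings.reduce_join(num_to_char(dense)).numpy().decode("utf-8")
```

With a module like this saved as prediction.py next to main.py (and `CHARACTERS`/`MAX_LENGTH` set to your training values), `python main.py` should print a prediction and a label for each image in `./test`.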