├── .github
│   └── FUNDING.yml
├── LICENSE
├── MODEL_CREATION.py
├── README.md
├── captcha
│   ├── captcha.7z
│   └── xlqg.png
├── generator.js
├── main.py
├── package.json
├── pre_processing.py
├── process_images.py
├── requirements.txt
└── temp
    └── xIqg.webp
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 | ko_fi: enderty
3 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2024, ender
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice, this
9 | list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | 3. Neither the name of the copyright holder nor the names of its
16 | contributors may be used to endorse or promote products derived from
17 | this software without specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
--------------------------------------------------------------------------------
/MODEL_CREATION.py:
--------------------------------------------------------------------------------
1 | # %% [markdown]
2 | # # OCR model for reading Captchas
3 | #
4 | # **Author:** [A_K_Nain](https://twitter.com/A_K_Nain)
5 | # **Date created:** 2020/06/14
6 | # **Last modified:** 2020/06/26
7 | # **Description:** How to implement an OCR model using CNNs, RNNs and CTC loss.
8 | # %%
9 | import os
10 |
11 | os.environ["KERAS_BACKEND"] = "tensorflow"  # must be set before importing keras
12 |
13 | import numpy as np
14 | import matplotlib.pyplot as plt
15 | from pathlib import Path
16 | from PIL import Image
17 | import tensorflow as tf
18 | import keras
19 | from keras import layers
20 | gpus = tf.config.experimental.list_physical_devices('GPU')
21 | for gpu in gpus:
22 | print(gpu)
23 | print("Num GPUs available:", len(gpus), "Built with CUDA:", tf.test.is_built_with_cuda())
24 | # %%
25 |
26 | data_dir = Path("./captcha/")
27 | images = sorted(list(map(str, list(data_dir.glob("*.png")))))
28 | labels = [img.split(os.path.sep)[-1].split(".png")[0] for img in images]
29 | characters = set(char for label in labels for char in label)
30 | characters = sorted(list(characters))
31 |
32 | print("Number of images found: ", len(images))
33 | print("Number of labels found: ", len(labels))
34 | print("Number of unique characters: ", len(characters))
35 | print("Characters present: ", characters)
36 | batch_size = 16
37 | img_width, img_height = Image.open(images[0]).size
38 | input(f"img_width: {img_width}, img_height: {img_height}, Press Enter to continue...")
39 |
40 | # Factor by which the image is going to be downsampled
41 | # by the convolutional blocks. We will be using two
42 | # convolution blocks and each block will have
43 | # a pooling layer which downsamples the features by a factor of 2.
44 | # Hence the total downsampling factor is 4.
45 | downsample_factor = 4
46 | # Maximum length of any captcha in the dataset
47 | max_length = max([len(label) for label in labels])
48 | # %%
49 |
50 | # Mapping characters to integers
51 | char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)
52 | # Mapping integers back to original characters
53 | num_to_char = layers.StringLookup(
54 | vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
55 | )
56 |
57 | def split_data(images, labels, train_size=0.9, shuffle=True):
58 | # 1. Get the total size of the dataset
59 | size = len(images)
60 | # 2. Make an indices array and shuffle it, if required
61 | indices = np.arange(size)
62 | if shuffle:
63 | np.random.shuffle(indices)
64 | # 3. Get the size of training samples
65 | train_samples = int(size * train_size)
66 | # 4. Split data into training and validation sets
67 | x_train, y_train = images[indices[:train_samples]], labels[indices[:train_samples]]
68 | x_valid, y_valid = images[indices[train_samples:]], labels[indices[train_samples:]]
69 | return x_train, x_valid, y_train, y_valid
70 |
71 | # Splitting data into training and validation sets
72 | x_train, x_valid, y_train, y_valid = split_data(np.array(images), np.array(labels))
73 |
74 | def encode_single_sample(img_path, label):
75 | # 1. Read image
76 | img = tf.io.read_file(img_path)
77 | # 2. Decode and convert to grayscale
78 | img = tf.io.decode_png(img, channels=1)
79 | # 3. Convert to float32 in [0, 1] range
80 | img = tf.image.convert_image_dtype(img, tf.float32)
81 | # 4. Resize to the desired size
82 | img = tf.image.resize(img, [img_height, img_width])
83 | # 5. Transpose the image because we want the time
84 | # dimension to correspond to the width of the image.
85 | img = tf.transpose(img, perm=[1, 0, 2])
86 | # 6. Map the characters in label to numbers
87 | label = char_to_num(tf.strings.unicode_split(label, input_encoding="UTF-8"))
88 | # 7. Return a dict as our model is expecting two inputs
89 | return {"image": img, "label": label}
90 |
91 | # %%
92 |
93 | train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
94 | train_dataset = (
95 | train_dataset.map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)
96 | .batch(batch_size)
97 | .prefetch(buffer_size=tf.data.AUTOTUNE)
98 | )
99 |
100 | validation_dataset = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
101 | validation_dataset = (
102 | validation_dataset.map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)
103 | .batch(batch_size)
104 | .prefetch(buffer_size=tf.data.AUTOTUNE)
105 | )
106 |
107 | # %% [markdown]
108 | # ## Visualize the data
109 |
110 | _, ax = plt.subplots(4, 4, figsize=(10, 5))
111 | for batch in train_dataset.take(1):
112 | images = batch["image"]
113 | labels = batch["label"]
114 | for i in range(16):
115 | img = (images[i] * 255).numpy().astype("uint8")
116 | label = tf.strings.reduce_join(num_to_char(labels[i])).numpy().decode("utf-8")
117 | ax[i // 4, i % 4].imshow(img[:, :, 0].T, cmap="gray")
118 | ax[i // 4, i % 4].set_title(label)
119 | ax[i // 4, i % 4].axis("off")
120 | plt.show()
121 |
122 | # %% [markdown]
123 | # ## Model
124 |
125 | def ctc_batch_cost(y_true, y_pred, input_length, label_length):
126 | label_length = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32)
127 | input_length = tf.cast(tf.squeeze(input_length, axis=-1), tf.int32)
128 | sparse_labels = tf.cast(ctc_label_dense_to_sparse(y_true, label_length), tf.int32)
129 |
130 | y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon())
131 |
132 | return tf.expand_dims(
133 | tf.compat.v1.nn.ctc_loss(
134 | inputs=y_pred, labels=sparse_labels, sequence_length=input_length
135 | ),
136 | 1,
137 | )
138 |
139 |
140 | def ctc_label_dense_to_sparse(labels, label_lengths):
141 | label_shape = tf.shape(labels)
142 | num_batches_tns = tf.stack([label_shape[0]])
143 | max_num_labels_tns = tf.stack([label_shape[1]])
144 |
145 | def range_less_than(old_input, current_input):
146 | return tf.expand_dims(tf.range(tf.shape(old_input)[1]), 0) < tf.fill(
147 | max_num_labels_tns, current_input
148 | )
149 |
150 | init = tf.cast(tf.fill([1, label_shape[1]], 0), tf.bool)
151 | dense_mask = tf.compat.v1.scan(
152 | range_less_than, label_lengths, initializer=init, parallel_iterations=1
153 | )
154 | dense_mask = dense_mask[:, 0, :]
155 |
156 | label_array = tf.reshape(
157 | tf.tile(tf.range(0, label_shape[1]), num_batches_tns), label_shape
158 | )
159 | label_ind = tf.compat.v1.boolean_mask(label_array, dense_mask)
160 |
161 | batch_array = tf.transpose(
162 | tf.reshape(
163 | tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
164 | tf.reverse(label_shape, [0]),
165 | )
166 | )
167 | batch_ind = tf.compat.v1.boolean_mask(batch_array, dense_mask)
168 | indices = tf.transpose(
169 | tf.reshape(tf.concat([batch_ind, label_ind], axis=0), [2, -1])
170 | )
171 |
172 | vals_sparse = tf.compat.v1.gather_nd(labels, indices)
173 |
174 | return tf.SparseTensor(
175 | tf.cast(indices, tf.int64), vals_sparse, tf.cast(label_shape, tf.int64)
176 | )
177 |
178 |
179 | class CTCLayer(layers.Layer):
180 | def __init__(self, name=None):
181 | super().__init__(name=name)
182 | self.loss_fn = ctc_batch_cost
183 |
184 | def call(self, y_true, y_pred):
185 | # Compute the training-time loss value and add it
186 | # to the layer using `self.add_loss()`.
187 | batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
188 | input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
189 | label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")
190 |
191 | input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
192 | label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")
193 |
194 | loss = self.loss_fn(y_true, y_pred, input_length, label_length)
195 | self.add_loss(loss)
196 |
197 | # At test time, just return the computed predictions
198 | return y_pred
199 |
200 |
201 | def build_model():
202 | # Inputs to the model
203 | input_img = layers.Input(
204 | shape=(img_width, img_height, 1), name="image", dtype="float32"
205 | )
206 | labels = layers.Input(name="label", shape=(None,), dtype="float32")
207 | # First conv block
208 | x = layers.Conv2D(
209 | 32,
210 | (3, 3),
211 | activation="relu",
212 | kernel_initializer="he_normal",
213 | padding="same",
214 | name="Conv1",
215 | )(input_img)
216 | x = layers.MaxPooling2D((2, 2), name="pool1")(x)
217 | # Second conv block
218 | x = layers.Conv2D(
219 | 64,
220 | (3, 3),
221 | activation="relu",
222 | kernel_initializer="he_normal",
223 | padding="same",
224 | name="Conv2",
225 | )(x)
226 | x = layers.MaxPooling2D((2, 2), name="pool2")(x)
227 |     # We have used two max-pooling layers with pool size and stride 2.
228 | # Hence, downsampled feature maps are 4x smaller. The number of
229 | # filters in the last layer is 64. Reshape accordingly before
230 | # passing the output to the RNN part of the model
231 | new_shape = ((img_width // 4), (img_height // 4) * 64)
232 | x = layers.Reshape(target_shape=new_shape, name="reshape")(x)
233 | x = layers.Dense(64, activation="relu", name="dense1")(x)
234 | x = layers.Dropout(0.2)(x)
235 | # RNNs
236 | x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.25))(x)
237 | x = layers.Bidirectional(layers.LSTM(64, return_sequences=True, dropout=0.25))(x)
238 | # Output layer
239 | x = layers.Dense(
240 | len(char_to_num.get_vocabulary()) + 1, activation="softmax", name="dense2"
241 | )(x)
242 | # Add CTC layer for calculating CTC loss at each step
243 | output = CTCLayer(name="ctc_loss")(labels, x)
244 | # Define the model
245 | model = keras.models.Model(
246 | inputs=[input_img, labels], outputs=output, name="ocr_model_v1"
247 | )
248 | # Optimizer
249 | opt = keras.optimizers.Adam()
250 | # Compile the model and return
251 | model.compile(optimizer=opt)
252 | return model
253 |
254 |
255 | # Get the model
256 | model = build_model()
257 | model.summary()
258 |
259 | # %% [markdown]
260 | # ## Training
261 |
262 | # TODO restore epoch count.
263 | epochs = 100
264 | early_stopping_patience = 10
265 | # Add early stopping
266 | early_stopping = keras.callbacks.EarlyStopping(
267 | monitor="val_loss", patience=early_stopping_patience, restore_best_weights=True
268 | )
269 | input("Train, Press Enter to continue...")
270 | # Train the model
271 | history = model.fit(
272 | train_dataset,
273 | validation_data=validation_dataset,
274 | epochs=epochs,
275 | callbacks=[early_stopping],
276 | )
277 | # %% [markdown]
278 | # ## Inference
279 |
280 | def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
281 | input_shape = tf.shape(y_pred)
282 | num_samples, num_steps = input_shape[0], input_shape[1]
283 | y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon())
284 | input_length = tf.cast(input_length, tf.int32)
285 |
286 | if greedy:
287 | (decoded, log_prob) = tf.nn.ctc_greedy_decoder(
288 | inputs=y_pred, sequence_length=input_length
289 | )
290 | else:
291 | (decoded, log_prob) = tf.compat.v1.nn.ctc_beam_search_decoder(
292 | inputs=y_pred,
293 | sequence_length=input_length,
294 | beam_width=beam_width,
295 | top_paths=top_paths,
296 | )
297 | decoded_dense = []
298 | for st in decoded:
299 | st = tf.SparseTensor(st.indices, st.values, (num_samples, num_steps))
300 | decoded_dense.append(tf.sparse.to_dense(sp_input=st, default_value=-1))
301 | return (decoded_dense, log_prob)
302 |
303 |
304 | # Get the prediction model by extracting layers till the output layer
305 | prediction_model = keras.models.Model(
306 | model.input[0], model.get_layer(name="dense2").output
307 | )
308 | prediction_model.save("predi_model.h5")  # don't save as .keras or it'll crash when loading
309 | prediction_model.summary()
310 |
311 |
312 | # A utility function to decode the output of the network
313 | def decode_batch_predictions(pred):
314 | input_len = np.ones(pred.shape[0]) * pred.shape[1]
315 | # Use greedy search. For complex tasks, you can use beam search
316 | results = ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
317 | :, :max_length
318 | ]
319 | # Iterate over the results and get back the text
320 | output_text = []
321 | for res in results:
322 | res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
323 | output_text.append(res)
324 | return output_text
325 |
326 |
327 | # Let's check results on some validation samples
328 | for batch in validation_dataset.take(1):
329 | batch_images = batch["image"]
330 | batch_labels = batch["label"]
331 |
332 | preds = prediction_model.predict(batch_images)
333 | pred_texts = decode_batch_predictions(preds)
334 |
335 | orig_texts = []
336 | for label in batch_labels:
337 | label = tf.strings.reduce_join(num_to_char(label)).numpy().decode("utf-8")
338 | orig_texts.append(label)
339 |
340 | _, ax = plt.subplots(4, 4, figsize=(15, 5))
341 | for i in range(len(pred_texts)):
342 | img = (batch_images[i, :, :, 0] * 255).numpy().astype(np.uint8)
343 | img = img.T
344 | title = f"Prediction: {pred_texts[i]}"
345 | ax[i // 4, i % 4].imshow(img, cmap="gray")
346 | ax[i // 4, i % 4].set_title(title)
347 | ax[i // 4, i % 4].axis("off")
348 | plt.show()
349 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Captcha_solving
2 | Everything about creating a dataset, preprocessing images, and building an actual model to solve captchas
3 |
4 |
5 |
6 | # STEP 1 - creating a dataset
7 | Two methods:
8 | - unzip the captcha.7z archive and put all images under the `captcha` folder
9 | - OR use generator.js to create your own captcha(s)
10 |
11 | ## generator.js usage
12 | - install the generator.js dependencies by running `npm install` in the same directory as package.json
13 | - change `out = './temp'` to whatever temporary folder you want, change `SIZE = [720,360]` to the size you want (width, height), and change `FORMAT = "webp"` to the format you want
14 | - run it with `node generator.js` and wait until it has finished generating captchas
15 |
16 |
17 | # STEP 2 - pre-processing images for training
18 | - install `opencv-python` using pip, then run `python process_images.py`. To sanity-check a single image first, see the snippet below.
19 |
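If you want to check the preprocessing on one image before converting the whole `temp` folder, a minimal sketch looks like this (the file name is a placeholder; point it at any generated captcha):

```python
from pre_processing import preprocess_image

img = preprocess_image("temp/abcd.webp")  # placeholder path
print(img.size)  # should be (529, 120) with the default svg-captcha settings
img.show()       # visually confirm the noise line was removed
```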
20 |
21 | # STEP 3 - creating a model
22 | I based this model on the Keras [documentation](https://keras.io/examples/vision/captcha_ocr/)
23 | ## configuration stuff:
24 | - first, install the requirements: `pip install -r requirements.txt`
25 | - second, run it and check whether it detects any GPU devices (if you have one). If it reports 0 available GPUs and you are on Windows,
26 | I strongly recommend using WSL 2 by following the TensorFlow [tutorial](https://www.tensorflow.org/install/pip); you can also run the quick check below outside the training script.
27 | If you are on another platform and don't see any GPUs, follow the TensorFlow tutorial as well; I'm not an expert in this kind of situation.
28 |
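A quick way to check GPU visibility (these are the same calls MODEL_CREATION.py makes at startup):

```python
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices("GPU")
print("GPUs found:", gpus)
print("Built with CUDA:", tf.test.is_built_with_cuda())
```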
29 | Now, on to actually running the model:
30 | - run `python MODEL_CREATION.py` and wait
31 | - check that the image size looks right: the script will print `img_width: [width], img_height: [height], Press Enter to continue...`.
32 | If the size is correct (the preprocessing crops the image down, so a 720x360 input ends up as 529x120), press Enter.
33 | If it doesn't fit, try creating an issue and ask me why.
34 | - if everything is fine, you'll get a popup with images and their labels; if the text doesn't match the images, create an issue
35 | - after some waiting you'll see `Train, Press Enter to continue...`. This is the good part, where all the magic happens: after pressing Enter it will train your model, and all you have to do is wait until it has finished
36 | - it will then automatically save the model and show you a panel of images with their corresponding predictions. At this point you're pretty much done (you can check that the saved model loads back with the snippet below)
37 |
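Once training is done, you can quickly confirm that the exported model loads back before moving on to step 4 (the file name matches the one MODEL_CREATION.py saves):

```python
import keras

model = keras.saving.load_model("predi_model.h5")
model.summary()  # should end with the softmax "dense2" layer
```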
38 | # STEP 4 - Re-use the model
39 | - put unprocessed test captchas (raw images, not preprocessed) in a directory named `test`, then simply run `python main.py` and voilà, you should see the prediction and the label for each image. Note that main.py also expects a small `prediction` module providing a `predict` function; a sketch is given below.
40 | - if the predictions are bad, try adding more captchas to your dataset
41 |
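main.py imports `predict` from a `prediction` module that is not included in this listing. Below is a minimal sketch of what such a module could look like, reusing the greedy CTC decoding idea from MODEL_CREATION.py; the character set and `max_length` are assumptions you must adapt to your own dataset:

```python
# prediction.py -- hypothetical sketch, not the original module
import numpy as np
import tensorflow as tf
from keras import layers

# assumption: must match the vocabulary printed by MODEL_CREATION.py at training time
characters = sorted("abcdefghijklmnopqrstuvwxyz")
num_to_char = layers.StringLookup(vocabulary=characters, mask_token=None, invert=True)

def predict(pil_img, model, max_length=4):
    # preprocess_image() returns a 529x120 grayscale PIL image
    img = np.array(pil_img, dtype=np.float32)[..., None] / 255.0  # H x W x 1 in [0, 1]
    img = np.transpose(img, (1, 0, 2))                            # time axis = image width
    pred = model.predict(img[None, ...])                          # (1, timesteps, vocab + 1)
    # greedy CTC decoding, mirroring ctc_decode() in MODEL_CREATION.py
    seq_len = np.full(pred.shape[0], pred.shape[1], dtype=np.int32)
    logits = tf.math.log(tf.transpose(pred, perm=[1, 0, 2]) + 1e-8)
    decoded, _ = tf.nn.ctc_greedy_decoder(inputs=logits, sequence_length=seq_len)
    res = tf.sparse.to_dense(decoded[0], default_value=-1)[0][:max_length]
    res = tf.boolean_mask(res, res != -1)                         # drop padding
    return tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
```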
42 | That's all. If you have any problems or questions, create an issue!
43 |
--------------------------------------------------------------------------------
/captcha/captcha.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NotTrueFalse/Captcha_solving/89db6fa8a0d33596db16276d4bda35f9e842e06c/captcha/captcha.7z
--------------------------------------------------------------------------------
/captcha/xlqg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NotTrueFalse/Captcha_solving/89db6fa8a0d33596db16276d4bda35f9e842e06c/captcha/xlqg.png
--------------------------------------------------------------------------------
/generator.js:
--------------------------------------------------------------------------------
1 | var svgCaptcha = require('svg-captcha');
2 | var fs = require('fs');
3 | var svg2img = require('svg2img');
4 | var num = 10000;//desired number of captchas to generate (10k works well)
5 | const FORMAT = 'webp';
6 | const SIZE = [720, 360];
7 | const out = './temp';
8 | async function generate(){
9 | // process.stdout.write(`\r${num} left `);
10 | var captcha = svgCaptcha.create({
11 | size: 4,
12 | noise: 1,
13 | color: true,
14 | background: '#2E3137',
15 | width: SIZE[0],
16 | height: SIZE[1],
17 | fontSize: 160
18 | });
19 | svg2img(captcha.data,{ format: FORMAT }, function(error, buffer) {
20 | fs.writeFileSync(`${out}/${captcha.text}.${FORMAT}`, buffer);
21 | console.log(`${out}/${captcha.text}.${FORMAT}`);
22 | });
23 | if (num > 1){
24 | num--;
25 | await generate();
26 | }
27 | }
28 | async function main(){
29 | await generate();
30 | }
31 | main();
32 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | from pre_processing import preprocess_image
2 | from prediction import predict
3 | import keras
4 | import os
5 | model = keras.saving.load_model('predi_model.h5')
6 | test_directory = './test'
7 |
8 | for image_path in os.listdir(test_directory):
9 | img = preprocess_image(f"{test_directory}/{image_path}")
10 | prediction = predict(img, model)
11 | label = image_path.split('.')[0].split('/')[-1]
12 | print("Prediction: ", prediction, "Label: ", label)
13 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "generator",
3 | "version": "1.0.0",
4 | "description": "generate captcha to train a model",
5 |   "main": "generator.js",
6 | "scripts": {
7 | "test": "echo \"Error: no test specified\" && exit 1"
8 | },
9 | "author": "ender",
10 | "license": "ISC",
11 | "dependencies": {
12 | "fs": "^0.0.1-security",
13 | "sharp": "^0.33.2",
14 | "svg-captcha": "^1.4.0",
15 | "svg2img": "^1.0.0-beta.2"
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/pre_processing.py:
--------------------------------------------------------------------------------
1 | from PIL import Image
2 | import cv2
3 | import numpy as np
4 | # import os
5 |
6 | def preprocess_image(image_path:str,ctype:str='svgcaptcha')->Image.Image:
7 | """Preprocess the image to make it easier to read by the OCR"""
8 | result = cv2.imread(image_path, 0)
9 | if ctype == 'svgcaptcha':
10 |         #crop roughly 28% from each side to drop the border area
11 |         p28 = result.shape[0]/100*28
12 |         h, w = result.shape
13 |         #precrop: rows = top:bottom, columns = left:right
14 |         result = result[int(p28*1.43):h-int(p28*0.95), int(p28):w-int(p28*0.9)]
15 | # canny = cv2.Canny(result, 50,50)
16 | mask = np.full(result.shape, 255, dtype=np.uint8)
17 | result_not = cv2.bitwise_not(result)
18 | result_not = cv2.threshold(result_not, 202, 255, cv2.THRESH_BINARY)[1]
19 | baw = cv2.erode(result_not, np.ones((3,3), np.uint8), iterations=1)
20 | contours, hierarchy = cv2.findContours(baw, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)#find the contours
21 | for contour in contours:
22 |             if cv2.contourArea(contour) < 2100:#only draw the small contours (letter strokes) onto the mask; big contours (the noise line / image border) are skipped
23 | cv2.drawContours(mask, [contour], -1, 0, -1)
24 | baw = cv2.bitwise_and(baw, baw, mask=mask)
25 | kernel = np.ones((1,1), np.uint8)
26 | dilatation = cv2.dilate(mask, kernel, iterations=1)
27 | masked_img = cv2.threshold(dilatation, 240, 255, cv2.THRESH_BINARY)[1]
28 | r = 11#radius of the circle
29 | kernel = np.zeros((r*2,r*2), np.uint8)#create a circle
30 |         cv2.circle(kernel, (r-2,r-2), r, 255, -1)#-2 => shift toward the bottom right of the letters
31 | #dilate the image (to expand the text mask)
32 |         masked_img = cv2.erode(masked_img, kernel, iterations=2)#erode shrinks the white area, expanding the letter mask
33 | #apply the mask on the original image
34 | masked_img = cv2.bitwise_not(masked_img)
35 | result_not[masked_img == 0] = 255#apply the mask and replace the black pixels by white pixels
36 | result = result_not
37 | #find the first pixel of the image who has 0 value
38 | x, y = np.where(result == 0)
39 | result = result[x.min():x.max(), y.min():y.max()]
40 | new_img = np.zeros((120, 529), dtype=np.uint8)
41 | new_img.fill(255)
42 | center_left = (529 - result.shape[1])//2
43 | center_top = (120 - result.shape[0])//2#center the image and embed it in a 529x120 image
44 | new_img[center_top:center_top+result.shape[0], center_left:center_left+result.shape[1]] = result
45 | return Image.fromarray(new_img)
46 | elif ctype == 'pythoncaptcha':
47 | result = cv2.bitwise_not(result)
48 | result = cv2.threshold(result, 35, 255, cv2.THRESH_BINARY)[1]
49 | result = cv2.erode(result, np.ones((2,2), np.uint8), iterations=1)
50 | x,y = np.where(result == 0)
51 | result = result[x.min():x.max(), y.min():y.max()]
52 | #embed it in a 122 by 56 image
53 | new_img = np.zeros((56,122), dtype=np.uint8)
54 | new_img.fill(255)
55 | center_left = (122 - result.shape[1])//2
56 | center_top = (56 - result.shape[0])//2
57 | new_img[center_top:center_top+result.shape[0], center_left:center_left+result.shape[1]] = result
58 | return Image.fromarray(new_img)
59 | # start_top = int(result.shape[1]/100)
60 | # start_left = int(result.shape[0]/100*15)
61 | # result = result[int(start_left):result.shape[0]-int(start_left), int(start_top):result.shape[1]-int(start_top)]
62 |
--------------------------------------------------------------------------------
/process_images.py:
--------------------------------------------------------------------------------
1 | from pre_processing import preprocess_image
2 | import os
3 | i = 0
4 | l = len(os.listdir("temp"))
5 | if not os.path.exists('captcha'):os.mkdir('captcha')
6 | for img in os.listdir("temp"):
7 | i += 1
8 |     if img.endswith((".png", ".webp")):  # generator.js outputs webp by default
9 |         result = preprocess_image('temp/'+img)
10 |         # save as .png so MODEL_CREATION.py (which globs *.png) picks the image up
11 |         result.save('captcha/'+os.path.splitext(img)[0]+'.png')
12 | try:
13 | os.remove('temp/'+img)
14 |         except OSError: pass
15 | print(f"[*] {i} images processed {i}/{l}, {round(i/l*100, 2)}%"," "*20, end="\r")
16 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow
2 | keras
3 | pillow
4 | matplotlib
5 | numpy
6 | # pathlib is part of the Python 3 standard library; do not pip-install the old backport
7 | opencv-python
8 |
--------------------------------------------------------------------------------
/temp/xIqg.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NotTrueFalse/Captcha_solving/89db6fa8a0d33596db16276d4bda35f9e842e06c/temp/xIqg.webp
--------------------------------------------------------------------------------