├── .gitignore ├── README.md ├── captchas ├── META-INF │ └── MANIFEST.MF ├── Main.java └── README.md ├── requirements.txt └── solver ├── captcha_eval.py ├── captcha_gen_default.py ├── captcha_input.py ├── captcha_model.py ├── captcha_multi_gpu_train.py ├── captcha_predict.py ├── captcha_records.py ├── captcha_train.py ├── config.py └── data ├── test_data └── .gitkeep ├── train_data └── .gitkeep └── valid_data └── .gitkeep /.gitignore: -------------------------------------------------------------------------------- 1 | captcha_train/ 2 | solver/data/ 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow Captcha Solver 2 | Solve image-based captchas using [Tensorflow](https://www.tensorflow.org/) neural networks. This demo was developed for the [DAHO.AM Conference](https://daho.am/) in Munich, 2017. 3 | 4 | **This repository is unmaintained and the code might not work as expected.** If you are looking for another Tensorflow-based solution, head over to: https://github.com/PatrickLib/captcha_recognize. 5 | 6 | ## Getting started 7 | 8 | Clone the repository: 9 | 10 | ``` 11 | $ git clone https://github.com/stekhn/tensorflow-captcha-solver.git 12 | ``` 13 | 14 | This guide was written for Mac users, but users of other operating systems might still find it useful. 15 | 16 | ### Set up Python virtualenv 17 | 18 | Create a new virtual environment: 19 | 20 | ``` 21 | $ virtualenv venv 22 | ``` 23 | 24 | Activate the virtual environment: 25 | 26 | ``` 27 | $ source venv/bin/activate 28 | ``` 29 | 30 | Check if the Python virtual environment is set up correctly: 31 | 32 | ``` 33 | $ which python 34 | /Users/your-username/Development/venv/env/bin/python 35 | ``` 36 | 37 | Install dependencies: 38 | 39 | ``` 40 | $ pip install -r requirements.txt 41 | ``` 42 | 43 | **Troubleshooting:** Tensorflow could not be found: 44 | 45 | ``` 46 | Could not find a version that satisfies the requirement tensorflow==1.1.0 (from -r requirements.txt (line 51)) (from versions: ) 47 | No matching distribution found for tensorflow==1.1.0 (from -r requirements.txt (line 51)) 48 | ``` 49 | 50 | If you encounter this error, try installing Tensorflow from the binary: 51 | 52 | ``` 53 | $ python -m pip install --upgrade https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.1.0-py2-none-any.whl 54 | ``` 55 | 56 | *Linux or Windows users need to find another download link...* 57 | 58 | ### Generate captchas 59 | 60 | Go to the captchas folder: 61 | 62 | ``` 63 | $ cd captchas 64 | ``` 65 | 66 | Download [SimpleCaptcha](http://simplecaptcha.sourceforge.net) to the folder: 67 | 68 | ``` 69 | $ curl -O https://vorboss.dl.sourceforge.net/project/simplecaptcha/simplecaptcha-1.2-jdk1.5.jar 70 | ``` 71 | 72 | Extract SimpleCaptcha: 73 | 74 | ``` 75 | $ jar xf simplecaptcha-1.2-jdk1.5.jar 76 | ``` 77 | 78 | Run SimpleCaptcha: 79 | 80 | ``` 81 | $ javac Main.java && java Main 82 | ``` 83 | 84 | ### Train the neural network 85 | 86 | Once you've generated the captcha data, go to the solver folder: 87 | 88 | ``` 89 | $ cd solver 90 | ``` 91 | 92 | Create the Tensorflow records: 93 | 94 | ``` 95 | $ python captcha_records.py 96 | ``` 97 | 98 | Train the network (note that the training runs until you stop it): 99 | 100 | ``` 101 | $ python captcha_train.py 102 | ``` 103 | 104 | Evaluate the performance of the network: 105 | 106 | ``` 107 | $ python captcha_eval.py 108 | ``` 109 | 110 | Try to solve some captchas: 111 |
112 | ``` 113 | $ python captcha_predict.py 114 | ``` 115 | 116 | Everything working? Great! Go solve some captchas (on your own machine for development purposes, of course). 117 | 118 | ### Further info 119 | If you want to see how the neural network works, check out [Tensorflow Graph Viz](https://www.tensorflow.org/get_started/graph_viz). 120 | -------------------------------------------------------------------------------- /captchas/META-INF/MANIFEST.MF: -------------------------------------------------------------------------------- 1 | Manifest-Version: 1.0 2 | Ant-Version: Apache Ant 1.7.1 3 | Created-By: 19.1-b02-334 (Apple Inc.) 4 | 5 | -------------------------------------------------------------------------------- /captchas/Main.java: -------------------------------------------------------------------------------- 1 | import java.awt.FlowLayout; 2 | import java.awt.image.BufferedImage; 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.io.PrintWriter; 6 | import java.util.LinkedList; 7 | import java.util.List; 8 | import java.awt.Color; 9 | 10 | import javax.imageio.ImageIO; 11 | import javax.swing.ImageIcon; 12 | import javax.swing.JFrame; 13 | import javax.swing.JLabel; 14 | 15 | import nl.captcha.Captcha; 16 | import nl.captcha.Captcha.Builder; 17 | import nl.captcha.backgrounds.GradiatedBackgroundProducer; 18 | import nl.captcha.backgrounds.FlatColorBackgroundProducer; 19 | import nl.captcha.gimpy.DropShadowGimpyRenderer; 20 | import nl.captcha.gimpy.FishEyeGimpyRenderer; 21 | import nl.captcha.noise.StraightLineNoiseProducer; 22 | import nl.captcha.text.producer.ChineseTextProducer; 23 | import nl.captcha.text.producer.DefaultTextProducer; 24 | 25 | 26 | public class Main { 27 | 28 | public static void main(String[] args) throws IOException { 29 | // TODO Auto-generated method stub // Required! Always!
lol 30 | img2ds("../solver/data/train_data/",50000); 31 | img2ds("../solver/data/valid_data/",20000); 32 | img2ds("../solver/data/test_data/",1000); 33 | 34 | } 35 | 36 | public static void img2ds(String dir, int N) throws IOException 37 | { 38 | Color c=new Color(1f,0f,0f,.0f); 39 | for(int i=1;i<=N;i++) 40 | { 41 | if(i%100==0)System.out.println(i+","+N); 42 | Captcha cap = new Captcha.Builder(150, 50) 43 | .addText() 44 | .gimp(new FishEyeGimpyRenderer(c, c)) 45 | .build(); 46 | img2file(dir+"/"+cap.getAnswer()+".png",cap.getImage()); 47 | } 48 | } 49 | 50 | public static void img2disp(BufferedImage img) 51 | { 52 | 53 | JFrame frame = new JFrame(); 54 | frame.getContentPane().setLayout(new FlowLayout()); 55 | frame.getContentPane().add(new JLabel(new ImageIcon(img))); 56 | frame.pack(); 57 | frame.setVisible(true); 58 | } 59 | 60 | public static void img2file(String fileName, BufferedImage img) throws IOException 61 | { 62 | ImageIO.write(img, "png", new File(fileName)); 63 | 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /captchas/README.md: -------------------------------------------------------------------------------- 1 | Run with javac Main.java && java Main -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.3 2 | appnope==0.1.0 3 | backports-abc==0.5 4 | backports.shutil-get-terminal-size==1.0.0 5 | bleach==3.3.0 6 | certifi==2017.4.17 7 | configparser==3.5.0 8 | decorator==4.0.11 9 | entrypoints==0.2.2 10 | enum34==1.1.6 11 | funcsigs==1.0.2 12 | functools32==3.2.3.post2 13 | html5lib==0.999999999 14 | ipykernel==4.6.1 15 | ipython==5.3.0 16 | ipython-genutils==0.2.0 17 | ipywidgets==6.0.0 18 | Jinja2==2.11.3 19 | jsonschema==2.6.0 20 | jupyter==1.0.0 21 | jupyter-client==5.0.1 22 | jupyter-console==5.1.0 23 | jupyter-core==4.3.0 24 | MarkupSafe==1.0 25 | mistune==0.8.1 26 | mock==2.0.0 27 | nbconvert==5.2.1 28 | nbformat==4.3.0 29 | notebook==6.1.5 30 | numpy==1.12.1 31 | olefile==0.44 32 | packaging==16.8 33 | pandocfilters==1.4.1 34 | pathlib2==2.2.1 35 | pbr==3.0.1 36 | pexpect==4.2.1 37 | pickleshare==0.7.4 38 | Pillow==8.1.1 39 | prompt-toolkit==1.0.14 40 | protobuf==3.3.0 41 | ptyprocess==0.5.1 42 | Pygments==2.7.4 43 | pyparsing==2.2.0 44 | python-dateutil==2.6.0 45 | pyzmq==16.0.2 46 | qtconsole==4.3.0 47 | scandir==1.5 48 | simplegeneric==0.8.1 49 | singledispatch==3.4.0.3 50 | six==1.10.0 51 | tensorflow==1.15.4 52 | terminado==0.6 53 | testpath==0.3.1 54 | tornado==4.5.1 55 | traitlets==4.3.2 56 | wcwidth==0.1.7 57 | webencodings==0.5.1 58 | Werkzeug==0.15.3 59 | widgetsnbextension==2.0.0 60 | -------------------------------------------------------------------------------- /solver/captcha_eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from datetime import datetime 6 | import argparse 7 | import sys 8 | import math 9 | 10 | import tensorflow as tf 11 | import captcha_model as captcha 12 | 13 | FLAGS = None 14 | 15 | def run_eval(): 16 | with tf.Graph().as_default(), tf.device('/cpu:0'): 17 | images, labels = captcha.inputs(train=False, batch_size=FLAGS.batch_size) 18 | logits = captcha.inference(images, keep_prob=1) 19 | eval_correct = captcha.evaluation(logits, labels) 20 | sess = tf.Session() 21 
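# Restore the most recent checkpoint from FLAGS.checkpoint_dir and start the input queue threads before looping over the evaluation batches.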
| saver = tf.train.Saver() 22 | saver.restore(sess, tf.train.latest_checkpoint(FLAGS.checkpoint_dir)) 23 | coord = tf.train.Coordinator() 24 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 25 | try: 26 | num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size)) 27 | true_count = 0 28 | total_true_count = 0 29 | total_sample_count = num_iter * FLAGS.batch_size 30 | step = 0 31 | print('>> loop: %d, total_sample_count: %d' % (num_iter, total_sample_count)) 32 | while step < num_iter and not coord.should_stop(): 33 | true_count = sess.run(eval_correct) 34 | total_true_count += true_count 35 | precision = true_count / FLAGS.batch_size 36 | print('>> %s Step %d: true/total: %d/%d precision @ 1 = %.3f' 37 | %(datetime.now(), step, true_count, FLAGS.batch_size, precision)) 38 | step += 1 39 | precision = total_true_count / total_sample_count 40 | print('>> %s true/total: %d/%d precision @ 1 = %.3f' 41 | %(datetime.now(), total_true_count, total_sample_count, precision)) 42 | except Exception as e: 43 | coord.request_stop(e) 44 | finally: 45 | coord.request_stop() 46 | coord.join(threads) 47 | sess.close() 48 | 49 | 50 | def main(_): 51 | if tf.gfile.Exists(FLAGS.eval_dir): 52 | tf.gfile.DeleteRecursively(FLAGS.eval_dir) 53 | tf.gfile.MakeDirs(FLAGS.eval_dir) 54 | run_eval() 55 | 56 | 57 | if __name__ == '__main__': 58 | parser = argparse.ArgumentParser() 59 | parser.add_argument( 60 | '--num_examples', 61 | type=int, 62 | default=20000, 63 | help='Number of examples to run validation.' 64 | ) 65 | parser.add_argument( 66 | '--batch_size', 67 | type=int, 68 | default=100, 69 | help='Batch size.' 70 | ) 71 | parser.add_argument( 72 | '--checkpoint_dir', 73 | type=str, 74 | default='./captcha_train', 75 | help='Directory where to restore checkpoint.' 76 | ) 77 | parser.add_argument( 78 | '--eval_dir', 79 | type=str, 80 | default='./captcha_eval', 81 | help='Directory where to write event logs.' 
82 | ) 83 | FLAGS, unparsed = parser.parse_known_args() 84 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 85 | -------------------------------------------------------------------------------- /solver/captcha_gen_default.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import random 7 | import os 8 | from captcha.image import ImageCaptcha 9 | 10 | import config 11 | 12 | IMAGE_HEIGHT = config.IMAGE_HEIGHT 13 | IMAGE_WIDTH = config.IMAGE_WIDTH 14 | CHARS_NUM = config.CHARS_NUM 15 | 16 | TEST_SIZE = 1000 17 | TRAIN_SIZE = 50000 18 | VALID_SIZE = 20000 19 | 20 | FLAGS = None 21 | 22 | def gen(gen_dir, total_size, chars_num): 23 | if not os.path.exists(gen_dir): 24 | os.makedirs(gen_dir) 25 | image = ImageCaptcha(width=IMAGE_WIDTH, height=IMAGE_HEIGHT,font_sizes=[40]) 26 | # must be subset of config.CHAR_SETS 27 | char_sets = 'ABCDEFGHIJKLMNPQRSTUVWXYZ' 28 | for i in range(total_size): 29 | label = ''.join(random.sample(char_sets, chars_num)) 30 | image.write(label, os.path.join(gen_dir, label+'_num'+str(i)+'.png')) 31 | 32 | 33 | if __name__ == '__main__': 34 | parser = argparse.ArgumentParser() 35 | parser.add_argument( 36 | '--test_dir', 37 | type=str, 38 | default='./data/test_data', 39 | help='Directory in which to generate the test captcha images' 40 | ) 41 | parser.add_argument( 42 | '--train_dir', 43 | type=str, 44 | default='./data/train_data', 45 | help='Directory in which to generate the training captcha images' 46 | ) 47 | parser.add_argument( 48 | '--valid_dir', 49 | type=str, 50 | default='./data/valid_data', 51 | help='Directory in which to generate the validation captcha images' 52 | ) 53 | FLAGS, unparsed = parser.parse_known_args() 54 | print('>> generate %d captchas in %s' % (TEST_SIZE, FLAGS.test_dir)) 55 | gen(FLAGS.test_dir, TEST_SIZE, CHARS_NUM) 56 | print('>> generate %d captchas in %s' % (TRAIN_SIZE, FLAGS.train_dir)) 57 | gen(FLAGS.train_dir, TRAIN_SIZE, CHARS_NUM) 58 | print('>> generate %d captchas in %s' % (VALID_SIZE, FLAGS.valid_dir)) 59 | gen(FLAGS.valid_dir, VALID_SIZE, CHARS_NUM) 60 | print('>> generate Done!') 61 | -------------------------------------------------------------------------------- /solver/captcha_input.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os.path 6 | import tensorflow as tf 7 | 8 | import config 9 | 10 | RECORD_DIR = config.RECORD_DIR 11 | TRAIN_FILE = config.TRAIN_FILE 12 | VALID_FILE = config.VALID_FILE 13 | 14 | IMAGE_WIDTH = config.IMAGE_WIDTH 15 | IMAGE_HEIGHT = config.IMAGE_HEIGHT 16 | CLASSES_NUM = config.CLASSES_NUM 17 | CHARS_NUM = config.CHARS_NUM 18 | 19 | def read_and_decode(filename_queue): 20 | reader = tf.TFRecordReader() 21 | _, serialized_example = reader.read(filename_queue) 22 | features = tf.parse_single_example( 23 | serialized_example, 24 | features={ 25 | 'image_raw': tf.FixedLenFeature([], tf.string), 26 | 'label_raw': tf.FixedLenFeature([], tf.string), 27 | }) 28 | image = tf.decode_raw(features['image_raw'], tf.int16) 29 | image.set_shape([IMAGE_HEIGHT * IMAGE_WIDTH]) 30 | image = tf.cast(image, tf.float32) * (1.
/ 255) - 0.5 31 | reshape_image = tf.reshape(image, [IMAGE_HEIGHT, IMAGE_WIDTH, 1]) 32 | label = tf.decode_raw(features['label_raw'], tf.uint8) 33 | label.set_shape([CHARS_NUM * CLASSES_NUM]) 34 | reshape_label = tf.reshape(label, [CHARS_NUM, CLASSES_NUM]) 35 | return tf.cast(reshape_image, tf.float32), tf.cast(reshape_label, tf.float32) 36 | 37 | 38 | def inputs(train, batch_size): 39 | filename = os.path.join(RECORD_DIR, 40 | TRAIN_FILE if train else VALID_FILE) 41 | 42 | with tf.name_scope('input'): 43 | filename_queue = tf.train.string_input_producer([filename]) 44 | image, label = read_and_decode(filename_queue) 45 | if train: 46 | images, sparse_labels = tf.train.shuffle_batch([image, label], 47 | batch_size=batch_size, 48 | num_threads=6, 49 | capacity=2000 + 3 * batch_size, 50 | min_after_dequeue=2000) 51 | else: 52 | images, sparse_labels = tf.train.batch([image, label], 53 | batch_size=batch_size, 54 | num_threads=6, 55 | capacity=2000 + 3 * batch_size) 56 | 57 | return images, sparse_labels 58 | -------------------------------------------------------------------------------- /solver/captcha_model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | import captcha_input 7 | import config 8 | 9 | IMAGE_WIDTH = config.IMAGE_WIDTH 10 | IMAGE_HEIGHT = config.IMAGE_HEIGHT 11 | CLASSES_NUM = config.CLASSES_NUM 12 | CHARS_NUM = config.CHARS_NUM 13 | 14 | def inputs(train, batch_size): 15 | return captcha_input.inputs(train, batch_size=batch_size) 16 | 17 | 18 | def _conv2d(value, weight): 19 | """conv2d returns a 2d convolution layer with full stride.""" 20 | return tf.nn.conv2d(value, weight, strides=[1, 1, 1, 1], padding='SAME') 21 | 22 | 23 | def _max_pool_2x2(value, name): 24 | """max_pool_2x2 downsamples a feature map by 2X.""" 25 | return tf.nn.max_pool(value, ksize=[1, 2, 2, 1], 26 | strides=[1, 2, 2, 1], padding='SAME', name=name) 27 | 28 | 29 | def _weight_variable(name, shape): 30 | """weight_variable generates a weight variable of a given shape.""" 31 | with tf.device('/cpu:0'): 32 | initializer = tf.truncated_normal_initializer(stddev=0.1) 33 | var = tf.get_variable(name,shape,initializer=initializer, dtype=tf.float32) 34 | return var 35 | 36 | 37 | def _bias_variable(name, shape): 38 | """bias_variable generates a bias variable of a given shape.""" 39 | with tf.device('/cpu:0'): 40 | initializer = tf.constant_initializer(0.1) 41 | var = tf.get_variable(name, shape, initializer=initializer,dtype=tf.float32) 42 | return var 43 | 44 | def inference(images, keep_prob): 45 | images = tf.reshape(images, [-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1]) 46 | 47 | with tf.variable_scope('conv1') as scope: 48 | kernel = _weight_variable('weights', shape=[3,3,1,64]) 49 | biases = _bias_variable('biases',[64]) 50 | pre_activation = tf.nn.bias_add(_conv2d(images, kernel),biases) 51 | conv1 = tf.nn.relu(pre_activation, name=scope.name) 52 | 53 | pool1 = _max_pool_2x2(conv1, name='pool1') 54 | 55 | with tf.variable_scope('conv2') as scope: 56 | kernel = _weight_variable('weights', shape=[3,3,64,64]) 57 | biases = _bias_variable('biases',[64]) 58 | pre_activation = tf.nn.bias_add(_conv2d(pool1, kernel),biases) 59 | conv2 = tf.nn.relu(pre_activation, name=scope.name) 60 | 61 | pool2 = _max_pool_2x2(conv2, name='pool2') 62 | 63 | with tf.variable_scope('conv3') as scope: 64 | kernel = _weight_variable('weights', 
shape=[3,3,64,64]) 65 | biases = _bias_variable('biases',[64]) 66 | pre_activation = tf.nn.bias_add(_conv2d(pool2, kernel),biases) 67 | conv3 = tf.nn.relu(pre_activation, name=scope.name) 68 | 69 | pool3 = _max_pool_2x2(conv3, name='pool3') 70 | 71 | with tf.variable_scope('conv4') as scope: 72 | kernel = _weight_variable('weights', shape=[3,3,64,64]) 73 | biases = _bias_variable('biases',[64]) 74 | pre_activation = tf.nn.bias_add(_conv2d(pool3, kernel),biases) 75 | conv4 = tf.nn.relu(pre_activation, name=scope.name) 76 | 77 | pool4 = _max_pool_2x2(conv4, name='pool4') 78 | 79 | with tf.variable_scope('local1') as scope: 80 | batch_size = images.get_shape()[0].value 81 | reshape = tf.reshape(pool4, [batch_size,-1]) 82 | dim = reshape.get_shape()[1].value 83 | weights = _weight_variable('weights', shape=[dim,1024]) 84 | biases = _bias_variable('biases',[1024]) 85 | local1 = tf.nn.relu(tf.matmul(reshape,weights) + biases, name=scope.name) 86 | 87 | local1_drop = tf.nn.dropout(local1, keep_prob) 88 | 89 | with tf.variable_scope('softmax_linear') as scope: 90 | weights = _weight_variable('weights',shape=[1024,CHARS_NUM*CLASSES_NUM]) 91 | biases = _bias_variable('biases',[CHARS_NUM*CLASSES_NUM]) 92 | softmax_linear = tf.add(tf.matmul(local1_drop,weights), biases, name=scope.name) 93 | 94 | return tf.reshape(softmax_linear, [-1, CHARS_NUM, CLASSES_NUM]) 95 | 96 | 97 | def loss(logits, labels): 98 | cross_entropy = tf.nn.softmax_cross_entropy_with_logits( 99 | labels=labels, logits=logits, name='corss_entropy_per_example') 100 | cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') 101 | tf.add_to_collection('losses', cross_entropy_mean) 102 | return tf.add_n(tf.get_collection('losses'), name='total_loss') 103 | 104 | 105 | def training(loss): 106 | optimizer = tf.train.AdamOptimizer(1e-4) 107 | train_op = optimizer.minimize(loss) 108 | return train_op 109 | 110 | 111 | def evaluation(logits, labels): 112 | correct_prediction = tf.equal(tf.argmax(logits,2), tf.argmax(labels,2)) 113 | correct_batch = tf.reduce_mean(tf.cast(correct_prediction, tf.int32), 1) 114 | return tf.reduce_sum(tf.cast(correct_batch, tf.float32)) 115 | 116 | 117 | def output(logits): 118 | return tf.argmax(logits, 2) 119 | 120 | -------------------------------------------------------------------------------- /solver/captcha_multi_gpu_train.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import time 6 | from datetime import datetime 7 | import argparse 8 | import sys 9 | 10 | import numpy as np 11 | from six.moves import xrange 12 | import tensorflow as tf 13 | import captcha_model as captcha 14 | 15 | FLAGS = None 16 | 17 | def tower_loss(scope, keep_prob): 18 | images, labels = captcha.inputs(train=True, batch_size=FLAGS.batch_size) 19 | logits = captcha.inference(images, keep_prob) 20 | _ = captcha.loss(logits, labels) 21 | losses = tf.get_collection('losses', scope) 22 | total_loss = tf.add_n(losses, name='total_loss') 23 | return total_loss 24 | 25 | 26 | def average_gradients(tower_grads): 27 | average_grads = [] 28 | for grad_and_vars in zip(*tower_grads): 29 | grads = [] 30 | for g, _ in grad_and_vars: 31 | expanded_g = tf.expand_dims(g, 0) 32 | grads.append(expanded_g) 33 | grad = tf.concat(axis=0, values=grads) 34 | grad = tf.reduce_mean(grad, 0) 35 | v = grad_and_vars[0][1] 36 | grad_and_var = (grad, v) 37 | 
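# 'grad' is now this variable's gradient averaged across all towers; it is paired with the variable shared by the towers (taken from tower 0).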
average_grads.append(grad_and_var) 38 | return average_grads 39 | 40 | 41 | def run_train(): 42 | with tf.Graph().as_default(), tf.device('/cpu:0'): 43 | opt = tf.train.AdamOptimizer(1e-4) 44 | tower_grads = [] 45 | with tf.variable_scope(tf.get_variable_scope()): 46 | for i in xrange(FLAGS.num_gpus): 47 | with tf.device('/gpu:%d' % i): 48 | with tf.name_scope('tower_%d' % (i)) as scope: 49 | loss = tower_loss(scope, keep_prob=0.5) 50 | tf.get_variable_scope().reuse_variables() 51 | grads = opt.compute_gradients(loss) 52 | tower_grads.append(grads) 53 | 54 | grads = average_gradients(tower_grads) 55 | train_op = opt.apply_gradients(grads) 56 | saver = tf.train.Saver(tf.global_variables()) 57 | init = tf.global_variables_initializer() 58 | sess = tf.Session(config=tf.ConfigProto( 59 | allow_soft_placement=True, 60 | log_device_placement=True)) 61 | 62 | sess.run(init) 63 | tf.train.start_queue_runners(sess=sess) 64 | for step in xrange(FLAGS.max_steps): 65 | start_time = time.time() 66 | _, loss_value = sess.run([train_op, loss]) 67 | duration = time.time() - start_time 68 | 69 | assert not np.isnan(loss_value), 'Model diverged with loss = NaN' 70 | 71 | if step % 10 == 0: 72 | num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus 73 | examples_per_sec = num_examples_per_step / duration 74 | sec_per_batch = duration / FLAGS.num_gpus 75 | 76 | format_str = ('>> %s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 77 | 'sec/batch)') 78 | print (format_str % (datetime.now(), step, loss_value, 79 | examples_per_sec, sec_per_batch)) 80 | 81 | if step % 1000 == 0 or (step + 1) == FLAGS.max_steps: 82 | print('>> %s Saving in %s' % (datetime.now(), FLAGS.checkpoint)) 83 | saver.save(sess, FLAGS.checkpoint, global_step=step) 84 | 85 | 86 | def main(_): 87 | if tf.gfile.Exists(FLAGS.train_dir): 88 | tf.gfile.DeleteRecursively(FLAGS.train_dir) 89 | tf.gfile.MakeDirs(FLAGS.train_dir) 90 | run_train() 91 | 92 | 93 | if __name__ == '__main__': 94 | parser = argparse.ArgumentParser() 95 | parser.add_argument( 96 | '--batch_size', 97 | type=int, 98 | default=128, 99 | help='Batch size.' 100 | ) 101 | parser.add_argument( 102 | '--max_steps', 103 | type=int, 104 | default=1000000, 105 | help='Number of batches to run.' 106 | ) 107 | parser.add_argument( 108 | '--num_gpus', 109 | type=int, 110 | default=8, 111 | help='How many GPUs to use.' 112 | ) 113 | parser.add_argument( 114 | '--train_dir', 115 | type=str, 116 | default='./captcha_train', 117 | help='Directory where to write event logs.' 118 | ) 119 | parser.add_argument( 120 | '--checkpoint', 121 | type=str, 122 | default='./captcha_train/captcha', 123 | help='Directory where to write checkpoint.' 
124 | ) 125 | FLAGS, unparsed = parser.parse_known_args() 126 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 127 | -------------------------------------------------------------------------------- /solver/captcha_predict.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import sys 7 | import os.path 8 | from datetime import datetime 9 | from PIL import Image 10 | import numpy as np 11 | 12 | import tensorflow as tf 13 | from tensorflow.python.platform import gfile 14 | import captcha_model as captcha 15 | 16 | import config 17 | 18 | IMAGE_WIDTH = config.IMAGE_WIDTH 19 | IMAGE_HEIGHT = config.IMAGE_HEIGHT 20 | 21 | CHAR_SETS = config.CHAR_SETS 22 | CLASSES_NUM = config.CLASSES_NUM 23 | CHARS_NUM = config.CHARS_NUM 24 | 25 | FLAGS = None 26 | 27 | def one_hot_to_texts(recog_result): 28 | texts = [] 29 | for i in xrange(recog_result.shape[0]): 30 | index = recog_result[i] 31 | texts.append(''.join([CHAR_SETS[i] for i in index])) 32 | return texts 33 | 34 | 35 | def input_data(image_dir): 36 | if not gfile.Exists(image_dir): 37 | print(">> Image director '" + image_dir + "' not found.") 38 | return None 39 | extensions = ['jpg', 'JPG', 'jpeg', 'JPEG', 'png', 'PNG'] 40 | print(">> Looking for images in '" + image_dir + "'") 41 | file_list = [] 42 | for extension in extensions: 43 | file_glob = os.path.join(image_dir, '*.' + extension) 44 | file_list.extend(gfile.Glob(file_glob)) 45 | if not file_list: 46 | print(">> No files found in '" + image_dir + "'") 47 | return None 48 | batch_size = len(file_list) 49 | images = np.zeros([batch_size, IMAGE_HEIGHT*IMAGE_WIDTH], dtype='float32') 50 | files = [] 51 | i = 0 52 | for file_name in file_list: 53 | image = Image.open(file_name) 54 | image_gray = image.convert('L') 55 | image_resize = image_gray.resize(size=(IMAGE_WIDTH,IMAGE_HEIGHT)) 56 | image.close() 57 | input_img = np.array(image_resize, dtype='float32') 58 | input_img = np.multiply(input_img.flatten(), 1./255) - 0.5 59 | images[i,:] = input_img 60 | base_name = os.path.basename(file_name) 61 | files.append(base_name) 62 | i += 1 63 | return images, files 64 | 65 | 66 | def run_predict(): 67 | with tf.Graph().as_default(), tf.device('/cpu:0'): 68 | input_images, input_filenames = input_data(FLAGS.captcha_dir) 69 | images = tf.constant(input_images) 70 | logits = captcha.inference(images, keep_prob=1) 71 | result = captcha.output(logits) 72 | saver = tf.train.Saver() 73 | sess = tf.Session() 74 | saver.restore(sess, tf.train.latest_checkpoint(FLAGS.checkpoint_dir)) 75 | print(tf.train.latest_checkpoint(FLAGS.checkpoint_dir)) 76 | recog_result = sess.run(result) 77 | sess.close() 78 | text = one_hot_to_texts(recog_result) 79 | total_count = len(input_filenames) 80 | true_count = 0. 81 | for i in range(total_count): 82 | print('image ' + input_filenames[i] + " precise ----> '" + text[i] + "'") 83 | if text[i] in input_filenames[i]: 84 | true_count += 1 85 | precision = true_count / total_count 86 | print('%s true/total: %d/%d precision @ 1 = %.3f' 87 | %(datetime.now(), true_count, total_count, precision)) 88 | 89 | 90 | def main(_): 91 | run_predict() 92 | 93 | if __name__ == '__main__': 94 | parser = argparse.ArgumentParser() 95 | parser.add_argument( 96 | '--checkpoint_dir', 97 | type=str, 98 | default='./captcha_train', 99 | help='Directory where to restore checkpoint.' 
100 | ) 101 | parser.add_argument( 102 | '--captcha_dir', 103 | type=str, 104 | default='./data/test_data', 105 | help='Directory where to get captcha images.' 106 | ) 107 | FLAGS, unparsed = parser.parse_known_args() 108 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 109 | -------------------------------------------------------------------------------- /solver/captcha_records.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import os.path 7 | import sys 8 | 9 | from PIL import Image 10 | 11 | import numpy as np 12 | import tensorflow as tf 13 | from tensorflow.python.platform import gfile 14 | import config 15 | 16 | IMAGE_HEIGHT = config.IMAGE_HEIGHT 17 | IMAGE_WIDTH = config.IMAGE_WIDTH 18 | CHAR_SETS = config.CHAR_SETS 19 | CLASSES_NUM = config.CLASSES_NUM 20 | CHARS_NUM = config.CHARS_NUM 21 | 22 | RECORD_DIR = config.RECORD_DIR 23 | TRAIN_FILE = config.TRAIN_FILE 24 | VALID_FILE = config.VALID_FILE 25 | 26 | FLAGS = None 27 | 28 | def _int64_feature(value): 29 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 30 | 31 | 32 | def _bytes_feature(value): 33 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 34 | 35 | 36 | def label_to_one_hot(label): 37 | label = label.split(".")[0] 38 | one_hot_label = np.zeros([CHARS_NUM, CLASSES_NUM]) 39 | offset = [] 40 | index = [] 41 | for i, c in enumerate(label): 42 | offset.append(i) 43 | index.append(CHAR_SETS.index(c)) 44 | one_hot_index = [offset, index] 45 | one_hot_label[one_hot_index] = 1.0 46 | return one_hot_label.astype(np.uint8) 47 | 48 | 49 | def conver_to_tfrecords(data_set, name): 50 | """Converts a dataset to tfrecords.""" 51 | if not os.path.exists(RECORD_DIR): 52 | os.makedirs(RECORD_DIR) 53 | filename = os.path.join(RECORD_DIR, name) 54 | print('>> Writing', filename) 55 | writer = tf.python_io.TFRecordWriter(filename) 56 | num_examples = len(data_set) 57 | for index in range(num_examples): 58 | image = data_set[index][0] 59 | height = image.shape[0] 60 | width = image.shape[1] 61 | image_raw = image.tostring() 62 | label = data_set[index][1] 63 | label_raw = label_to_one_hot(label).tostring() 64 | example = tf.train.Example(features=tf.train.Features(feature={ 65 | 'height': _int64_feature(height), 66 | 'width': _int64_feature(width), 67 | 'label_raw': _bytes_feature(label_raw), 68 | 'image_raw': _bytes_feature(image_raw)})) 69 | writer.write(example.SerializeToString()) 70 | writer.close() 71 | print('>> Writing Done!') 72 | 73 | 74 | def create_data_list(image_dir): 75 | if not gfile.Exists(image_dir): 76 | print("Image director '" + image_dir + "' not found.") 77 | return None 78 | extensions = ['jpg', 'JPG', 'jpeg', 'JPEG', 'png', 'PNG'] 79 | print("Looking for images in '" + image_dir + "'") 80 | file_list = [] 81 | for extension in extensions: 82 | file_glob = os.path.join(image_dir, '*.' 
+ extension) 83 | file_list.extend(gfile.Glob(file_glob)) 84 | if not file_list: 85 | print("No files found in '" + image_dir + "'") 86 | return None 87 | images = [] 88 | labels = [] 89 | for file_name in file_list: 90 | image = Image.open(file_name) 91 | image_gray = image.convert('L') 92 | image_resize = image_gray.resize(size=(IMAGE_WIDTH,IMAGE_HEIGHT)) 93 | input_img = np.array(image_resize, dtype='int16') 94 | image.close() 95 | label_name = os.path.basename(file_name).split('_')[0] 96 | images.append(input_img) 97 | labels.append(label_name) 98 | return zip(images, labels) 99 | 100 | 101 | def main(_): 102 | training_data = create_data_list(FLAGS.train_dir) 103 | conver_to_tfrecords(training_data, TRAIN_FILE) 104 | 105 | validation_data = create_data_list(FLAGS.valid_dir) 106 | conver_to_tfrecords(validation_data, VALID_FILE) 107 | 108 | 109 | if __name__ == '__main__': 110 | parser = argparse.ArgumentParser() 111 | parser.add_argument( 112 | '--train_dir', 113 | type=str, 114 | default='./data/train_data', 115 | help='Directory training to get captcha data files and write the converted result.' 116 | ) 117 | parser.add_argument( 118 | '--valid_dir', 119 | type=str, 120 | default='./data/valid_data', 121 | help='Directory validation to get captcha data files and write the converted result.' 122 | ) 123 | FLAGS, unparsed = parser.parse_known_args() 124 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 125 | -------------------------------------------------------------------------------- /solver/captcha_train.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import time 6 | from datetime import datetime 7 | import argparse 8 | import sys 9 | 10 | import tensorflow as tf 11 | import captcha_model as captcha 12 | 13 | FLAGS = None 14 | 15 | def run_train(): 16 | """Train CAPTCHA for a number of steps.""" 17 | 18 | with tf.Graph().as_default(): 19 | images, labels = captcha.inputs(train=True, batch_size=FLAGS.batch_size) 20 | 21 | logits = captcha.inference(images, keep_prob=0.5) 22 | 23 | loss = captcha.loss(logits, labels) 24 | 25 | train_op = captcha.training(loss) 26 | 27 | saver = tf.train.Saver(tf.global_variables()) 28 | 29 | init_op = tf.group(tf.global_variables_initializer(), 30 | tf.local_variables_initializer()) 31 | 32 | sess = tf.Session() 33 | 34 | sess.run(init_op) 35 | 36 | coord = tf.train.Coordinator() 37 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 38 | try: 39 | step = 0 40 | while not coord.should_stop(): 41 | start_time = time.time() 42 | _, loss_value = sess.run([train_op, loss]) 43 | duration = time.time() - start_time 44 | if step % 10 == 0: 45 | print('>> Step %d run_train: loss = %.2f (%.3f sec)' % (step, loss_value, 46 | duration)) 47 | if step % 100 == 0: 48 | print('>> %s Saving in %s' % (datetime.now(), FLAGS.checkpoint)) 49 | saver.save(sess, FLAGS.checkpoint, global_step=step) 50 | step += 1 51 | except Exception as e: 52 | print('>> %s Saving in %s' % (datetime.now(), FLAGS.checkpoint)) 53 | saver.save(sess, FLAGS.checkpoint, global_step=step) 54 | coord.request_stop(e) 55 | finally: 56 | coord.request_stop() 57 | coord.join(threads) 58 | sess.close() 59 | 60 | 61 | def main(_): 62 | if tf.gfile.Exists(FLAGS.train_dir): 63 | tf.gfile.DeleteRecursively(FLAGS.train_dir) 64 | tf.gfile.MakeDirs(FLAGS.train_dir) 65 | run_train() 66 | 67 | 68 | if __name__ == '__main__': 69 
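# Command-line flags: batch size, the event-log directory and the checkpoint path prefix used by run_train().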
| parser = argparse.ArgumentParser() 70 | parser.add_argument( 71 | '--batch_size', 72 | type=int, 73 | default=128, 74 | help='Batch size.' 75 | ) 76 | parser.add_argument( 77 | '--train_dir', 78 | type=str, 79 | default='./captcha_train', 80 | help='Directory where to write event logs.' 81 | ) 82 | parser.add_argument( 83 | '--checkpoint', 84 | type=str, 85 | default='./captcha_train/captcha', 86 | help='Directory where to write checkpoint.' 87 | ) 88 | FLAGS, unparsed = parser.parse_known_args() 89 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 90 | -------------------------------------------------------------------------------- /solver/config.py: -------------------------------------------------------------------------------- 1 | # about captcha image 2 | IMAGE_HEIGHT = 50 3 | IMAGE_WIDTH = 150 4 | CHAR_SETS = 'abcdefghijklmnpqrstuvwxyz123456789ABCDEFGHIJKLMNPQRSTUVWXYZ' 5 | CLASSES_NUM = len(CHAR_SETS) 6 | CHARS_NUM = 5 7 | # for train 8 | RECORD_DIR = './data' 9 | TRAIN_FILE = 'train.tfrecords' 10 | VALID_FILE = 'valid.tfrecords' 11 | -------------------------------------------------------------------------------- /solver/data/test_data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stekhn/tensorflow-captcha-solver/7d256f283e418a826aff4fb1e76d43138129a75b/solver/data/test_data/.gitkeep -------------------------------------------------------------------------------- /solver/data/train_data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stekhn/tensorflow-captcha-solver/7d256f283e418a826aff4fb1e76d43138129a75b/solver/data/train_data/.gitkeep -------------------------------------------------------------------------------- /solver/data/valid_data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stekhn/tensorflow-captcha-solver/7d256f283e418a826aff4fb1e76d43138129a75b/solver/data/valid_data/.gitkeep --------------------------------------------------------------------------------
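solver/config.py is the contract shared by all the scripts above: every captcha contains CHARS_NUM characters, each character is one of the CLASSES_NUM symbols in CHAR_SETS, and a label travels through the pipeline as a CHARS_NUM x CLASSES_NUM one-hot matrix (see label_to_one_hot in captcha_records.py and one_hot_to_texts in captcha_predict.py). As a minimal sketch of that encoding and its inverse (not part of the repository, plain NumPy only, helper names are illustrative):

```
import numpy as np

# Constants copied from solver/config.py
CHAR_SETS = 'abcdefghijklmnpqrstuvwxyz123456789ABCDEFGHIJKLMNPQRSTUVWXYZ'
CLASSES_NUM = len(CHAR_SETS)  # 59 symbols (no letter o/O, no digit 0)
CHARS_NUM = 5                 # characters per captcha

def encode(label):
    """Label string -> (CHARS_NUM, CLASSES_NUM) one-hot matrix, as written into the TFRecords."""
    one_hot = np.zeros((CHARS_NUM, CLASSES_NUM), dtype=np.uint8)
    for pos, char in enumerate(label):
        one_hot[pos, CHAR_SETS.index(char)] = 1
    return one_hot

def decode(one_hot):
    """(CHARS_NUM, CLASSES_NUM) matrix -> label string, mirroring the per-character argmax in captcha_predict.py."""
    return ''.join(CHAR_SETS[i] for i in one_hot.argmax(axis=1))

assert decode(encode('AB3cd')) == 'AB3cd'
```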