├── utils
    ├── __init__.py
    ├── utils.pyc
    ├── __init__.pyc
    └── utils.py
├── README
├── train
    ├── multiOutputCNN.py
    └── SimpleCnnRnn.py
└── test
    └── predict.py


/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-


--------------------------------------------------------------------------------
/utils/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Slyne/SimpleCaptchaRecognition/HEAD/utils/utils.pyc


--------------------------------------------------------------------------------
/utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Slyne/SimpleCaptchaRecognition/HEAD/utils/__init__.pyc


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
 1 | 一共有两个模型。第一个模型把CNN的输出接到GRU（RNN）上，按序列逐个输出字符；第二个模型在CNN的输出之后接5个分类分支，每个分支分别输出一个字符的结果。
 2 | 实验结果:   16000个训练数据 + 4000个测试数据
 3 | CNN + GRU : 93.2%
 4 | CNN 多分类: 99.99%
 5 | 
 6 | 数据说明:
 7 | 数据是Data Castle的验证码识别，type2的数据
 8 | 运行说明:
 9 | 1. 训练数据和模型在http://pan.baidu.com/s/1jIdnG70
10 |    下载解压后，放到项目文件夹下
11 | 
12 | 
13 | 其它：
14 | 这两个模型只能用于5个字符的验证码。
15 | 这里训练的模型都没有分割图片(因为我懒)，虽然效果还可以，不过只能适用于这种类型的验证码，所以还是要多加点各种类型的验证码会比较通用。
16 | 因为只能用于5个字符，所以不太通用，可以考虑用seq2seq包，把cnn作为encoder，rnn作为decoder，
17 | rnn的每一个time step的输出结果作为后一个time step的输入，这样就可以很长了(虽然我感觉正确性堪忧)。【20170619 更正: 效果还是不错的，详细可以参照另一个项目】
18 | 另外我试了一下把cnn的输出直接复制5份作为GRU的输入，直接训练的效果很诡异，一直不收敛，大概是我参数调的有问题，所以我又多加了一个GRU，
19 | 尽可能地不让输出层在各个time step上的输入完全一样。
20 | 
21 | 


--------------------------------------------------------------------------------
/train/multiOutputCNN.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | from keras.callbacks import ModelCheckpoint, Callback
 4 | from keras.optimizers import SGD
 5 | from utils.utils import load_data, create_multiOutputCnn
 6 | 
 7 | images, labels = load_data()
 8 | print labels.shape
 9 | digit1 = labels[:,0,:]
10 | digit2 = labels[:,1,:]
11 | digit3 = labels[:,2,:]
12 | digit4 = labels[:,3,:]
13 | digit5 = labels[:,4,:]
14 | 
15 | model = create_multiOutputCnn()
16 | sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True)
17 | model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
18 | checkpointer= ModelCheckpoint(filepath="../model/weights.{epoch:02d}.hdf5")
19 | 
20 | model.fit(images, [digit1, digit2, digit3, digit4, digit5], validation_split=0.2, shuffle=True,batch_size=16, nb_epoch=20,callbacks=[checkpointer])
21 | model.save("../model/model2.hdf5")


--------------------------------------------------------------------------------
/test/predict.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | from scipy import misc
 4 | import numpy as np
 5 | from utils.utils import create_simpleCnnRnn, create_multiOutputCnn
 6 | 
 7 | image_model = create_simpleCnnRnn()
 8 | # image_model = create_multiOutputCnn()
 9 | image_model.load_weights("../model/CnnRnn.hdf5")
10 | 
11 | predir = "../Data/type2_train/"
12 | pics = []
13 | base = 16001
14 | for i in range(4000):
15 |     index = str(base + i)
16 |     pic = misc.imread(predir + "type2_train_"+index+".jpg")
17 |     pic = np.rollaxis(pic, 2, 0)
18 |     pics.append(pic)
19 | pics = np.asarray(pics)
20 | 
21 | result_all = image_model.predict(pics)
22 | 
23 | with open("../Data/labels", "rb") as f:
24 |     labels = np.load(f)
25 | 
26 | correct = 0
27 | val_labels = labels[16000:]
28 | for (true, predict) in zip(val_labels,result_all):
29 |     predict_value = np.argmax(predict, axis=1)
30 |     if list(true) == list(predict_value):
31 |         correct += 1
32 |     else:
33 |         print true,
34 |         print predict_value
35 | print correct
36 | 


--------------------------------------------------------------------------------
/train/SimpleCnnRnn.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | from keras.callbacks import ModelCheckpoint, Callback
 4 | from keras.optimizers import SGD
 5 | from utils.utils import create_simpleCnnRnn, load_data
 6 | import numpy as np
 7 | 
 8 | image_model = create_simpleCnnRnn()
 9 | sgd = SGD(lr=0.0002, decay=1e-6, momentum=0.9, nesterov=True)
10 | image_model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
11 | images, labels = load_data()  # categorical labels
12 | 
13 | 
14 | val_testx = images[-4000:]
15 | with open("../Data/labels", "rb") as f:
16 |     index_labels = np.load(f)
17 | val_testy = index_labels[-4000:]
18 | 
19 | 
class ValidateAcc(Callback):
    """Callback that prints whole-captcha accuracy at the end of each epoch.

    A sample counts as correct only when every predicted character index
    equals the corresponding entry in the module-level ``val_testy``.
    """

    def on_epoch_end(self, epoch, logs={}):
        """Reload this epoch's checkpoint and report accuracy on ``val_testx``."""
        print('\n———————————--------')
        # Reads the file named by the same pattern the ModelCheckpoint in
        # this script writes; presumably that callback has already run for
        # this epoch because it is listed first -- TODO confirm.
        image_model.load_weights('../model/weights.%02d.hdf5' % epoch)
        scores = image_model.predict(val_testx, verbose=0)
        y_predict = np.asarray([np.argmax(sample, axis=1) for sample in scores])
        # Float total so the ratio below is never an integer division.
        total = len(y_predict) * 1.0
        correct = sum(
            1
            for expected, guessed in zip(val_testy, y_predict)
            if list(expected) == list(guessed)
        )
        print("Validation set acc is:  %s" % (correct / total))
        print('\n———————————--------')
33 | 
34 | 
# Per-epoch callbacks: write a checkpoint, then report whole-captcha accuracy.
val_acc_check_pointer = ValidateAcc()
check_pointer = ModelCheckpoint(filepath="../model/weights.{epoch:02d}.hdf5")

# ValidateAcc reloads the checkpoint file for the current epoch, so the
# checkpoint callback is listed before it.
image_model.fit(images, labels,
                # 4:1 split; the last 4000 (0.2 * 20000) samples are the
                # validation set.
                validation_split=0.2,
                shuffle=True, batch_size=16, nb_epoch=20,
                callbacks=[check_pointer, val_acc_check_pointer])

# Save under the name test/predict.py loads for this architecture.
# The previous target, "../model/model2.hdf5", is the file written by
# train/multiOutputCNN.py, so the two training scripts overwrote each
# other's result and the predictor never saw this model.
image_model.save("../model/CnnRnn.hdf5")
42 | 
43 | 


--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | import numpy as np
 4 | from keras.utils.np_utils import to_categorical
 5 | from keras.layers import Convolution2D, MaxPooling2D, GRU, TimeDistributed
 6 | from keras.layers import Dense, Dropout, Activation, Flatten, RepeatVector,Input
 7 | from keras.layers.normalization import BatchNormalization
 8 | from keras.models import Sequential,Model
 9 | from keras import backend as K
10 | K.set_image_dim_ordering("th")
11 | vocab_size = 36  # 26 + 10
12 | C, H, W = 3, 60, 250
13 | max_caption_len = 5
14 | 
15 | 
def load_data():
    """Load the captcha images and one-hot encode their labels.

    Reads the arrays stored in ``../Data/pic`` and ``../Data/labels``
    (paths are relative to the current working directory) and prints the
    shape of both results.

    Returns:
        A tuple ``(images, labels_categorical)``: the image array as stored
        on disk, and the labels with every entry passed through
        ``to_categorical`` over ``vocab_size`` classes.
    """
    with open("../Data/pic", "rb") as pic_file:
        images = np.load(pic_file)
    with open("../Data/labels", "rb") as label_file:
        raw_labels = np.load(label_file)

    # Encode one captcha (one row of character indices) at a time.
    one_hot_rows = [to_categorical(row, vocab_size) for row in raw_labels]
    labels_categorical = np.asarray(one_hot_rows)

    print("images shape %s" % (images.shape,))
    print("input labels shape %s" % (labels_categorical.shape,))
    return images, labels_categorical
26 | 
27 | 
def _add_conv_block(model, nb_filter, **first_conv_kwargs):
    """Append two conv-BatchNorm-ReLU stages and a 2x2 max-pool to *model*.

    Extra keyword arguments are passed to the first convolution only; they
    are used to supply ``input_shape`` for the very first layer.
    """
    model.add(Convolution2D(nb_filter, 3, 3, border_mode='valid',
                            **first_conv_kwargs))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Convolution2D(nb_filter, 3, 3))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))


def _build_cnn_encoder():
    """Return the convolutional feature extractor shared by both models.

    The input is a channels-first image of shape ``(C, H, W)``, i.e.
    ``(3, 60, 250)`` with the module constants. It goes through a
    32-filter block and a 64-filter block (3x3 kernels) and is flattened
    to a vector.
    """
    encoder = Sequential()
    _add_conv_block(encoder, 32, input_shape=(C, H, W))
    _add_conv_block(encoder, 64)
    encoder.add(Flatten())
    return encoder


def create_simpleCnnRnn():
    """Build the CNN + GRU captcha model (uncompiled).

    The flattened CNN features are projected to 128 units, repeated
    ``max_caption_len`` times and passed through two stacked GRUs. A
    time-distributed dense layer with softmax then gives one distribution
    over ``vocab_size`` classes per character position.

    Returns:
        A ``Sequential`` model.
    """
    image_model = _build_cnn_encoder()
    # Keras infers the input size of the dense layer from the encoder.
    image_model.add(Dense(128))
    # Feed the same image embedding to the recurrent layers at every step.
    image_model.add(RepeatVector(max_caption_len))
    image_model.add(GRU(output_dim=128, return_sequences=True))
    image_model.add(GRU(output_dim=128, return_sequences=True))
    image_model.add(TimeDistributed(Dense(vocab_size)))
    image_model.add(Activation('softmax'))
    return image_model


def create_multiOutputCnn():
    """Build the multi-output CNN captcha model (uncompiled).

    The shared CNN encoder feeds ``max_caption_len`` independent heads.
    Each head is a 128-unit ReLU layer followed by a softmax over
    ``vocab_size`` classes, predicting one character position.

    Returns:
        A ``Model`` with one image input and ``max_caption_len`` outputs,
        ordered by character position.
    """
    image_model = _build_cnn_encoder()
    image_input = Input(shape=(C, H, W))
    encoded_image = image_model(image_input)

    # Create every hidden layer before any output layer so layers are
    # created in the same order as the original hand-written heads.
    hidden_layers = [Dense(128, activation="relu")(encoded_image)
                     for _ in range(max_caption_len)]
    outputs = [Dense(vocab_size, activation="softmax")(hidden)
               for hidden in hidden_layers]

    model = Model([image_input], outputs)
    return model
95 | 


--------------------------------------------------------------------------------