├── utils ├── __init__.py ├── utils.pyc ├── __init__.pyc └── utils.py ├── README ├── train ├── multiOutputCNN.py └── SimpleCnnRnn.py └── test └── predict.py /utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- -------------------------------------------------------------------------------- /utils/utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Slyne/SimpleCaptchaRecognition/HEAD/utils/utils.pyc -------------------------------------------------------------------------------- /utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Slyne/SimpleCaptchaRecognition/HEAD/utils/__init__.pyc -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | 一共有两个模型。一个模型是把CNN的输出和GRU(RNN)连接在一起,序列的输出结果;还有一个是CNN的输出分成5个类,分别输出结果。 2 | 实验结果: 16000个训练数据 + 4000个测试数据 3 | CNN + GRU : 93.2% 4 | CNN 多分类: 99.99% 5 | 6 | 数据说明: 7 | 数据是Data Castle的验证码识别,type2的数据 8 | 运行说明: 9 | 1. 训练数据和模型在http://pan.baidu.com/s/1jIdnG70 10 | 下载解压后,放到项目文件夹下 11 | 12 | 13 | 其它: 14 | 这两个模型只能用于5个字符的验证码。 15 | 这里训练的模型都没有分割图片(因为我懒),虽然效果还可以,不过只能适用于这种类型的验证码,所以还是要多加点各种类型的验证码会比较通用。 16 | 因为只能用于5个字符,所以不太通用,可以考虑用seq2seq包,把cnn作为encoder,rnn作为decoder, 17 | rnn的每一个time step的输出结果作为后一个time step的输入,这样就可以很长了(虽然我感觉正确性堪忧)。【20170619 更正: 效果还是不错的,详细可以参照另一个项目】 18 | 另外我试了一下把cnn的输出直接复制5份作为GRU的输入,直接训练的效果很诡异,一直不收敛,大概是我参数调的有问题,所以我又多加了一个GRU, 19 | 尽可能的不要让输出的那一层的输入是一样的。 20 | 21 | -------------------------------------------------------------------------------- /train/multiOutputCNN.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from keras.callbacks import ModelCheckpoint, Callback 4 | from keras.optimizers import SGD 5 | from utils.utils import load_data, create_multiOutputCnn 6 | 7 | images, labels = load_data() 8 | print labels.shape 9 | digit1 = labels[:,0,:] 10 | digit2 = labels[:,1,:] 11 | digit3 = labels[:,2,:] 12 | digit4 = labels[:,3,:] 13 | digit5 = labels[:,4,:] 14 | 15 | model = create_multiOutputCnn() 16 | sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True) 17 | model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy']) 18 | checkpointer= ModelCheckpoint(filepath="../model/weights.{epoch:02d}.hdf5") 19 | 20 | model.fit(images, [digit1, digit2, digit3, digit4, digit5], validation_split=0.2, shuffle=True,batch_size=16, nb_epoch=20,callbacks=[checkpointer]) 21 | model.save("../model/model2.hdf5") -------------------------------------------------------------------------------- /test/predict.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from scipy import misc 4 | import numpy as np 5 | from utils.utils import create_simpleCnnRnn, create_multiOutputCnn 6 | 7 | image_model = create_simpleCnnRnn() 8 | # image_model = create_multiOutputCnn() 9 | image_model.load_weights("../model/CnnRnn.hdf5") 10 | 11 | predir = "../Data/type2_train/" 12 | pics = [] 13 | base = 16001 14 | for i in range(4000): 15 | index = str(base + i) 16 | pic = misc.imread(predir + "type2_train_"+index+".jpg") 17 | pic = np.rollaxis(pic, 2, 0) 18 | pics.append(pic) 19 | pics = np.asarray(pics) 20 | 21 | result_all = image_model.predict(pics) 22 | 23 | with open("../Data/labels", "rb") as f: 24 | labels = np.load(f) 25 | 26 | correct = 0 27 | val_labels = labels[16000:] 28 | for (true, predict) in zip(val_labels,result_all): 29 | predict_value = np.argmax(predict, axis=1) 30 | if list(true) == list(predict_value): 31 | correct += 1 32 | else: 33 | print true, 34 | print predict_value 35 | print correct 36 | -------------------------------------------------------------------------------- /train/SimpleCnnRnn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from keras.callbacks import ModelCheckpoint, Callback 4 | from keras.optimizers import SGD 5 | from utils.utils import create_simpleCnnRnn, load_data 6 | import numpy as np 7 | 8 | image_model = create_simpleCnnRnn() 9 | sgd = SGD(lr=0.0002, decay=1e-6, momentum=0.9, nesterov=True) 10 | image_model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy']) 11 | images, labels = load_data() # categorical labels 12 | 13 | 14 | val_testx = images[-4000:] 15 | with open("../Data/labels", "rb") as f: 16 | index_labels = np.load(f) 17 | val_testy = index_labels[-4000:] 18 | 19 | 20 | class ValidateAcc(Callback): 21 | def on_epoch_end(self, epoch, logs={}): 22 | print '\n———————————--------' 23 | image_model.load_weights('../model/weights.%02d.hdf5' % epoch) 24 | r = image_model.predict(val_testx, verbose=0) 25 | y_predict = np.asarray([np.argmax(i, axis=1) for i in r]) 26 | length = len(y_predict) * 1.0 27 | correct = 0 28 | for (true,predict) in zip(val_testy,y_predict): 29 | if list(true) == list(predict): 30 | correct += 1 31 | print "Validation set acc is: ", correct/length 32 | print '\n———————————--------' 33 | 34 | 35 | val_acc_check_pointer = ValidateAcc() 36 | check_pointer = ModelCheckpoint(filepath="../model/weights.{epoch:02d}.hdf5") 37 | 38 | image_model.fit(images, labels, 39 | validation_split=0.2, # split data into 4:1 the last 4000(0.2*20000) is used as val data set 40 | shuffle=True,batch_size=16, nb_epoch=20,callbacks=[check_pointer, val_acc_check_pointer]) 41 | image_model.save("../model/model2.hdf5") 42 | 43 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | from keras.utils.np_utils import to_categorical 5 | from keras.layers import Convolution2D, MaxPooling2D, GRU, TimeDistributed 6 | from keras.layers import Dense, Dropout, Activation, Flatten, RepeatVector,Input 7 | from keras.layers.normalization import BatchNormalization 8 | from keras.models import Sequential,Model 9 | from keras import backend as K 10 | K.set_image_dim_ordering("th") 11 | vocab_size = 36 # 26 + 10 12 | C, H, W = 3, 60, 250 13 | max_caption_len = 5 14 | 15 | 16 | def load_data(): 17 | with open("../Data/pic", "rb") as f: 18 | images = np.load(f) 19 | with open("../Data/labels", "rb") as f: 20 | labels = np.load(f) 21 | labels_categorical = np.asarray([to_categorical(label, vocab_size) for label in labels]) 22 | print "images shape", images.shape 23 | # print images[0] 24 | print "input labels shape", labels_categorical.shape 25 | return images,labels_categorical 26 | 27 | 28 | def create_simpleCnnRnn(): 29 | image_model = Sequential() 30 | # input: 100x100 images with 3 channels -> (3, 100, 100) tensors. 31 | # this applies 32 convolution filters of size 3x3 each. 32 | image_model.add(Convolution2D(32, 3, 3, border_mode='valid', input_shape=(C, H, W))) 33 | image_model.add(BatchNormalization()) 34 | image_model.add(Activation('relu')) 35 | image_model.add(Convolution2D(32, 3, 3)) 36 | image_model.add(BatchNormalization()) 37 | image_model.add(Activation('relu')) 38 | image_model.add(MaxPooling2D(pool_size=(2, 2))) 39 | # image_model.add(Dropout(0.25)) 40 | image_model.add(Convolution2D(64, 3, 3, border_mode='valid')) 41 | image_model.add(BatchNormalization()) 42 | image_model.add(Activation('relu')) 43 | image_model.add(Convolution2D(64, 3, 3)) 44 | image_model.add(BatchNormalization()) 45 | image_model.add(Activation('relu')) 46 | image_model.add(MaxPooling2D(pool_size=(2, 2))) 47 | # image_model.add(Dropout(0.25)) 48 | image_model.add(Flatten()) 49 | # Note: Keras does automatic shape inference. 50 | image_model.add(Dense(128)) 51 | image_model.add(RepeatVector(max_caption_len)) 52 | image_model.add(GRU(output_dim=128, return_sequences=True)) 53 | image_model.add(GRU(output_dim=128, return_sequences=True)) 54 | image_model.add(TimeDistributed(Dense(vocab_size))) 55 | image_model.add(Activation('softmax')) 56 | return image_model 57 | 58 | 59 | def create_multiOutputCnn(): 60 | image_model = Sequential() 61 | # input: 100x100 images with 3 channels -> (3, 100, 100) tensors. 62 | # this applies 32 convolution filters of size 3x3 each. 63 | image_model.add(Convolution2D(32, 3, 3, border_mode='valid', input_shape=(C, H, W))) 64 | image_model.add(BatchNormalization()) 65 | image_model.add(Activation('relu')) 66 | image_model.add(Convolution2D(32, 3, 3)) 67 | image_model.add(BatchNormalization()) 68 | image_model.add(Activation('relu')) 69 | image_model.add(MaxPooling2D(pool_size=(2, 2))) 70 | # image_model.add(Dropout(0.25)) 71 | image_model.add(Convolution2D(64, 3, 3, border_mode='valid')) 72 | image_model.add(BatchNormalization()) 73 | image_model.add(Activation('relu')) 74 | image_model.add(Convolution2D(64, 3, 3)) 75 | image_model.add(BatchNormalization()) 76 | image_model.add(Activation('relu')) 77 | image_model.add(MaxPooling2D(pool_size=(2, 2))) 78 | # image_model.add(Dropout(0.25)) 79 | image_model.add(Flatten()) 80 | # Note: Keras does automatic shape inference. 81 | image_input = Input(shape=(C, H, W)) 82 | encoded_image = image_model(image_input) 83 | out1 = Dense(128, activation="relu")(encoded_image) 84 | out2 = Dense(128, activation="relu")(encoded_image) 85 | out3 = Dense(128, activation="relu")(encoded_image) 86 | out4 = Dense(128, activation="relu")(encoded_image) 87 | out5 = Dense(128, activation="relu")(encoded_image) 88 | output1 = Dense(vocab_size, activation="softmax")(out1) 89 | output2 = Dense(vocab_size, activation="softmax")(out2) 90 | output3 = Dense(vocab_size, activation="softmax")(out3) 91 | output4 = Dense(vocab_size, activation="softmax")(out4) 92 | output5 = Dense(vocab_size, activation="softmax")(out5) 93 | model = Model([image_input], [output1, output2, output3, output4, output5]) 94 | return model 95 | --------------------------------------------------------------------------------