├── .gitignore └── discern ├── .idea ├── discern.iml ├── misc.xml ├── modules.xml └── vcs.xml ├── __init__.py ├── convert.py ├── loader.py ├── model.py ├── settings.py ├── train.py └── verifies.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .idea/ 3 | train/ 4 | test/ 5 | predict/ 6 | images/ 7 | *.py[cod] -------------------------------------------------------------------------------- /discern/.idea/discern.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /discern/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /discern/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /discern/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /discern/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asyncins/captcha_cnn/833aae637578eb67ab9d1ad13146b37369f6f9ba/discern/__init__.py -------------------------------------------------------------------------------- /discern/convert.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from settings import CHARACTER, CAPTCHA_NUMBER 3 | 4 | 5 | def one_hot_encode(value: list) -> tuple: 6 | """编码,将字符转为独热码 7 | vector为独热码,order用于解码 8 | """ 9 | order = [] 10 | shape = CAPTCHA_NUMBER * len(CHARACTER) 11 | vector = numpy.zeros(shape, dtype=float) 12 | for k, v in enumerate(value): 13 | index = k * len(CHARACTER) + CHARACTER.get(v) 14 | vector[index] = 1.0 15 | order.append(index) 16 | return vector, order 17 | 18 | 19 | def one_hot_decode(value: list) -> str: 20 | """解码,将独热码转为字符 21 | """ 22 | res = [] 23 | for ik, iv in enumerate(value): 24 | val = iv - ik * len(CHARACTER) if ik else iv 25 | for k, v in CHARACTER.items(): 26 | if val == int(v): 27 | res.append(k) 28 | break 29 | return "".join(res) 30 | 31 | 32 | if __name__ == '__main__': 33 | code = '0A2JYD' 34 | vec, orders = one_hot_encode(code) 35 | print('将%s进行特征数字化处理' % code) 36 | print('特征数字化结果:%s' % vec) 37 | print('字符位置:%s' % orders) 38 | print('根据特征数字化时的字符位置进行解码,解码结果为:%s' % one_hot_decode(orders)) 39 | 40 | -------------------------------------------------------------------------------- /discern/loader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from PIL import Image 3 | from torch.utils.data import Dataset 4 | from torch.utils.data import DataLoader 5 | from torchvision import transforms 6 | 7 | from convert import one_hot_encode 8 | from settings import * 9 | 10 | 11 | class ImageDataSet(Dataset): 12 | """ 图片加载和处理 """ 13 | 14 | def __init__(self, folder): 15 | self.transform = transforms.Compose([ 16 | # 图片灰度处理 17 | transforms.Grayscale(), 18 | # 把一个取值范围是[0,255]的PIL.Image对象转换成取值范围是[0,1.0]的Tensor对象 19 | transforms.ToTensor() 20 | ]) 21 | self.folder = folder 22 | # 从传入的文件夹路径中载入指定后缀为IMAGE_TYPE值的文件 23 | self.images = list(pathlib.Path(folder).glob('*.{}'.format(IMAGE_TYPE))) 24 | 25 | def __len__(self): 26 | return len(self.images) 27 | 28 | def __getitem__(self, idx): 29 | image_path = self.images[idx] 30 | image = self.transform(Image.open(image_path)) 31 | # 获取独热码和字符位置列表 32 | vector, order = one_hot_encode(image_path.stem) 33 | label = torch.from_numpy(vector) 34 | return image, label, order 35 | 36 | 37 | def loaders(folder: str, size: int) -> object: 38 | # 包装数据和目标张量的数据集 39 | objects = ImageDataSet(folder) 40 | return DataLoader(objects, batch_size=size, shuffle=True) 41 | -------------------------------------------------------------------------------- /discern/model.py: -------------------------------------------------------------------------------- 1 | from torch.nn import Module 2 | from torch.nn import Sequential 3 | from torch.nn import Conv2d 4 | from torch.nn import BatchNorm2d 5 | from torch.nn import Dropout 6 | from torch.nn import ReLU 7 | from torch.nn import MaxPool2d 8 | from torch.nn import Linear 9 | from settings import * 10 | 11 | 12 | class CaptchaModelCNN(Module): 13 | """用于识别验证码的卷积神经网络""" 14 | 15 | def __init__(self): 16 | super(CaptchaModelCNN, self).__init__() 17 | 18 | # 设定参数 19 | self.pool = 2 # 最大池化 20 | self.padding = 1 # 矩形边的补充层数 21 | self.dropout = 0.5 # 随机概率 22 | self.kernel_size = 3 # 卷积核大小 3x3 23 | 24 | # 卷积池化 25 | self.layer1 = Sequential( 26 | # 时序容器Sequential,参数按顺序传入 27 | # 2维卷积层,卷积核大小为self.kernel_size,边的补充层数为self.padding 28 | Conv2d(1, 32, kernel_size=self.kernel_size, padding=self.padding), 29 | # 对小批量3d数据组成的4d输入进行批标准化(Batch Normalization)操作 30 | BatchNorm2d(32), 31 | # 随机将输入张量中部分元素设置为0,随机概率为self.dropout。 32 | Dropout(self.dropout), 33 | # 对输入数据运用修正线性单元函数 34 | ReLU(), 35 | # 最大池化 36 | MaxPool2d(2)) 37 | 38 | # 卷积池化 39 | self.layer2 = Sequential( 40 | Conv2d(32, 64, kernel_size=self.kernel_size, padding=self.padding), 41 | BatchNorm2d(64), 42 | Dropout(self.dropout), 43 | ReLU(), 44 | MaxPool2d(2)) 45 | 46 | # 卷积池化 47 | self.layer3 = Sequential( 48 | Conv2d(64, 64, kernel_size=self.kernel_size, padding=self.padding), 49 | BatchNorm2d(64), 50 | Dropout(self.dropout), 51 | ReLU(), 52 | MaxPool2d(2)) 53 | 54 | # 全连接 55 | self.fc = Sequential( 56 | Linear((IMAGE_WIDTH // 8) * (IMAGE_HEIGHT // 8) * 64, 1024), 57 | Dropout(self.dropout), 58 | ReLU()) 59 | self.rfc = Sequential(Linear(1024, CAPTCHA_NUMBER * len(CHARACTER))) 60 | 61 | def forward(self, x): 62 | out = self.layer1(x) 63 | out = self.layer2(out) 64 | out = self.layer3(out) 65 | out = out.view(out.size(0), -1) 66 | out = self.fc(out) 67 | out = self.rfc(out) 68 | return out 69 | -------------------------------------------------------------------------------- /discern/settings.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | from os import path 3 | 4 | 5 | # 路径 6 | PARENT_LAYER = pathlib.Path.cwd().parent 7 | 8 | # 数字与大写字母混合 9 | NUMBER = [str(_) for _ in range(0, 10)] 10 | LETTER = [chr(_).upper() for _ in range(97, 123)] 11 | CHARACTER = {v: k for k, v in enumerate(NUMBER + LETTER)} 12 | 13 | # 图片路径 14 | PATH_IMAGE = path.join(PARENT_LAYER, 'images') 15 | PATH_TRAIN = path.join(PATH_IMAGE, 'train') 16 | PATH_TEST = path.join(PATH_IMAGE, 'test') 17 | PATH_PREDICT = path.join(PATH_IMAGE, 'predict') 18 | 19 | # 图片规格 20 | CAPTCHA_NUMBER = 6 21 | IMAGE_HEIGHT = 40 22 | IMAGE_WIDTH = 200 23 | IMAGE_TYPE = 'png' 24 | 25 | # 训练参数 26 | EPOCHS = 15 27 | BATCH_SIZE = 32 28 | RATE = 0.001 29 | MODEL_NAME = 'result.pkl' 30 | 31 | -------------------------------------------------------------------------------- /discern/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import logging 3 | from torch.nn import MultiLabelSoftMarginLoss 4 | from torch.autograd import Variable 5 | from torch.optim import Adam 6 | from settings import * 7 | from model import CaptchaModelCNN 8 | from loader import loaders 9 | logging.basicConfig(level=logging.INFO) 10 | 11 | 12 | def start_train(): 13 | # 使用自定义的卷积神经网络训练 14 | model = CaptchaModelCNN().cuda() 15 | model.train() # 训练模式 16 | logging.info('Train start') 17 | # 损失函数 18 | criterion = MultiLabelSoftMarginLoss() 19 | # Adam算法 20 | optimizer = Adam(model.parameters(), lr=RATE) 21 | ids = loaders(PATH_TRAIN, BATCH_SIZE) 22 | logging.info('Iteration is %s' % len(ids)) 23 | for epoch in range(EPOCHS): 24 | for i, (image, label, order) in enumerate(ids): 25 | # 包装Tensor对象并记录其operations 26 | images = Variable(image).cuda() 27 | labels = Variable(label.float()).cuda() 28 | predict_labels = model(images) 29 | loss = criterion(predict_labels, labels) 30 | # 保持当前参数状态并基于计算得到的梯度进行参数更新。 31 | optimizer.zero_grad() 32 | loss.backward() 33 | optimizer.step() 34 | i += 1 35 | if i % 100 == 0: 36 | logging.info("epoch:%s, step:%s, loss:%s" % (epoch, i, loss.item())) 37 | # 保存训练结果 38 | torch.save(model.state_dict(), MODEL_NAME) 39 | # 保存训练结果 40 | torch.save(model.state_dict(), MODEL_NAME) 41 | logging.info('Train done') 42 | 43 | 44 | if __name__ == '__main__': 45 | start_train() 46 | -------------------------------------------------------------------------------- /discern/verifies.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import torch 3 | import logging 4 | from torch.autograd import Variable 5 | 6 | from settings import * 7 | from convert import one_hot_decode 8 | from loader import loaders 9 | from model import CaptchaModelCNN 10 | 11 | logging.basicConfig(level=logging.INFO) 12 | 13 | 14 | def start_verifies(folder): 15 | model = CaptchaModelCNN().cuda() 16 | model.eval() # 预测模式 17 | # 载入模型 18 | model.load_state_dict(torch.load(MODEL_NAME)) 19 | logging.info('load cnn model') 20 | verifies = loaders(folder, 1) 21 | correct, total, current, cha_len, = 0, 0, 0, len(CHARACTER) 22 | for i, (image, label, order) in enumerate(verifies): 23 | captcha = one_hot_decode(order) # 正确的验证码 24 | images = Variable(image).cuda() 25 | predict_label = model(images) 26 | predicts = [] 27 | for k in range(CAPTCHA_NUMBER): 28 | # 根据预测结果取值 29 | code = one_hot_decode([(numpy.argmax(predict_label[0, k * cha_len: (k + 1) * cha_len].data.cpu().numpy()))]) 30 | predicts.append(code) 31 | predict = ''.join(predicts) # 预测结果 32 | current += 1 33 | total += 1 34 | if predict == captcha: 35 | logging.info('Success, captcha:%s->%s' % (captcha, predict)) 36 | correct += 1 37 | else: 38 | logging.info('Fail, captcha:%s->%s' % (captcha, predict)) 39 | if total % 300 == 0: 40 | logging.info('当前预测图片数为%s张,准确率为%s%%' % (current, int(100 * correct / current))) 41 | logging.info('完成。数据集%s当前预测图片数为%s张,准确率为%s%%' % (folder, total, int(100 * correct / total))) 42 | 43 | 44 | def get_image_name(folder): 45 | # 加载指定路径下的图片,并返回图片名称列表 46 | images = list(pathlib.Path(folder).glob('*.{}'.format(IMAGE_TYPE))) 47 | image_name = [i.stem for i in images] 48 | return image_name 49 | 50 | 51 | if __name__ == '__main__': 52 | folders = PATH_TEST # 指定预测集路径 53 | trains = get_image_name(PATH_TRAIN) # 获取训练样本所有图片的名称 54 | pres = get_image_name(folders) # 获取预测集所有图片的名称 55 | repeat = len([p for p in pres if p in trains]) # 获取重复数量 56 | start_verifies(folders) # 开启预测 57 | logging.info('预测前确认待预测图片与训练样本的重复情况,' 58 | '待预测图片%s张,训练样本%s张,重复数量为%s张' % (len(pres), len(trains), repeat)) 59 | --------------------------------------------------------------------------------