Among the deep-learning approaches to captcha recognition out there, the easiest case is probably a fixed-length captcha: each character is one of at most 52 letters plus 10 digits (62 classes), so even a very simple network architecture can handle it. Below we implement it with PyTorch and transfer learning. With transfer learning you don't need to design a model yourself at all; you only need to prepare your own dataset and start training right away.
Prepare the dataset. Mine looks like this; I have a dozen or two different captcha datasets on hand, so message me privately if you need them.
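The only structural assumption the code below makes is that each image's label is the part of its file name before the first underscore (that is what __getitem__ splits on). A tiny illustration with a made-up file name:

fname = "aB3k_0001.png"      # hypothetical file name: the label is "aB3k", anything after "_" is ignored
label = fname.split("_")[0]  # -> "aB3k"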
Without further ado, here are the basic PyTorch training steps.
First, import the libraries:
from torch.utils.data import Dataset
import torchvision.transforms as T
from torch.utils.data import DataLoader
from torchvision import models
import os
import torch
from PIL import Image
from tqdm import tqdm
import numpy as np
1. Build the Dataset and DataLoader
number_chars = {k: str(k) for k in range(10)}            # indices 0-9  -> '0'-'9'
capital_chars = {k: chr(k + 55) for k in range(10, 36)}  # indices 10-35 -> 'A'-'Z'
lower_chars = {k: chr(k + 61) for k in range(36, 62)}    # indices 36-61 -> 'a'-'z'
all_chars = {}
all_chars.update(number_chars)
all_chars.update(capital_chars)
all_chars.update(lower_chars)
all_chars = {v: k for k, v in all_chars.items()}         # invert the map: character -> class index
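A quick sanity check of the resulting mapping (class indices 0-9 are digits, 10-35 uppercase, 36-61 lowercase):

assert len(all_chars) == 62
assert all_chars["0"] == 0 and all_chars["9"] == 9
assert all_chars["A"] == 10 and all_chars["Z"] == 35
assert all_chars["a"] == 36 and all_chars["z"] == 61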
class NumberDataset(Dataset):
    def __init__(self, path: str, transform=None):
        """
        You could also wrap a train=True/False flag in here if you like, it's up to you.
        :param path: dataset directory
        :param transform: torchvision transforms applied to each image
        """
        super(NumberDataset, self).__init__()
        if not transform:
            transform = T.Compose([T.ToTensor(), ])
        self.transform = transform
        self.path = path
        self.picture_list = list(os.walk(self.path))[0][-1]

    def __len__(self):
        return len(self.picture_list)

    def __getitem__(self, item):
        """
        :param item: index
        :return: (image, label)
        """
        picture_path_list = self._load_picture()
        img = Image.open(picture_path_list[item]).convert("RGB")  # train on 3-channel images only
        img = self.transform(img)
        label = self.picture_list[item].split("_")[0]  # the label is the file-name part before "_"
        label = [[all_chars[i]] for i in label]
        label = torch.as_tensor(label, dtype=torch.int64)
        # Convert to one-hot: upper + lower case letters plus digits give 62 classes, and the captcha has 4 fixed positions
        one_hot = torch.zeros(label.size(0), 62).long()
        one_hot.scatter_(dim=1, index=label.long(), src=torch.ones(label.size(0), 62).long())
        one_hot = one_hot.to(torch.float32)
        return img, one_hot

    def _load_picture(self):
        return [self.path + '/' + i for i in self.picture_list]
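To see what the label encoding produces, here is the same one-hot construction run on a standalone, hypothetical 4-character label (no image files needed):

label_str = "aB3k"  # hypothetical label
label = torch.as_tensor([[all_chars[c]] for c in label_str], dtype=torch.int64)
one_hot = torch.zeros(label.size(0), 62).long()
one_hot.scatter_(dim=1, index=label, src=torch.ones(label.size(0), 62).long())
print(one_hot.shape)          # torch.Size([4, 62]): one row per character, a single 1 per row
print(one_hot.argmax(dim=1))  # tensor([36, 11,  3, 46]) -> the class indices of 'a', 'B', '3', 'k'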
transform = T.Compose([
    T.Resize((100, 300)),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # ImageNet mean/std; you can also compute your own
])
BATCH_SIZE = 256
dataset = NumberDataset("./all/train", transform=transform)  # training data path and transforms
train_loader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
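Once the training folder is in place, you can sanity-check one batch's shapes with a quick throwaway snippet (not part of training):

imgs, labels = next(iter(train_loader))
print(imgs.shape)    # torch.Size([256, 3, 100, 300]) after Resize((100, 300))
print(labels.shape)  # torch.Size([256, 4, 62]): 4 character positions x 62 classes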
2. Build the training model
Just grab torchvision's ResNet-50 and start training.
DEVICE = torch.device("cuda:1")  # train on the GPU
model = models.resnet50(num_classes=4 * 62)  # upper + lower case letters plus digits give 62 classes, and the captcha has 4 fixed positions
model = model.to(DEVICE)
losses = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
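Note that models.resnet50(num_classes=4 * 62) above trains the network from random initialization. If you want to start from ImageNet-pretrained weights instead (transfer learning in the stricter sense), a minimal sketch is to load the pretrained backbone and swap only the final fc layer; the weights= argument assumes torchvision 0.13+, older versions use pretrained=True instead:

model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)  # ImageNet-pretrained backbone
model.fc = torch.nn.Linear(model.fc.in_features, 4 * 62)          # replace the classification head
model = model.to(DEVICE)

If you go this route, create the optimizer after replacing the head so it tracks the new parameters.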
3. Start training
model.train()
for epoch in range(20):
    bar = tqdm(enumerate(train_loader))  # wrap the loader in a tqdm progress bar
    loss_sum = []
    for idx, (data, label) in bar:
        data, label = data.to(DEVICE), label.to(DEVICE)
        optimizer.zero_grad()
        pred = model(data)
        pred = pred.reshape(BATCH_SIZE, 4, 62)  # reshape the final fc output to 4 x 62 for the one-hot loss
        loss = losses(pred, label)
        loss.backward()
        optimizer.step()
        loss_sum.append(loss.item())
        bar.set_description("epoch:{} idx:{}, loss:{:.6f}".format(epoch, idx, np.mean(loss_sum)))
torch.save(model.state_dict(), './models/yzm46.pkl', _use_new_zipfile_serialization=True)  # save the model
torch.save(optimizer.state_dict(), './models/optimizer.pkl', _use_new_zipfile_serialization=True)  # save the optimizer
After training, test the accuracy. I only trained for 20 epochs and tested on a little over 1,000 images, and the final accuracy came out above 95%. Given that this relies on transfer learning with a model architecture the experts have already built, and the steps are extremely simple, that's really not bad at all.
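For reference, the accuracy check can reuse the same Dataset; a minimal sketch (the test path "./all/test" is an assumption, adjust it to your own layout):

test_set = NumberDataset("./all/test", transform=transform)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False, num_workers=4)
model.eval()
correct = total = 0
with torch.no_grad():
    for data, label in test_loader:
        data, label = data.to(DEVICE), label.to(DEVICE)
        pred = model(data).reshape(-1, 4, 62).argmax(dim=2)  # (N, 4) predicted class indices
        target = label.argmax(dim=2)                         # one-hot labels back to indices
        correct += (pred == target).all(dim=1).sum().item()  # all 4 characters must match
        total += data.size(0)
print("accuracy: {:.4f}".format(correct / total))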