init repo

This commit is contained in:
TaurusXin 2024-09-30 11:05:35 +08:00
commit 05a8338c72
Signed by: taurusxin
GPG Key ID: C334DCA04AC2D2CC
11 changed files with 466 additions and 0 deletions

9
.gitignore vendored Normal file
View File

@ -0,0 +1,9 @@
# project files
dataset/
*.pth
# python files
__pycache__/
.venv

62
README.md Normal file
View File

@ -0,0 +1,62 @@
# Captcha Recognition
基于深度神经网络(DNN)的验证码识别
## 使用方法
1. 克隆项目到本地
```shell
git clone https://git.taurusxin.com/taurusxin/captcha.git
cd captcha
```
2. 创建虚拟环境并安装依赖
```shell
python -m venv .venv
# Windows
.venv\Scripts\Activate.ps1
# Linux/MacOS
source .venv/bin/activate
# 先安装 PyTorch GPU 版本(CUDA 12.4)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
# 然后再安装其他依赖
pip install -r requirements.txt
```
3. 根据提示生成数据集。需要运行 3 次,分别生成用于训练(train)、测试(test)和验证(predict)的数据集。
建议的数据集长度如下:
| 数据集 | 长度 |
| -------- | ------- |
| Train | 50000 |
| Test | 1000 |
| Predict | 30 |
```shell
python captcha_gen.py
```
4. 训练模型
```shell
python train.py
```
5. 测试模型
```shell
python test.py
```
6. 预测验证码
```shell
python predict.py
```

39
captcha_gen.py Normal file
View File

@ -0,0 +1,39 @@
import captcha_settings
import os
import random
from captcha.image import ImageCaptcha
from PIL import Image
from tqdm import trange
def random_captcha_text(char_set=captcha_settings.NUMBER + captcha_settings.ALPHABET, captcha_size=4):
    """Return a random captcha string.

    Args:
        char_set: Sequence of candidate characters; defaults to the digits
            followed by the uppercase letters from ``captcha_settings``.
        captcha_size: Number of characters to draw (with replacement).

    Returns:
        The drawn characters concatenated into a single ``str``.
    """
    # One random.choice call per position, in order.
    return "".join(random.choice(char_set) for _ in range(captcha_size))
def gen_captcha_text_and_image():
    """Generate one random captcha text and render it as an image.

    Returns:
        A ``(text, image)`` tuple: the captcha string and the PIL image
        opened from the data returned by ``ImageCaptcha.generate``.
    """
    generator = ImageCaptcha()
    text = random_captcha_text()
    rendered = generator.generate(text)
    return text, Image.open(rendered)
if __name__ == "__main__":
    # Interactive entry point: ask which dataset split to generate and how
    # many samples, then write "<index>_<text>.png" files into that split's
    # folder.
    dataset_type = input("请输入数据集类型1 - train / 2 - test / 3 - predict")
    dataset_len = input("请输入数据集长度:")
    paths = [captcha_settings.TRAIN_DATASET_PATH, captcha_settings.TEST_DATASET_PATH, captcha_settings.PREDICT_DATASET_PATH]
    dataset_type = int(dataset_type)
    count = int(dataset_len)
    # Reject out-of-range choices explicitly: 0 or a negative number would
    # otherwise wrap around through negative list indexing and silently
    # write the images into the wrong folder.
    if not 1 <= dataset_type <= len(paths):
        raise SystemExit(
            f"dataset type must be between 1 and {len(paths)}, got {dataset_type}"
        )
    path = paths[dataset_type - 1]
    # exist_ok avoids the separate existence check.
    os.makedirs(path, exist_ok=True)
    for i in trange(count):
        text, image = gen_captcha_text_and_image()
        # Zero-padded index keeps file names unique; the label follows "_".
        filename = f"{i:05d}_{text}.png"
        image.save(os.path.join(path, filename))

18
captcha_settings.py Normal file
View File

@ -0,0 +1,18 @@
import os
# Characters that may appear in a captcha:
# the same symbols as string.digits + string.ascii_uppercase.
NUMBER = [str(digit) for digit in range(10)]
ALPHABET = [chr(code) for code in range(ord('A'), ord('Z') + 1)]
ALL_CHAR_SET = NUMBER + ALPHABET
ALL_CHAR_SET_STR = ''.join(ALL_CHAR_SET)
ALL_CHAR_SET_LEN = len(ALL_CHAR_SET)
# Captcha length used by the one-hot encoder and the network output size.
MAX_CAPTCHA = 4

# Image size in pixels.
IMAGE_HEIGHT = 60
IMAGE_WIDTH = 160

# Dataset folders, relative to the current working directory.
TRAIN_DATASET_PATH = os.path.join('dataset', 'train')
TEST_DATASET_PATH = os.path.join('dataset', 'test')
PREDICT_DATASET_PATH = os.path.join('dataset', 'predict')

84
cnn_net.py Normal file
View File

@ -0,0 +1,84 @@
import torch.nn as nn
import captcha_settings
class ConvNet(nn.Module):
    """CNN that maps a single-channel captcha image to per-character scores.

    Four convolution stages (3x3 convolution with padding 1, ReLU, 2x2 max
    pooling) widen the channels 1 -> 64 -> 128 -> 256 -> 512. A fully
    connected head then produces ``MAX_CAPTCHA * ALL_CHAR_SET_LEN`` values.
    """

    def __init__(self):
        super().__init__()
        # Stages are created in order layer1..layer5 so parameter names in
        # the state dict stay "layerN.<index>.*".
        self.layer1 = self._conv_stage(1, 64)
        self.layer2 = self._conv_stage(64, 128)
        self.layer3 = self._conv_stage(128, 256)
        self.layer4 = self._conv_stage(256, 512)

        n_outputs = captcha_settings.MAX_CAPTCHA * captcha_settings.ALL_CHAR_SET_LEN
        self.layer5 = nn.Sequential(
            nn.Flatten(),
            # 15360 = 512 channels * 3 * 10: what remains of a 60x160 input
            # after four 2x2 poolings (60->30->15->7->3, 160->80->40->20->10).
            nn.Linear(in_features=15360, out_features=4096),
            nn.Dropout(0.5),
            nn.ReLU(),
            nn.Linear(4096, n_outputs),
        )

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one conv -> ReLU -> max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

    def forward(self, x):
        """Pass ``x`` through layer1..layer5 in order and return the result."""
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
            x = stage(x)
        return x
# class ConvNet(nn.Module):
# def __init__(self):
# super(ConvNet, self).__init__()
# self.layer1 = nn.Sequential(
# nn.Conv2d(1, 32, kernel_size=3, padding=1),
# nn.BatchNorm2d(32),
# nn.Dropout(0.5), # drop 50% of the neuron
# nn.ReLU(),
# nn.MaxPool2d(2))
# self.layer2 = nn.Sequential(
# nn.Conv2d(32, 64, kernel_size=3, padding=1),
# nn.BatchNorm2d(64),
# nn.Dropout(0.5), # drop 50% of the neuron
# nn.ReLU(),
# nn.MaxPool2d(2))
# self.layer3 = nn.Sequential(
# nn.Conv2d(64, 64, kernel_size=3, padding=1),
# nn.BatchNorm2d(64),
# nn.Dropout(0.5), # drop 50% of the neuron
# nn.ReLU(),
# nn.MaxPool2d(2))
# self.fc = nn.Sequential(
# nn.Linear((captcha_settings.IMAGE_WIDTH//8)*(captcha_settings.IMAGE_HEIGHT//8)*64, 1024),
# nn.Dropout(0.5), # drop 50% of the neuron
# nn.ReLU())
# self.rfc = nn.Sequential(
# nn.Linear(1024, captcha_settings.MAX_CAPTCHA*captcha_settings.ALL_CHAR_SET_LEN),
# )
# def forward(self, x):
# out = self.layer1(x)
# out = self.layer2(out)
# out = self.layer3(out)
# out = out.view(out.size(0), -1)
# out = self.fc(out)
# out = self.rfc(out)
# return out

50
dataset.py Normal file
View File

@ -0,0 +1,50 @@
import os
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from PIL import Image
import one_hot_encoding as ohe
from captcha_settings import TRAIN_DATASET_PATH, TEST_DATASET_PATH, PREDICT_DATASET_PATH
class CaptchaDataset(Dataset):
    """Dataset of captcha images whose label is embedded in the file name.

    The label is the part of the file name after the last underscore and
    before the first dot, e.g. ``00012_AB3D.png`` -> ``AB3D``.
    """

    def __init__(self, dir, transform=None):
        """Collect the path of every entry found directly inside ``dir``.

        Args:
            dir: Folder to scan; every directory entry is treated as an image.
            transform: Optional callable applied to each opened image.
        """
        self.transform = transform
        self.train_images = []
        for entry in os.listdir(dir):
            self.train_images.append(os.path.join(dir, entry))

    def __len__(self):
        """Return the number of collected image paths."""
        return len(self.train_images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the image at position ``idx``.

        The image is opened with PIL and passed through ``transform`` when one
        was given (any colour conversion happens there, not here). The label
        is the one-hot encoding of the text parsed from the file name.
        """
        path = self.train_images[idx]
        file_name = path.rsplit(os.path.sep, 1)[-1]

        image = Image.open(path)
        if self.transform is not None:
            image = self.transform(image)

        stem = file_name.split('.', 1)[0]
        captcha_text = stem.rsplit('_', 1)[-1]
        return image, ohe.encode(captcha_text)
# Shared preprocessing for every loader below: PIL image -> tensor, then
# torchvision's Grayscale transform.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Grayscale()]
)
def get_train_loader(batch_size=60):
    """Return a shuffling DataLoader over the images in TRAIN_DATASET_PATH.

    Args:
        batch_size: Number of samples per batch.
    """
    return DataLoader(
        CaptchaDataset(TRAIN_DATASET_PATH, transform),
        batch_size=batch_size,
        shuffle=True,
    )
def get_test_loader(batch_size=60):
    """Return a shuffling DataLoader over the images in TEST_DATASET_PATH.

    Args:
        batch_size: Number of samples per batch.
    """
    return DataLoader(
        CaptchaDataset(TEST_DATASET_PATH, transform),
        batch_size=batch_size,
        shuffle=True,
    )
def get_predict_loader(batch_size=60):
    """Return a shuffling DataLoader over the images in PREDICT_DATASET_PATH.

    Args:
        batch_size: Number of samples per batch.
    """
    return DataLoader(
        CaptchaDataset(PREDICT_DATASET_PATH, transform),
        batch_size=batch_size,
        shuffle=True,
    )
def main():
    """Smoke test: print the image and label batch shapes of the train loader."""
    for images, labels in get_train_loader():
        print(images.shape, labels.shape)


if __name__ == '__main__':
    main()

34
one_hot_encoding.py Normal file
View File

@ -0,0 +1,34 @@
import numpy as np
import captcha_settings
import torch
def encode(label):
    """Convert captcha text into a flattened one-hot tensor.

    A zero tensor with ``MAX_CAPTCHA`` rows and one column per character of
    ``ALL_CHAR_SET_STR`` is created. For the i-th character of ``label`` the
    column at that character's index in ``ALL_CHAR_SET_STR`` is set to 1. The
    rows are then laid out end to end (4 * 36 = 144 values with this
    project's settings).

    Args:
        label: Captcha text. ``str.index`` raises ``ValueError`` for a
            character that is not in ``ALL_CHAR_SET_STR``.

    Returns:
        A 1-D tensor of length rows * cols, created with ``dtype=float``.
    """
    charset = captcha_settings.ALL_CHAR_SET_STR
    one_hot = torch.zeros((captcha_settings.MAX_CAPTCHA, len(charset)), dtype=float)
    for row, char in enumerate(label):
        one_hot[row, charset.index(char)] = 1.0
    return one_hot.view(-1)
def decode(pred_result):
    """Convert a one-hot (or score) tensor back into captcha text.

    The input is viewed as rows of ``len(ALL_CHAR_SET_STR)`` columns (4 x 36
    with this project's settings); ``torch.argmax`` picks the column with the
    largest value in each row, and that index selects the character from
    ``ALL_CHAR_SET_STR``.

    Args:
        pred_result: Tensor whose element count is a multiple of the
            character-set size.

    Returns:
        The decoded text, one character per row.
    """
    charset = captcha_settings.ALL_CHAR_SET_STR
    per_char_scores = pred_result.view(-1, len(charset))
    best_columns = torch.argmax(per_char_scores, dim=1)
    return "".join(charset[i] for i in best_columns.tolist())
def main():
    """Round-trip demo: encode "ABCD", print it, decode it back, print it."""
    encoded = encode("ABCD")
    print(encoded)
    print(decode(encoded))


if __name__ == '__main__':
    main()

50
predict.py Normal file
View File

@ -0,0 +1,50 @@
import torch
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
from cnn_net import ConvNet
import os
import random
import captcha_settings
import one_hot_encoding
# Pick the compute device: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")
def predict(model, file_path):
    """Run ``model`` on one captcha image file and return the decoded text.

    Args:
        model: Callable network; it receives a tensor of shape
            ``(1, 1, IMAGE_HEIGHT, IMAGE_WIDTH)`` on the selected device.
        file_path: Path of the image to read.

    Returns:
        The text produced by ``one_hot_encoding.decode`` from the model output.
    """
    trans = transforms.Compose([
        transforms.ToTensor(),
        transforms.Grayscale(),
    ])
    # The context manager closes the file handle as soon as the pixels have
    # been copied into the tensor, instead of leaving that to the GC.
    with Image.open(file_path) as image:
        X = trans(image)
    # Take the expected size from the shared settings rather than repeating
    # the 60x160 literals here.
    X = X.reshape(
        1, 1, captcha_settings.IMAGE_HEIGHT, captcha_settings.IMAGE_WIDTH
    ).to(device)
    with torch.no_grad():
        pred = model(X)
    return one_hot_encoding.decode(pred)
def main():
    """Show a grid of randomly picked predict-set captchas with predictions.

    Loads the weights from ``./model.pth``, samples up to 30 files from
    ``PREDICT_DATASET_PATH`` and shows each one titled with the predicted text
    and whether it matches the label parsed from the file name.
    """
    model = ConvNet().to(device)
    # map_location lets a checkpoint saved on a GPU machine load on CPU/MPS.
    model.load_state_dict(
        torch.load("./model.pth", weights_only=True, map_location=device)
    )
    model.eval()

    # random pickup some test images
    pickup_count = 30
    pickup_rect = [5, 6]
    rows, cols = pickup_rect
    files = os.listdir(captcha_settings.PREDICT_DATASET_PATH)
    # random.sample raises ValueError when asked for more items than exist.
    images_picked = random.sample(files, min(pickup_count, len(files)))

    # show as a grid, with predicted text, correct or not
    fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize=(10, 8))
    for i, image_name in enumerate(images_picked):
        real_text = image_name.split(".")[0].split("_")[-1]
        # The names were listed from the predict folder, so the files must be
        # opened from that same folder (not from the test folder).
        file_path = os.path.join(captcha_settings.PREDICT_DATASET_PATH, image_name)
        pred_text = predict(model, file_path)
        correct = real_text == pred_text
        ax = axes[i // cols, i % cols]
        ax.imshow(plt.imread(file_path))
        ax.set_title(f"{pred_text}, {'yes' if correct else 'no'}")
        ax.axis('off')
    # Hide the cells left empty when fewer images than grid cells exist.
    for i in range(len(images_picked), rows * cols):
        axes[i // cols, i % cols].axis('off')
    plt.show()


if __name__ == "__main__":
    main()

30
requirements.txt Normal file
View File

@ -0,0 +1,30 @@
captcha==0.6.0
colorama==0.4.6
contourpy==1.3.0
cycler==0.12.1
filelock==3.13.1
fonttools==4.54.1
fsspec==2024.2.0
Jinja2==3.1.3
joblib==1.4.2
kiwisolver==1.4.7
MarkupSafe==2.1.5
matplotlib==3.9.2
mpmath==1.3.0
networkx==3.2.1
numpy==1.26.3
packaging==24.1
pillow==10.2.0
pyparsing==3.1.4
python-dateutil==2.9.0.post0
scikit-learn==1.5.2
scipy==1.14.1
setuptools==70.0.0
six==1.16.0
sympy==1.12
threadpoolctl==3.5.0
torch==2.4.1+cu124
torchaudio==2.4.1+cu124
torchvision==0.19.1+cu124
tqdm==4.66.5
typing_extensions==4.9.0

45
test.py Normal file
View File

@ -0,0 +1,45 @@
import os
import torch
from PIL import Image
from cnn_net import ConvNet
import one_hot_encoding
from torchvision import transforms
import captcha_settings
from tqdm import tqdm
# Pick the compute device: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")
def predict(model, file_path):
    """Run ``model`` on one captcha image file and return the decoded text.

    Args:
        model: Callable network; it receives a tensor of shape
            ``(1, 1, IMAGE_HEIGHT, IMAGE_WIDTH)`` on the selected device.
        file_path: Path of the image to read.

    Returns:
        The text produced by ``one_hot_encoding.decode`` from the model output.
    """
    trans = transforms.Compose([
        transforms.ToTensor(),
        transforms.Grayscale(),
    ])
    # The context manager closes the file handle as soon as the pixels have
    # been copied into the tensor; this function runs once per test file.
    with Image.open(file_path) as image:
        X = trans(image)
    # Take the expected size from the shared settings rather than repeating
    # the 60x160 literals here.
    X = X.reshape(
        1, 1, captcha_settings.IMAGE_HEIGHT, captcha_settings.IMAGE_WIDTH
    ).to(device)
    with torch.no_grad():
        pred = model(X)
    return one_hot_encoding.decode(pred)
def main():
    """Evaluate ``./model.pth`` on every file in ``TEST_DATASET_PATH``.

    A prediction counts as correct only when the whole decoded text equals the
    label parsed from the file name. Prints the file count and the accuracy.
    """
    model = ConvNet().to(device)
    # map_location lets a checkpoint saved on a GPU machine load on CPU/MPS.
    model.load_state_dict(
        torch.load("./model.pth", weights_only=True, map_location=device)
    )
    model.eval()

    # List the folder once so the count and the loop see the same files.
    filenames = os.listdir(captcha_settings.TEST_DATASET_PATH)
    total = len(filenames)
    if total == 0:
        # Avoid dividing by zero when the test folder is empty.
        print("Test 0 files, accuracy: n/a")
        return

    correct = 0
    for filename in tqdm(filenames):
        file_path = os.path.join(captcha_settings.TEST_DATASET_PATH, filename)
        real_captcha = filename.split('.')[0].split('_')[-1]
        pred_captcha = predict(model, file_path)
        if pred_captcha == real_captcha:
            correct += 1
    accuracy = f"Test {total} files, accuracy: {correct / total * 100:.2f}%"
    print(accuracy)


if __name__ == '__main__':
    main()

45
train.py Normal file
View File

@ -0,0 +1,45 @@
import torch
import torch.nn as nn
from tqdm import tqdm
from cnn_net import ConvNet
import dataset
# Training hyper-parameters.
num_epochs = 10
learning_rate = 0.001

# Pick the compute device: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
def main():
    """Train ``ConvNet`` on the training set and save it to ``./model.pth``.

    Uses ``MultiLabelSoftMarginLoss`` with the Adam optimizer for
    ``num_epochs`` passes over the loader from ``dataset.get_train_loader``.
    """
    model = ConvNet().to(device)
    model.train()
    criterion = nn.MultiLabelSoftMarginLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model
    train_dataloader = dataset.get_train_loader()
    for epoch in range(num_epochs):
        print("Epoch:", epoch+1)
        last_loss = None
        pbar = tqdm(train_dataloader, total=len(train_dataloader))
        for images, labels in pbar:
            images = images.to(device)
            # Cast to float32 before the transfer: the one-hot labels are
            # float64, which the MPS backend cannot hold, and float32 also
            # matches the dtype of the model output.
            labels = labels.float().to(device)

            predict_labels = model(images)
            loss = criterion(predict_labels, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            last_loss = loss.item()
            pbar.set_description("loss: %.4f" % last_loss)
        # Guard against an empty loader, where no loss was ever computed.
        if last_loss is not None:
            print("loss:", last_loss, '\n')
        # Saving after every epoch keeps the latest weights if a long run is
        # interrupted; the file left after the final epoch is the same.
        torch.save(model.state_dict(), "./model.pth")


if __name__ == "__main__":
    main()