init repo
commit 05a8338c72
9  .gitignore  vendored  Normal file
@@ -0,0 +1,9 @@
# project files
dataset/

*.pth

# python files
__pycache__/

.venv
62  README.md  Normal file
@@ -0,0 +1,62 @@
# Captcha Recognition

Captcha recognition based on a deep neural network (DNN)

## Usage

1. Clone the project locally

```shell
git clone https://git.taurusxin.com/taurusxin/captcha.git
cd captcha
```

2. Create a virtual environment and install the dependencies

```shell
python -m venv .venv

# Windows
.venv\Scripts\Activate.ps1

# Linux/MacOS
source .venv/bin/activate

# Install the PyTorch GPU build first (CUDA 12.4)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

# Then install the remaining dependencies
pip install -r requirements.txt
```

3. Generate the datasets as prompted. Run the generator three times: once for training, once for testing, and once for prediction.

Suggested dataset sizes:

| Dataset | Size  |
| ------- | ----- |
| Train   | 50000 |
| Test    | 1000  |
| Predict | 30    |

```shell
python captcha_gen.py
```

4. Train the model

```shell
python train.py
```

5. Test the model

```shell
python test.py
```

6. Predict captchas

```shell
python predict.py
```
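After step 3 the images land in `dataset/train`, `dataset/test`, and `dataset/predict` (the paths come from `captcha_settings.py`), each named like `00000_A1B2.png` for example: a five-digit index, an underscore, then the four-character label that `captcha_gen.py` drew at random.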
39  captcha_gen.py  Normal file
@@ -0,0 +1,39 @@
import captcha_settings
import os
import random

from captcha.image import ImageCaptcha
from PIL import Image

from tqdm import trange


def random_captcha_text(char_set=captcha_settings.NUMBER + captcha_settings.ALPHABET, captcha_size=4):
    captcha_text = []
    for i in range(captcha_size):
        c = random.choice(char_set)
        captcha_text.append(c)
    return "".join(captcha_text)


def gen_captcha_text_and_image():
    image = ImageCaptcha()
    captcha_text = random_captcha_text()
    captcha_image = Image.open(image.generate(captcha_text))
    return captcha_text, captcha_image


if __name__ == "__main__":
    dataset_type = input("Enter the dataset type (1 - train / 2 - test / 3 - predict): ")
    dataset_len = input("Enter the dataset size: ")

    paths = [captcha_settings.TRAIN_DATASET_PATH, captcha_settings.TEST_DATASET_PATH, captcha_settings.PREDICT_DATASET_PATH]

    dataset_type = int(dataset_type)
    count = int(dataset_len)
    path = paths[dataset_type - 1]
    if not os.path.exists(path):
        os.makedirs(path)
    for i in trange(count):
        text, image = gen_captcha_text_and_image()
        filename = f"{str(i).zfill(5)}_{text}.png"
        image.save(path + os.path.sep + filename)
18  captcha_settings.py  Normal file
@@ -0,0 +1,18 @@
import os

# Characters that may appear in a captcha
# string.digits + string.ascii_uppercase
NUMBER = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

ALL_CHAR_SET = NUMBER + ALPHABET
ALL_CHAR_SET_STR = ''.join(ALL_CHAR_SET)
ALL_CHAR_SET_LEN = len(ALL_CHAR_SET)
MAX_CAPTCHA = 4

# Image size
IMAGE_HEIGHT = 60
IMAGE_WIDTH = 160

TRAIN_DATASET_PATH = 'dataset' + os.path.sep + 'train'
TEST_DATASET_PATH = 'dataset' + os.path.sep + 'test'
PREDICT_DATASET_PATH = 'dataset' + os.path.sep + 'predict'
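Together these constants fix the model's output width: MAX_CAPTCHA × ALL_CHAR_SET_LEN = 4 × (10 + 26) = 144. That 144 is both the length of the vector returned by encode() in one_hot_encoding.py and the out_features of ConvNet's final linear layer in cnn_net.py.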
84  cnn_net.py  Normal file
@@ -0,0 +1,84 @@
import torch.nn as nn
import captcha_settings


class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.layer5 = nn.Sequential(
            nn.Flatten(),
            # 15360 = 512 channels * 3 * 10: the four 2x2 max-pools reduce
            # the 60x160 input to 3x10 (integer division by 16)
            nn.Linear(in_features=15360, out_features=4096),
            nn.Dropout(0.5),
            nn.ReLU(),
            nn.Linear(
                4096,
                captcha_settings.MAX_CAPTCHA * captcha_settings.ALL_CHAR_SET_LEN,
            ),
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        return x


# An earlier, smaller architecture, kept for reference:
# class ConvNet(nn.Module):
#     def __init__(self):
#         super(ConvNet, self).__init__()
#         self.layer1 = nn.Sequential(
#             nn.Conv2d(1, 32, kernel_size=3, padding=1),
#             nn.BatchNorm2d(32),
#             nn.Dropout(0.5),  # drop 50% of the neurons
#             nn.ReLU(),
#             nn.MaxPool2d(2))
#         self.layer2 = nn.Sequential(
#             nn.Conv2d(32, 64, kernel_size=3, padding=1),
#             nn.BatchNorm2d(64),
#             nn.Dropout(0.5),  # drop 50% of the neurons
#             nn.ReLU(),
#             nn.MaxPool2d(2))
#         self.layer3 = nn.Sequential(
#             nn.Conv2d(64, 64, kernel_size=3, padding=1),
#             nn.BatchNorm2d(64),
#             nn.Dropout(0.5),  # drop 50% of the neurons
#             nn.ReLU(),
#             nn.MaxPool2d(2))
#         self.fc = nn.Sequential(
#             nn.Linear((captcha_settings.IMAGE_WIDTH//8)*(captcha_settings.IMAGE_HEIGHT//8)*64, 1024),
#             nn.Dropout(0.5),  # drop 50% of the neurons
#             nn.ReLU())
#         self.rfc = nn.Sequential(
#             nn.Linear(1024, captcha_settings.MAX_CAPTCHA*captcha_settings.ALL_CHAR_SET_LEN),
#         )
#
#     def forward(self, x):
#         out = self.layer1(x)
#         out = self.layer2(out)
#         out = self.layer3(out)
#         out = out.view(out.size(0), -1)
#         out = self.fc(out)
#         out = self.rfc(out)
#         return out
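Where does the hard-coded 15360 come from? Each of the four MaxPool2d layers halves both spatial dimensions (flooring), so the 60×160 grayscale input shrinks to 3×10, with 512 channels after layer4: 512 × 3 × 10 = 15360. A minimal sanity check, assuming torch is installed and this file is importable as cnn_net:

```python
import torch

from cnn_net import ConvNet

model = ConvNet()
dummy = torch.zeros(1, 1, 60, 160)  # (batch, channels, height, width)
out = model(dummy)
# Expect torch.Size([1, 144]): 4 characters * 36 classes each
print(out.shape)
```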
50  dataset.py  Normal file
@@ -0,0 +1,50 @@
import os
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from PIL import Image
import one_hot_encoding as ohe
from captcha_settings import TRAIN_DATASET_PATH, TEST_DATASET_PATH, PREDICT_DATASET_PATH


class CaptchaDataset(Dataset):
    def __init__(self, directory, transform=None):
        # list all image files in the directory
        self.train_images = [os.path.join(directory, image_file) for image_file in os.listdir(directory)]
        self.transform = transform

    def __len__(self):
        return len(self.train_images)

    def __getitem__(self, idx):
        # load the image and convert it to grayscale
        image_root = self.train_images[idx]
        image_name = image_root.split(os.path.sep)[-1]
        image = Image.open(image_root)
        if self.transform is not None:
            image = self.transform(image)
        # the label is the text after the last underscore in the filename
        label = ohe.encode(image_name.split('.')[0].split('_')[-1])
        return image, label


transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Grayscale(),
])


def get_train_loader(batch_size=60):
    dataset = CaptchaDataset(TRAIN_DATASET_PATH, transform)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)


def get_test_loader(batch_size=60):
    dataset = CaptchaDataset(TEST_DATASET_PATH, transform)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)


def get_predict_loader(batch_size=60):
    dataset = CaptchaDataset(PREDICT_DATASET_PATH, transform)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)


def main():
    train_loader = get_train_loader()
    for i, (image, label) in enumerate(train_loader):
        print(image.shape, label.shape)


if __name__ == '__main__':
    main()
34  one_hot_encoding.py  Normal file
@@ -0,0 +1,34 @@
import torch

import captcha_settings


# Build a 4-row, 36-column all-zero tensor with torch.zeros(). Then, for each
# character of the label, look up its index in captcha_settings.ALL_CHAR_SET_STR
# and flip the corresponding 0 to 1. The result is returned flattened to a
# 1-D tensor of length 4 * 36 = 144.
def encode(label):
    """Convert a label string to a one-hot tensor."""
    cols = len(captcha_settings.ALL_CHAR_SET_STR)
    rows = captcha_settings.MAX_CAPTCHA
    result = torch.zeros((rows, cols), dtype=torch.float32)  # float32 matches the model's output dtype
    for i, char in enumerate(label):
        j = captcha_settings.ALL_CHAR_SET_STR.index(char)
        result[i, j] = 1.0
    return result.view(1, -1)[0]


# Reshape the model's 1-D prediction back into 4 rows of 36 columns, then use
# torch.argmax() to find the index of each row's maximum (i.e. the 1); that
# index maps back to a character in captcha_settings.ALL_CHAR_SET_STR.
def decode(pred_result):
    """Convert a one-hot (or logit) tensor back to a string."""
    pred_result = pred_result.view(-1, len(captcha_settings.ALL_CHAR_SET_STR))
    index_list = torch.argmax(pred_result, dim=1)
    text = "".join([captcha_settings.ALL_CHAR_SET_STR[i] for i in index_list])
    return text


def main():
    label = "ABCD"
    one_hot_label = encode(label)
    print(one_hot_label)
    decoded_label = decode(one_hot_label)
    print(decoded_label)


if __name__ == '__main__':
    main()
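Concretely, with ALL_CHAR_SET_STR being the digits followed by the uppercase alphabet (36 characters), encode("ABCD") returns a 144-dim vector whose only ones sit at positions 10, 47, 84, and 121: row i is offset by 36·i, and 'A', 'B', 'C', 'D' have indices 10 through 13.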
50  predict.py  Normal file
@@ -0,0 +1,50 @@
import torch
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
from cnn_net import ConvNet
import os
import random
import captcha_settings
import one_hot_encoding

device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using {device} device")


def predict(model, file_path):
    trans = transforms.Compose([
        transforms.ToTensor(),
        transforms.Grayscale()
    ])
    with torch.no_grad():
        X = trans(Image.open(file_path)).reshape(1, 1, 60, 160)
        X = X.to(device)
        pred = model(X)
        text = one_hot_encoding.decode(pred)
        return text


def main():
    model = ConvNet().to(device)
    model.load_state_dict(torch.load("./model.pth", weights_only=True))
    model.eval()

    # randomly pick some images from the predict dataset
    pickup_count = 30
    pickup_rect = [5, 6]  # grid: 5 rows x 6 columns
    files = os.listdir(captcha_settings.PREDICT_DATASET_PATH)
    images_picked = random.sample(files, pickup_count)

    # show as a grid, with the predicted text and whether it is correct
    fig, axes = plt.subplots(nrows=pickup_rect[0], ncols=pickup_rect[1], figsize=(10, 8))
    for i, image_name in enumerate(images_picked):
        real_text = image_name.split(".")[0].split("_")[-1]
        # the files were listed from the predict dataset, so build the path there
        file_path = os.path.join(captcha_settings.PREDICT_DATASET_PATH, image_name)
        pred_text = predict(model, file_path)
        correct = real_text == pred_text
        ax = axes[i // pickup_rect[1], i % pickup_rect[1]]
        ax.imshow(plt.imread(file_path))
        ax.set_title(f"{pred_text}, {'yes' if correct else 'no'}")
        ax.axis('off')
    plt.show()


if __name__ == "__main__":
    main()
30  requirements.txt  Normal file
@@ -0,0 +1,30 @@
captcha==0.6.0
colorama==0.4.6
contourpy==1.3.0
cycler==0.12.1
filelock==3.13.1
fonttools==4.54.1
fsspec==2024.2.0
Jinja2==3.1.3
joblib==1.4.2
kiwisolver==1.4.7
MarkupSafe==2.1.5
matplotlib==3.9.2
mpmath==1.3.0
networkx==3.2.1
numpy==1.26.3
packaging==24.1
pillow==10.2.0
pyparsing==3.1.4
python-dateutil==2.9.0.post0
scikit-learn==1.5.2
scipy==1.14.1
setuptools==70.0.0
six==1.16.0
sympy==1.12
threadpoolctl==3.5.0
torch==2.4.1+cu124
torchaudio==2.4.1+cu124
torchvision==0.19.1+cu124
tqdm==4.66.5
typing_extensions==4.9.0
45  test.py  Normal file
@@ -0,0 +1,45 @@
import os
import torch
from PIL import Image
from cnn_net import ConvNet
import one_hot_encoding
from torchvision import transforms
import captcha_settings
from tqdm import tqdm

device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using {device} device")


def predict(model, file_path):
    trans = transforms.Compose([
        transforms.ToTensor(),
        transforms.Grayscale()
    ])
    with torch.no_grad():
        X = trans(Image.open(file_path)).reshape(1, 1, 60, 160)
        X = X.to(device)
        pred = model(X)
        text = one_hot_encoding.decode(pred)
        return text


def main():
    model = ConvNet().to(device)
    model.load_state_dict(torch.load("./model.pth", weights_only=True))
    model.eval()

    correct = 0
    total = len(os.listdir(captcha_settings.TEST_DATASET_PATH))
    for filename in tqdm(os.listdir(captcha_settings.TEST_DATASET_PATH)):
        file_path = f"{captcha_settings.TEST_DATASET_PATH}{os.path.sep}{filename}"
        real_captcha = filename.split('.')[0].split('_')[-1]
        pred_captcha = predict(model, file_path)
        if pred_captcha == real_captcha:
            correct += 1
    summary = f"Tested {total} files, accuracy: {correct / total * 100:.2f}%"
    print(summary)


if __name__ == '__main__':
    main()
45  train.py  Normal file
@@ -0,0 +1,45 @@
import torch
import torch.nn as nn

from tqdm import tqdm

from cnn_net import ConvNet
import dataset

num_epochs = 10
learning_rate = 0.001

device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"


def main():
    model = ConvNet().to(device)

    model.train()

    criterion = nn.MultiLabelSoftMarginLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model
    train_dataloader = dataset.get_train_loader()

    for epoch in range(num_epochs):
        print("Epoch:", epoch + 1)
        pbar = tqdm(enumerate(train_dataloader), total=len(train_dataloader))
        for i, (images, labels) in pbar:
            images, labels = images.to(device), labels.to(device)

            predict_labels = model(images)

            loss = criterion(predict_labels, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            pbar.set_description("loss: %.4f" % loss.item())

        print("loss:", loss.item(), '\n')
        torch.save(model.state_dict(), "./model.pth")


if __name__ == "__main__":
    main()
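A note on the loss choice: nn.MultiLabelSoftMarginLoss treats each of the 144 outputs as an independent binary decision, which matches the multi-hot target produced by one_hot_encoding.encode() (exactly four ones, one per character position). The per-row argmax in decode() then recovers one character per position at inference time.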