参考 ResNet 的结构,简单搭建了一个 CNN 网络,在这里记录一下。
任务是猫狗识别,数据集下载自 Kaggle。
下面是网络搭建代码:
import torch
from torch import nn
import torch.nn.functional as F
class cnn(nn.Module):
    """ResNet-inspired plain CNN (no skip connections) for 2-class classification.

    The network is a stem (strided conv + max-pool) followed by four stages
    of conv+ReLU layers with 64, 128, 256 and 512 channels, global average
    pooling, dropout and a final linear layer producing 2 logits.

    The shape comments below assume a 3x224x224 input. Global pooling is
    adaptive, so other input sizes that survive the down-sampling also work.
    """

    @staticmethod
    def _conv_relu(in_channels, out_channels, kernel_size, stride, padding):
        """Return a Conv2d followed by ReLU, wrapped in nn.Sequential."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding),
            nn.ReLU(),
        )

    def __init__(self):
        super(cnn, self).__init__()
        # Stem: 3x224x224 -> 64x112x112 (conv) -> 64x56x56 (max-pool)
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=8, stride=2, padding=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=4, stride=2, padding=1),
        )
        # Stage 1: four layers, each 64x56x56
        self.conv2 = self._conv_relu(64, 64, 3, 1, 1)
        self.conv3 = self._conv_relu(64, 64, 3, 1, 1)
        self.conv4 = self._conv_relu(64, 64, 3, 1, 1)
        self.conv5 = self._conv_relu(64, 64, 3, 1, 1)
        # Stage 2: strided conv down to 128x28x28, then three 128x28x28 layers
        self.conv6 = self._conv_relu(64, 128, 4, 2, 1)
        self.conv7 = self._conv_relu(128, 128, 3, 1, 1)
        self.conv8 = self._conv_relu(128, 128, 3, 1, 1)
        self.conv9 = self._conv_relu(128, 128, 3, 1, 1)
        # Stage 3: strided conv down to 256x14x14, then three 256x14x14 layers
        self.conv10 = self._conv_relu(128, 256, 4, 2, 1)
        self.conv11 = self._conv_relu(256, 256, 3, 1, 1)
        self.conv12 = self._conv_relu(256, 256, 3, 1, 1)
        self.conv13 = self._conv_relu(256, 256, 3, 1, 1)
        # Stage 4: strided conv down to 512x7x7, then three 512x7x7 layers
        self.conv14 = self._conv_relu(256, 512, 4, 2, 1)
        self.conv15 = self._conv_relu(512, 512, 3, 1, 1)
        self.conv16 = self._conv_relu(512, 512, 3, 1, 1)
        self.conv17 = nn.Sequential(
            nn.Conv2d(512, 512, 3, 1, 1),
            nn.ReLU(),
            # Global average pooling to 512x1x1. For the 7x7 map produced by a
            # 224x224 input this equals AvgPool2d(7); the adaptive form also
            # handles other spatial sizes.
            nn.AdaptiveAvgPool2d(1),
        )
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(512 * 1 * 1, 2)

    def forward(self, x):
        """Return the 2-class logits for a batch of images shaped (N, 3, H, W)."""
        for index in range(1, 18):
            x = getattr(self, "conv{}".format(index))(x)
        x = self.flatten(x)
        # Dropout regularises the pooled features and is active only in
        # training mode. Applying it to the logits with the default
        # training=True would randomly zero predictions even under eval().
        x = F.dropout(x, p=0.5, training=self.training)
        return self.fc(x)
搭建完成后,编写训练代码:
import torch
import torch.optim as optim
from torchvision import datasets, transforms
from torch import nn
from Resnet import cnn
# Image folders for the training and test splits.
train_root = './archive/training_set/training_set'
test_root = './archive/test_set/test_set'

# Training preprocessing: resize to 224x224, random vertical and horizontal
# flips, conversion to tensor, then per-channel normalisation with
# mean 0.5 and std 0.5.
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomVerticalFlip(),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Test preprocessing: the same resize and normalisation, without the flips.
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

train_datasets = datasets.ImageFolder(root=train_root, transform=train_transforms)
test_datasets = datasets.ImageFolder(root=test_root, transform=test_transforms)

# Both loaders use batches of 16 and shuffle their dataset.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_datasets, batch_size=16, shuffle=True
)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_datasets, batch_size=16, shuffle=True
)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

model = cnn().to(device)
loss = nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = optim.SGD(params=model.parameters(), lr=0.005, momentum=0.9)
# Multiply the learning rate by 0.2 every 5 scheduler steps.
lr_optimizer = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.2)
def train(model, dataloader, loss, optimizer):
    """Run one training epoch and report the mean per-batch loss and accuracy.

    Args:
        model: the network to optimise; it is switched to training mode.
        dataloader: iterable yielding ``(inputs, labels)`` batches.
        loss: callable computing the loss from ``(output, labels)``.
        optimizer: optimizer stepping the model's parameters.

    Returns:
        A ``(mean_loss, mean_accuracy)`` tuple averaged over the batches.
        Both values are tensors detached from the autograd graph.
    """
    # Move batches to wherever the model lives rather than relying on a
    # module-level ``device`` global.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device('cpu')

    n, los, accuracy = 0, 0, 0
    model.train()
    for x, y in dataloader:
        x, y = x.to(target), y.to(target)
        output = model(x)
        loss_ln = loss(output, y)
        pred = output.max(dim=1).indices
        acc = torch.sum(pred == y) / output.shape[0]

        optimizer.zero_grad()
        loss_ln.backward()
        optimizer.step()

        # Accumulate a detached value: adding the raw loss tensor would keep
        # autograd history alive for every batch of the epoch.
        los += loss_ln.detach()
        accuracy += acc
        n += 1

    los_sum = los / n
    accuracy_sum = accuracy / n
    print("训练集损失率:{}".format(los_sum))
    print("训练集准确率:{}".format(accuracy_sum))
    return los_sum, accuracy_sum
def test(model, dataloader, loss):
    """Evaluate the model over a dataloader without computing gradients.

    Args:
        model: the network to evaluate; it is switched to eval mode.
        dataloader: iterable yielding ``(inputs, labels)`` batches.
        loss: callable computing the loss from ``(output, labels)``.

    Returns:
        A ``(mean_loss, mean_accuracy)`` tuple averaged over the batches.
    """
    batches = 0
    loss_total = 0
    acc_total = 0
    model.eval()
    with torch.no_grad():
        for inputs, labels in dataloader:
            # Batches are moved to the module-level ``device``.
            inputs = inputs.to(device)
            labels = labels.to(device)
            logits = model(inputs)
            batch_loss = loss(logits, labels)
            predicted = torch.max(logits, axis=1)[1]
            batch_acc = torch.sum(predicted == labels) / logits.shape[0]
            loss_total += batch_loss
            acc_total += batch_acc
            batches += 1
    los_sum = loss_total / batches
    accuracy_sum = acc_total / batches
    print("测试集损失率:{}".format(los_sum))
    print("测试集准确率:{}".format(accuracy_sum))
    return los_sum, accuracy_sum
# Train for a fixed number of epochs and keep the checkpoint with the best
# test-set accuracy seen so far.
epoch = 50
acc = 0
for i in range(epoch):
    print("epoch:{}\n------------------".format(i+1))
    train(model, train_dataloader, loss, optimizer)
    # Evaluate on the held-out test split, not on the training data.
    _, test_acc = test(model, test_dataloader, loss)
    lr_optimizer.step()
    if test_acc > acc:
        # Record the new best score so that only a genuine improvement
        # overwrites the saved checkpoint.
        acc = float(test_acc)
        torch.save(model.state_dict(), "resnet_best_model.pth")
训练时模型效果不太好,可能是网络搭的问题。
最后是预测代码:
from AlexNet import cnn
from torchvision import datasets, transforms
import torch
import cv2
from torch.autograd import Variable
import numpy as np
# Single-image prediction: load the saved weights, preprocess one picture
# with the same resize and normalisation as the test transforms, and print
# the predicted label.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
pic = './archive/training_set/training_set/dogs/dog.3.jpg'

# NOTE(review): this script imports `cnn` from `AlexNet`, while the checkpoint
# is named resnet_best_model.pth and the training script imports `cnn` from
# `Resnet` -- confirm both refer to the same architecture.
model = cnn().to(device)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load('./resnet_best_model.pth', map_location=device))

img = cv2.imread(pic)
if img is None:
    # cv2.imread signals an unreadable or missing file by returning None.
    raise FileNotFoundError("cannot read image: {}".format(pic))
# OpenCV decodes to BGR; the training pipeline fed RGB images, so convert
# before normalising.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (224, 224))
img = img / 255
mean = np.array([0.5, 0.5, 0.5])
std = np.array([0.5, 0.5, 0.5])
img = (img - mean) / std
# HWC -> CHW, the layout the network expects.
img = img.transpose(2, 0, 1)
img_tensor = torch.from_numpy(img)

model.eval()
with torch.no_grad():
    # Add the batch dimension so the tensor is 4-D.
    img_tensor = torch.unsqueeze(img_tensor, dim=0).float().to(device)
    output = model(img_tensor)
    _, pred = torch.max(output, axis=1)
    # Index -> label mapping used by this script (presumably the dataset's
    # class-folder order -- confirm against the training data).
    labels = {0: 'cats', 1: 'dogs'}
    result = labels.get(int(pred.item()))
    if result is not None:
        print(result)