pytorch搭建cnn网络学习记录-CFANZ编程社区

参考 ResNet，简单搭建了一个 CNN 网络，这里记录一下。
任务是做猫狗识别，数据集是从 Kaggle 上下载的。
下面是网络搭建代码：

import torch
from torch import nn
import torch.nn.functional as F

class cnn(nn.Module):
    """Plain 17-layer convolutional classifier with two output classes.

    The network is a straight stack of Conv2d + ReLU layers (no residual
    connections, no normalisation layers) organised as a strided stem
    followed by four stages of 64, 128, 256 and 512 channels.  The shape
    comments below assume a 3x224x224 input; the ``Linear(512, 2)`` head
    expects the final average pool to reduce the feature map to 1x1, which
    is the case for that input size.

    Attribute names (``conv1`` .. ``conv17``, ``fc``) define the state_dict
    keys, so they must stay stable for saved checkpoints to load.
    """

    @staticmethod
    def _conv_relu(in_channels, out_channels, kernel_size, stride, padding):
        """Return the ``[Conv2d, ReLU]`` layer pair used throughout the net."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding),
            nn.ReLU(),
        ]

    def __init__(self):
        super(cnn, self).__init__()
        pair = self._conv_relu

        # Stem: 3x224x224 -> 64x112x112 (conv) -> 64x56x56 (max-pool).
        self.conv1 = nn.Sequential(
            *pair(3, 64, 8, 2, 3),
            nn.MaxPool2d(kernel_size=4, stride=2, padding=1)
        )

        # Stage 1: four 3x3 convolutions, output stays 64x56x56.
        self.conv2 = nn.Sequential(*pair(64, 64, 3, 1, 1))
        self.conv3 = nn.Sequential(*pair(64, 64, 3, 1, 1))
        self.conv4 = nn.Sequential(*pair(64, 64, 3, 1, 1))
        self.conv5 = nn.Sequential(*pair(64, 64, 3, 1, 1))

        # Stage 2: strided 4x4 conv down to 128x28x28, then three 3x3 convs.
        self.conv6 = nn.Sequential(*pair(64, 128, 4, 2, 1))
        self.conv7 = nn.Sequential(*pair(128, 128, 3, 1, 1))
        self.conv8 = nn.Sequential(*pair(128, 128, 3, 1, 1))
        self.conv9 = nn.Sequential(*pair(128, 128, 3, 1, 1))

        # Stage 3: strided 4x4 conv down to 256x14x14, then three 3x3 convs.
        self.conv10 = nn.Sequential(*pair(128, 256, 4, 2, 1))
        self.conv11 = nn.Sequential(*pair(256, 256, 3, 1, 1))
        self.conv12 = nn.Sequential(*pair(256, 256, 3, 1, 1))
        self.conv13 = nn.Sequential(*pair(256, 256, 3, 1, 1))

        # Stage 4: strided 4x4 conv down to 512x7x7, then three 3x3 convs.
        self.conv14 = nn.Sequential(*pair(256, 512, 4, 2, 1))
        self.conv15 = nn.Sequential(*pair(512, 512, 3, 1, 1))
        self.conv16 = nn.Sequential(*pair(512, 512, 3, 1, 1))
        # The last block also average-pools 512x7x7 down to 512x1x1.
        self.conv17 = nn.Sequential(
            *pair(512, 512, 3, 1, 1),
            nn.AvgPool2d(7)
        )

        self.flatten = nn.Flatten()
        self.fc = nn.Linear(512*1*1, 2)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: float tensor of shape ``(N, 3, H, W)``; ``H = W = 224`` is the
                size the layer arithmetic above is written for.

        Returns:
            Tensor of shape ``(N, 2)`` holding unnormalised class scores.
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.conv8(x)
        x = self.conv9(x)
        x = self.conv10(x)
        x = self.conv11(x)
        x = self.conv12(x)
        x = self.conv13(x)
        x = self.conv14(x)
        x = self.conv15(x)
        x = self.conv16(x)
        x = self.conv17(x)
        x = self.flatten(x)
        # Dropout regularises the pooled features, not the logits, and is
        # gated on self.training: F.dropout defaults to training=True, which
        # would otherwise keep randomly zeroing values after model.eval().
        x = F.dropout(x, training=self.training)
        x = self.fc(x)
        return x

搭建完成后，编写训练代码：

import torch
import torch.optim as optim
from torchvision import datasets, transforms
from torch import nn
from Resnet import cnn

# Root folders of the training and evaluation images (read by ImageFolder).
train_root = './archive/training_set/training_set'
test_root = './archive/test_set/test_set'

# Training pipeline: fixed 224x224 resize, random vertical and horizontal
# flips, tensor conversion, then per-channel (x - 0.5) / 0.5 normalisation.
train_transforms = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.RandomVerticalFlip(),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Evaluation pipeline: same resize and normalisation, no random augmentation.
test_transforms = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

train_datasets = datasets.ImageFolder(train_root, transform=train_transforms)
test_datasets = datasets.ImageFolder(test_root, transform=test_transforms)

# Both loaders serve shuffled mini-batches of 16 images.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_datasets,
    batch_size=16,
    shuffle=True,
)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_datasets,
    batch_size=16,
    shuffle=True,
)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

model = cnn()
model = model.to(device)

# Cross-entropy criterion.
loss = nn.CrossEntropyLoss()

# SGD with momentum; the scheduler multiplies the learning rate by 0.2
# every 5 scheduler steps.
optimizer = optim.SGD(params=model.parameters(), lr=0.005, momentum=0.9)
lr_optimizer = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.2)

def train(model, dataloader, loss, optimizer):
    """Run one training epoch and report its mean loss and accuracy.

    Args:
        model: ``nn.Module`` to optimise; it is put into training mode.
        dataloader: iterable yielding ``(inputs, labels)`` batches.
        loss: criterion called as ``loss(output, labels)`` that returns a
            scalar tensor.
        optimizer: optimiser that updates ``model``'s parameters.

    Returns:
        ``(mean_loss, accuracy)`` as Python floats.  ``mean_loss`` is the
        average of the per-batch loss values; ``accuracy`` is the number of
        correct predictions divided by the number of samples seen, so a
        smaller final batch is not over-weighted.

    Raises:
        ValueError: if ``dataloader`` yields no samples.
    """
    # Move batches to wherever the model lives instead of relying on a
    # module-level device variable.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else None

    batches, loss_total, correct, seen = 0, 0.0, 0, 0
    model.train()
    for x, y in dataloader:
        if target is not None:
            x, y = x.to(target), y.to(target)
        output = model(x)
        loss_ln = loss(output, y)
        optimizer.zero_grad()
        loss_ln.backward()
        optimizer.step()

        # .item() yields plain numbers, so the running totals do not keep a
        # chain of autograd history alive across the epoch.
        loss_total += loss_ln.item()
        _, pred = torch.max(output, dim=1)
        correct += (pred == y).sum().item()
        seen += output.shape[0]
        batches += 1

    if seen == 0:
        raise ValueError("dataloader yielded no samples")

    los_sum = loss_total / batches
    accuracy_sum = correct / seen
    print("训练集损失率：{}".format(los_sum))
    print("训练集准确率：{}".format(accuracy_sum))
    return los_sum, accuracy_sum


def test(model, dataloader, loss):
    n, los, accuracy = 0, 0, 0
    model.eval()
    with torch.no_grad():
        for batch, (x, y) in enumerate(dataloader):
            x, y = x.to(device), y.to(device)
            output = model(x)
            loss_ln = loss(output, y)
            _, pred = torch.max(output, axis=1)
            acc = torch.sum(pred == y) / output.shape[0]

            los += loss_ln
            accuracy += acc
            n += 1

        los_sum = los / n
        accuracy_sum = accuracy / n
        print("测试集损失率：{}".format(los_sum))
        print("测试集准确率：{}".format(accuracy_sum))
        return los_sum, accuracy_sum

# Train for a fixed number of epochs and keep the checkpoint that scores the
# highest accuracy on the held-out test split.
epoch = 50
acc = 0  # best test accuracy seen so far
for i in range(epoch):
    print("epoch:{}\n------------------".format(i+1))
    train(model, train_dataloader, loss, optimizer)
    # Evaluate on the test loader, not the training loader, so the score
    # reflects generalisation rather than fit to the training data.
    _, test_acc = test(model, test_dataloader, loss)
    lr_optimizer.step()
    if test_acc > acc:
        # Record the new best; without this update every epoch with a
        # non-zero accuracy would overwrite the "best" checkpoint.
        acc = test_acc
        torch.save(model.state_dict(), "resnet_best_model.pth")

训练时模型效果不太好，可能是网络搭的问题。
最后是预测代码：

from AlexNet import cnn
from torchvision import datasets, transforms
import torch
import cv2
from torch.autograd import Variable
import numpy as np

# Single-image inference: load the trained weights, preprocess one picture
# the same way the evaluation transforms do, and print the predicted class.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
pic = './archive/training_set/training_set/dogs/dog.3.jpg'
# NOTE(review): this script imports `cnn` from `AlexNet`, while the training
# script imports it from `Resnet` - confirm both refer to the same network,
# otherwise load_state_dict below will reject the checkpoint.
model = cnn().to(device)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load('./resnet_best_model.pth', map_location=device))

img = cv2.imread(pic)
if img is None:
    # cv2.imread signals a missing or unreadable file by returning None.
    raise FileNotFoundError("cannot read image: {}".format(pic))
# OpenCV decodes to BGR channel order; the training images were loaded
# through torchvision's ImageFolder, which yields RGB.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (224, 224))
# Scale to [0, 1] and normalise with the same mean/std as the training run.
img = img / 255
mean = np.array([0.5, 0.5, 0.5])
std = np.array([0.5, 0.5, 0.5])
img = (img - mean) / std
# HWC -> CHW, the layout the network expects.
img = img.transpose(2, 0, 1)
img_tensor = torch.from_numpy(img)

# ImageFolder numbers classes by sorted folder name; presumably the folders
# are `cats` and `dogs`, giving index 0 -> cats, 1 -> dogs - verify.
class_names = ('cats', 'dogs')

model.eval()
with torch.no_grad():
    # Add the batch dimension and match the model's dtype and device.
    img_tensor = torch.unsqueeze(img_tensor, dim=0).float().to(device)
    output = model(img_tensor)
    _, pred = torch.max(output, dim=1)
    result = class_names[int(pred)]
    print(result)