VGG网络介绍

论文：《Very Deep Convolutional Networks for Large-Scale Image Recognition》

论文地址：https://arxiv.org/abs/1409.1556

VGG 网络可以分为两部分：第一部分主要由卷积层和汇聚层（即池化层）组成，第二部分由全连接层组成。

VGG网络有多种不同的配置，但是VGG块数量都是5，VGG块中的卷积层的数量不同或者filter不同，VGG网络的结构如下：

VGG网络输入为224*224*3。VGG模块中卷积核大小为3*3，conv_padding = 1，conv_stride = 1；maxpooling filter = 2*2，maxpooling_stride = 2。conv层之后均使用了ReLU激活函数，linear层使用dropout(p=0.5)正则化(在全连接层的前两层使用)。

总结：

VGG网络证明：浅而大的卷积核不如深而小的卷积核。

VGG网络使用3*3的卷积核来代替大的卷积核（如AlexNet第一层的11*11、ZFNet第一层的7*7）。在步幅为1的情况下，2层3*3卷积层与1层5*5的卷积层有着相同的感受野，3层3*3卷积层与1层7*7的卷积层有着相同的感受野。
多层小filter的卷积层引入了更多的非线性因素（经过了更多次激活函数），同时减少了模型参数，从而使得模型的分类性能得以提升。
VGG使用可复用的卷积块构造网络。不同的 VGG 模型可通过每个块中卷积层数量和输出通道数量的差异来定义。
块的使用使得网络的定义非常简洁。使用块可以有效地设计复杂的网络。
在VGG论文中，Simonyan和Zisserman尝试了各种架构。特别是他们发现深层且窄的卷积（即3 × 3）比较浅层且宽的卷积更有效。
VGG网络使用数据增强：随机水平翻转、随机RGB色移等。此外在预处理阶段，图像的每个像素值都会减去在训练集上计算得到的RGB三通道均值。

pytorch搭建VGG网络

代码中有详细注释。

import torch
from torchvision import transforms,datasets
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
from torchvision import models

# Build the VGG-16 network
# Data preprocessing
# Fashion-MNIST images are 28*28 grayscale while the VGG network expects
# 224*224 inputs, so every image is resized. Upscaling this much does not work
# very well and is not recommended for real applications.


def _build_transform():
    """Return the ToTensor -> Resize((224, 224)) -> Normalize(0.5, 0.5) pipeline."""
    steps = [
        transforms.ToTensor(),
        transforms.Resize((224, 224)),
        transforms.Normalize((0.5,), (0.5,)),
    ]
    return transforms.Compose(steps)


# The training and test splits use the same (separately built) pipeline.
train_transforms = _build_transform()
test_transforms = _build_transform()

# Load the datasets (downloaded into ./data when missing)
train_data = datasets.FashionMNIST(
    root="./data", train=True, download=True, transform=train_transforms
)
test_data = datasets.FashionMNIST(
    root="./data", train=False, download=True, transform=test_transforms
)

# Wrap the datasets in iterators
batch_size = 2
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size, shuffle=True
)

# Inspect one batch of images and labels from the iterator
# image,label = next(iter(train_loader))
# print("image.shape:{}\n,label.shape:{}".format(image.shape,label.shape))

# Build the VGG network

# Layer layouts of the VGG variants: an integer is the number of output
# channels of a 3x3 convolution, 'M' is a 2x2 max-pooling layer.
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG(nn.Module):
    """VGG-style classifier: a convolutional feature extractor plus three linear layers.

    Every convolution is followed by ``BatchNorm2d`` and ``ReLU``. The network
    returns raw logits (no activation after the last linear layer), which is
    what ``nn.CrossEntropyLoss`` expects.

    Args:
        vgg_name: Key into ``cfg`` selecting the layer layout
            (``'VGG11'``, ``'VGG13'``, ``'VGG16'`` or ``'VGG19'``).
        num_classes: Number of output logits. Defaults to 10.
        in_channels: Number of channels of the input images. Defaults to 1.
        hidden_units: Width of the two hidden fully connected layers.
            Defaults to 4096.

    Raises:
        KeyError: If ``vgg_name`` is not a key of ``cfg``.
    """

    def __init__(self, vgg_name, num_classes=10, in_channels=1, hidden_units=4096):
        super().__init__()
        try:
            layer_cfg = cfg[vgg_name]
        except KeyError:
            raise KeyError(
                f"unknown VGG variant {vgg_name!r}; expected one of {sorted(cfg)}"
            ) from None

        # Convolutional feature extractor built from the selected layout.
        self.features = self.make_layers(layer_cfg, in_channels)
        # Forces a 7x7 feature map so the classifier input size is fixed.
        # For 224x224 inputs the feature map is already 7x7 and this is a no-op.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512 * 7 * 7, hidden_units), nn.ReLU(), nn.Dropout(),
            nn.Linear(hidden_units, hidden_units), nn.ReLU(), nn.Dropout(),
            # No activation here: a trailing ReLU would clamp the logits to
            # be non-negative before they reach the loss.
            nn.Linear(hidden_units, num_classes),
        )

    def forward(self, x):
        """Return the class logits, shape ``(batch, num_classes)``, for a batch of images."""
        feature = self.avgpool(self.features(x))
        # The classifier starts with nn.Flatten, so no manual reshape is needed.
        return self.classifier(feature)

    def make_layers(self, cfg, in_channels=1):
        """Build the convolutional part of the network from a layout list.

        Args:
            cfg: Sequence of layer specs; ``"M"`` adds a 2x2 max-pooling layer
                with stride 2, any other entry is taken as the number of output
                channels of a 3x3 convolution (padding 1) followed by
                ``BatchNorm2d`` and ``ReLU``.
            in_channels: Number of channels of the input to the first
                convolution. Defaults to 1.

        Returns:
            An ``nn.Sequential`` holding the layers in order.
        """
        layers = []
        for x in cfg:
            if x == "M":
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                layers.extend([
                    nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                    nn.BatchNorm2d(x),
                    nn.ReLU(),
                ])
                # The next convolution consumes this layer's output channels.
                in_channels = x
        return nn.Sequential(*layers)
    
# Run on the GPU when one is available, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("device:", device)
# Instantiate the network and move it to the selected device
net = VGG(vgg_name="VGG16")
net = net.to(device)
# Loss function
criterion = torch.nn.CrossEntropyLoss()
# Optimizer
# NOTE(review): momentum=0.01 is unusually small (0.9 is the common choice) —
# confirm this value is intended.
optim = torch.optim.SGD(params=net.parameters(), lr=0.001, momentum=0.01)
epochs = 20
# TensorBoard writer used to save run data
write = torch.utils.tensorboard.SummaryWriter(log_dir="run/example")

# Training
# Per-epoch mean losses, kept for plotting after training.
train_loss = []
test_loss = []

for epoch in range(epochs):
    # ---- training pass ----
    net.train()
    train_losses = 0.0
    for image, label in train_loader:
        image, label = image.to(device), label.to(device)
        optim.zero_grad()   # reset the gradients
        y_hat = net(image)
        loss = criterion(y_hat, label)
        loss.backward()     # back-propagation
        optim.step()        # update the parameters
        train_losses += loss.item()

    # ---- evaluation pass ----
    test_losses = 0.0
    correct = 0
    total = 0
    net.eval()  # switch off dropout / use running batch-norm statistics
    with torch.no_grad():
        for image, label in test_loader:
            image, label = image.to(device), label.to(device)
            y_hat = net(image)
            # Accumulate into the test counter, not the training one.
            test_losses += criterion(y_hat, label).item()
            # The predicted class is the index of the largest logit in each row.
            predicted = y_hat.argmax(dim=1)
            correct += (predicted == label).sum().item()
            total += label.size(0)

    # Mean loss per batch and accuracy over the whole test set.
    epoch_train_loss = train_losses / len(train_loader)
    epoch_test_loss = test_losses / len(test_loader)
    accuracy = correct / total if total else 0.0

    train_loss.append(epoch_train_loss)
    test_loss.append(epoch_test_loss)

    # Record the curves in TensorBoard.
    write.add_scalar("Loss/train", epoch_train_loss, epoch)
    write.add_scalar("Loss/test", epoch_test_loss, epoch)
    write.add_scalar("Accuracy/test", accuracy, epoch)

    print("训练集训练次数:{}/{}:".format((epoch+1),epochs),
          "训练误差:{:.3f}".format(epoch_train_loss),
          "测试误差:{:.3f}".format(epoch_test_loss),
          "模型分类准确率:{:.3f}".format(accuracy))

# Flush pending TensorBoard events to disk.
write.close()
    
# Visualise the losses
# Convert the per-epoch losses to NumPy arrays. float() accepts plain numbers
# as well as 0-dim tensors (including ones living on the GPU), so no explicit
# device transfer is needed; np.array itself takes no ``device`` argument here.
train_loss = np.array([float(value) for value in train_loss])
test_loss = np.array([float(value) for value in test_loss])
# Plot both curves; the keyword is ``label`` (singular) so legend() can pick it up.
plt.plot(train_loss, label="train_loss")
plt.plot(test_loss, label="test_loss")
plt.legend()
plt.show()