Modern Convolutional Neural Networks
Deep Convolutional Neural Networks (AlexNet)
Networks Using Blocks (VGG)
Network in Network (NiN)
Networks with Parallel Concatenations (GoogLeNet)
Batch Normalization (BN)
Tips
Codes&Homework
import torch
from torch import nn
from d2l import torch as d2l
import numpy as np
import matplotlib.pyplot as plt
def batch_norm(x, gamma, beta, moving_mean, moving_var, eps, momentum):
    # In prediction mode, normalize with the moving statistics accumulated during training
    if not torch.is_grad_enabled():
        x_hat = (x - moving_mean) / torch.sqrt(moving_var + eps)
    else:
        assert len(x.shape) in (2, 4)
        if len(x.shape) == 2:
            # Fully connected layer: statistics over the batch dimension
            mean = x.mean(dim=0)
            var = ((x - mean) ** 2).mean(dim=0)
        else:
            # Convolutional layer: per-channel statistics over batch, height and width
            mean = x.mean(dim=(0, 2, 3), keepdim=True)
            var = ((x - mean) ** 2).mean(dim=(0, 2, 3), keepdim=True)
        x_hat = (x - mean) / torch.sqrt(var + eps)
        # Update the moving statistics with the current batch statistics
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * x_hat + beta  # Scale and shift
    return Y, moving_mean.data, moving_var.data
'''
[Basic design pattern for implementing a custom network layer]
1. First define the math in a standalone function (e.g. batch_norm above).
2. Then integrate that function into a custom layer, whose code has to handle:
   moving data to the training device / initializing variables / tracking variables and their values.
'''
class BatchNorm(nn.Module):
    def __init__(self, num_features, num_dims):
        super().__init__()
        # num_dims = 2 for fully connected layers, 4 for convolutional layers
        if num_dims == 2:
            shape = (1, num_features)
        else:
            shape = (1, num_features, 1, 1)
        # gamma and beta are learnable scale and shift parameters
        self.gamma = nn.Parameter(torch.ones(shape))
        self.beta = nn.Parameter(torch.zeros(shape))
        # The moving statistics are not model parameters and are updated outside autograd
        self.moving_mean = torch.zeros(shape)
        self.moving_var = torch.ones(shape)

    def forward(self, x):
        # Make sure the moving statistics live on the same device as the input
        if self.moving_mean.device != x.device:
            self.moving_mean = self.moving_mean.to(x.device)
            self.moving_var = self.moving_var.to(x.device)
        Y, self.moving_mean, self.moving_var = batch_norm(
            x, self.gamma, self.beta, self.moving_mean, self.moving_var,
            eps=1e-5, momentum=0.9)
        return Y
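'''
A minimal sanity-check sketch for the custom layer: in training mode the moving statistics
drift away from their initial values of 0 and 1, while the output keeps the input's shape.
'''
bn_check = BatchNorm(3, num_dims=4)
x_check = torch.randn(2, 3, 4, 4)
y_check = bn_check(x_check)
print(y_check.shape)                     # torch.Size([2, 3, 4, 4])
print(bn_check.moving_mean.reshape(-1))  # no longer all zeros after one forward pass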
net = nn.Sequential(
nn.Conv2d(1, 6, kernel_size=5), BatchNorm(6, num_dims=4), nn.Sigmoid(),
nn.AvgPool2d(kernel_size=2, stride=2),
nn.Conv2d(6, 16, kernel_size=5), BatchNorm(16, num_dims=4), nn.Sigmoid(),
nn.AvgPool2d(kernel_size=2, stride=2), nn.Flatten(),
nn.Linear(16*4*4, 120), BatchNorm(120, num_dims=2), nn.Sigmoid(),
nn.Linear(120, 84), BatchNorm(84, num_dims=2), nn.Sigmoid(),
nn.Linear(84, 10))
lr, num_epochs, batch_size = 1.0, 10, 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())
gamma1 = net[1].gamma.data.reshape((-1,)).cpu().numpy()
print('gamma1:', net[1].gamma.reshape((-1,)))
print('beta1:', net[1].beta.reshape((-1,)))
print('gamma2:', net[5].gamma.reshape((-1,)))
print('beta2:', net[5].beta.reshape((-1,)))
print('gamma3:', net[10].gamma.reshape((-1,)))
print('beta3:', net[10].beta.reshape((-1,)))
print('gamma4:', net[13].gamma.reshape((-1,)))
print('beta4:', net[13].beta.reshape((-1,)))
# PyTorch's built-in layer. Note that nn.BatchNorm2d's `momentum` weights the *new* batch
# statistics (moving = (1 - momentum) * moving + momentum * batch), the opposite of the
# convention used by the custom batch_norm above, so momentum=0.9 here is not equivalent.
nn.BatchNorm2d(num_features=2, eps=1e-5, momentum=0.9)
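'''
A minimal sketch of the concise implementation: the same LeNet-style network as `net` above,
written with the built-in layers so the framework manages the moving statistics itself.
'''
net_builtin = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5), nn.BatchNorm2d(6), nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5), nn.BatchNorm2d(16), nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2), nn.Flatten(),
    nn.Linear(16 * 4 * 4, 120), nn.BatchNorm1d(120), nn.Sigmoid(),
    nn.Linear(120, 84), nn.BatchNorm1d(84), nn.Sigmoid(),
    nn.Linear(84, 10))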
def dropout(x, level):
    # Inverted dropout on a NumPy array: zero out units with probability `level`
    # and rescale the survivors so the expected activation is unchanged.
    if level < 0 or level >= 1:
        raise Exception('Dropout level must be in the interval [0, 1).')
    retain_prob = 1. - level
    # Draw a 0/1 mask: each unit is kept with probability retain_prob
    sample = np.random.binomial(n=1, p=retain_prob, size=x.shape)
    x *= sample
    x /= retain_prob
    return x
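'''
A short usage sketch of the dropout function (the input values are arbitrary): with level=0.5,
roughly half of the entries are zeroed and the survivors are doubled.
'''
x_demo = np.arange(10, dtype=np.float64)
print(dropout(x_demo, 0.5))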
Residual Networks (ResNet)
Tips
Codes&Homework
import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l
class Residual(nn.Module):
    '''
    Implements the computation of a residual block, including an optional 1x1 convolution
    for the case where the input and output channel counts (or spatial sizes) differ.
    '''
    def __init__(self, input_channels, num_channels, use_1x1conv=False, strides=1):
        '''
        Initialize the residual block.
        :param input_channels: number of channels of the input
        :param num_channels: number of output channels of the two convolutional layers
        :param use_1x1conv: whether to use a 1x1 convolution on the shortcut,
                            needed when input_channels != num_channels or strides != 1
        :param strides: stride of the first convolution
        '''
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels, kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels, num_channels, kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        if self.conv3:
            x = self.conv3(x)
        y += x  # Add the shortcut connection before the final activation
        return F.relu(y)
blk = Residual(3, 3)
x = torch.rand(4, 3, 6, 6)
y = blk(x)
print('Output of the residual block:', y)
print('Output shape of the residual block:', y.shape)
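'''
When the channel count changes or the spatial size is halved, the 1x1 convolution on the
shortcut is needed; a quick check (a sketch following the same pattern as above):
'''
blk2 = Residual(3, 6, use_1x1conv=True, strides=2)
print('Output shape with 1x1 conv and stride 2:', blk2(x).shape)  # torch.Size([4, 6, 3, 3])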
b1 = nn.Sequential(nn.Conv2d(1,64,kernel_size=7,stride=2,padding=3),
nn.BatchNorm2d(64),nn.ReLU(),
nn.MaxPool2d(kernel_size=3,stride=2,padding=1))
def resnet_block(input_channels, num_channels, num_residuals, first_block=False):
    # Build one stage of ResNet: the first residual block (except in the very first stage)
    # halves the spatial size and changes the channel count; the rest keep the shape.
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(input_channels, num_channels, use_1x1conv=True, strides=2))
        else:
            blk.append(Residual(num_channels, num_channels))
    return blk
b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
b3 = nn.Sequential(*resnet_block(64, 128, 2))
b4 = nn.Sequential(*resnet_block(128, 256, 2))
b5 = nn.Sequential(*resnet_block(256, 512, 2))
net = nn.Sequential(b1, b2, b3, b4, b5,
nn.AdaptiveAvgPool2d((1,1)),
nn.Flatten(), nn.Linear(512, 10))
X = torch.rand(size=(1, 1, 224, 224))
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)
lr, num_epochs, batch_size = 0.05, 10, 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())
class ResidualBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # The shortcut is an identity mapping unless the shape changes,
        # in which case a 1x1 convolution projects the input.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels * self.expansion:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * self.expansion, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * self.expansion)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)  # Apply the shortcut to the original input
        out = F.relu(out)
        return out
class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_channels, out_channels, stride=1):
        super(Bottleneck, self).__init__()
        # 1x1 conv to reduce channels, 3x3 conv, then 1x1 conv to expand channels
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(out_channels, self.expansion * out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion * out_channels)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, self.expansion * out_channels,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * out_channels)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 64
        # CIFAR-style stem: a single 3x3 convolution, no max pooling
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self.__make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self.__make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self.__make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self.__make_layer(block, 512, num_blocks[3], stride=2)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def __make_layer(self, block, out_channels, num_blocks, stride):
        # The first block of a stage may downsample; the remaining blocks use stride 1
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for s in strides:
            layers.append(block(self.in_channels, out_channels, s))
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = F.avg_pool2d(x, 4)
        x = x.view(x.size(0), -1)  # Flatten before the fully connected layer
        out = self.fc(x)
        return out
resnet18 = ResNet(ResidualBlock, [2, 2, 2, 2])
resnet34 = ResNet(ResidualBlock, [3, 4, 6, 3])
resnet50 = ResNet(Bottleneck, [3, 4, 6, 3])
resnet101 = ResNet(Bottleneck, [3, 4, 23, 3])
resnet152 = ResNet(Bottleneck, [3, 8, 36, 3])
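'''
A quick shape-check sketch; this variant uses a 3x3 stem with stride 1 and no max pooling,
so it is laid out for CIFAR-sized 3x32x32 inputs rather than 224x224 ImageNet images.
'''
x_cifar = torch.rand(2, 3, 32, 32)
print(resnet18(x_cifar).shape)  # torch.Size([2, 10])
print(resnet50(x_cifar).shape)  # torch.Size([2, 10])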
Densely Connected Networks (DenseNet)
Tips
Codes&Homework
import torch
from torch import nn
from d2l import torch as d2l
def conv_block(in_channels, num_channels):
    # BN-ReLU-Conv block used inside a dense block
    return nn.Sequential(
        nn.BatchNorm2d(in_channels), nn.ReLU(),
        nn.Conv2d(in_channels, num_channels, kernel_size=3, padding=1)
    )

class DenseBlock(nn.Module):
    def __init__(self, num_convs, in_channels, num_channels):
        super(DenseBlock, self).__init__()
        layer = []
        for i in range(num_convs):
            # Each conv block sees the original input plus the outputs of all previous blocks
            layer.append(conv_block(num_channels * i + in_channels, num_channels))
        self.net = nn.Sequential(*layer)

    def forward(self, x):
        for blk in self.net:
            y = blk(x)
            # Concatenate input and output along the channel dimension
            x = torch.cat((x, y), dim=1)
        return x
blk = DenseBlock(2, 3, 10)
X = torch.randn(4, 3, 8, 8)
Y = blk(X)
print(Y,Y.shape)
def transition_block(in_channels, num_channels):
    # Transition layer: reduce the number of channels with a 1x1 convolution
    # and halve the height and width with average pooling
    return nn.Sequential(
        nn.BatchNorm2d(in_channels), nn.ReLU(),
        nn.Conv2d(in_channels, num_channels, kernel_size=1),
        nn.AvgPool2d(kernel_size=2, stride=2)
    )
transition_blk = transition_block(23,10)
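'''
Applying the transition layer to the dense block's output above (a quick check): the channel
count drops from 23 to 10 and the height and width are halved.
'''
print(transition_blk(Y).shape)  # torch.Size([4, 10, 4, 4])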
b1 = nn.Sequential(
nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
nn.BatchNorm2d(64), nn.ReLU(),
nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
num_channels, growth_rate = 64, 32
num_convs_in_dense_blocks = [4, 4, 4, 4]
'''
For the different DenseNets in the paper (DenseNet-121/169/201/161), it is enough to adjust
growth_rate and num_convs_in_dense_blocks above accordingly; see the sketch after the
network assembly below.
'''
blks = []
for i, num_convs in enumerate(num_convs_in_dense_blocks):
    blks.append(DenseBlock(num_convs, num_channels, growth_rate))
    # Track the number of output channels of the previous dense block
    num_channels += num_convs * growth_rate
    # Insert a transition layer between dense blocks to halve the number of channels
    if i != len(num_convs_in_dense_blocks) - 1:
        blks.append(transition_block(num_channels, num_channels // 2))
        num_channels = num_channels // 2
net = nn.Sequential(
b1, *blks,
nn.BatchNorm2d(num_channels), nn.ReLU(),
nn.AdaptiveAvgPool2d((1, 1)),
nn.Flatten(),
nn.Linear(num_channels, 10))
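'''
A sketch of how a deeper configuration could be assembled from the same building blocks.
The block sizes [6, 12, 24, 16] and growth rate 32 match DenseNet-121's layout, but this is
only an illustration (the helper name dense_net and the parameter block_config are ours):
the paper's DenseNet-BC models also use bottleneck 1x1 convolutions, which conv_block above
does not include.
'''
def dense_net(growth_rate=32, block_config=(6, 12, 24, 16), num_classes=10):
    stem = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64), nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    num_channels = 64
    blks = []
    for i, num_convs in enumerate(block_config):
        blks.append(DenseBlock(num_convs, num_channels, growth_rate))
        num_channels += num_convs * growth_rate
        if i != len(block_config) - 1:
            blks.append(transition_block(num_channels, num_channels // 2))
            num_channels = num_channels // 2
    return nn.Sequential(
        stem, *blks,
        nn.BatchNorm2d(num_channels), nn.ReLU(),
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
        nn.Linear(num_channels, num_classes))

densenet121_like = dense_net()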
class DenseLinearBlock(nn.Module):
    def __init__(self, num_linears, in_features, num_features):
        super(DenseLinearBlock, self).__init__()
        layer = []
        for i in range(num_linears):
            layer.append(nn.Linear(num_features * i + in_features, num_features))
        self.net = nn.Sequential(*layer)

    def forward(self, x):
        for blk in self.net:
            y = blk(x)
            x = torch.cat((x, y), dim=1)
        return x
class DenseMLP(nn.Module):
    def __init__(self, num_features, num_linears_in_dense_blocks):
        super(DenseMLP, self).__init__()
        self.num_features = num_features
        self.num_linears_in_dense_blocks = num_linears_in_dense_blocks
        blks = []
        for i, num_linears in enumerate(self.num_linears_in_dense_blocks):
            # Each dense block grows the feature dimension by num_linears * (current features // 2)
            blks.append(DenseLinearBlock(num_linears, self.num_features, self.num_features // 2))
            self.num_features += self.num_features // 2 * num_linears
        self.net = nn.Sequential(
            *blks,
            nn.BatchNorm1d(self.num_features), nn.ReLU(),
            nn.Linear(self.num_features, 1))

    def forward(self, x):
        for blk in self.net:
            x = blk(x)
            print(x.shape)
        return x
num_features = 20
num_linears_in_dense_blocks = [4, 4, 4, 4]
x = torch.rand((2, 20))
dense_mlp = DenseMLP(num_features, num_linears_in_dense_blocks)
print(dense_mlp(x).shape)