PyTorch入门八 || 循环神经网络(基础)
RNN专门用于处理带序列模式的数据,如天气预测,自然语言处理
预测降雨的例子
给一个气象表,包含(温度|气压|是否下雨)这三个信息字段
我们利用前三天的气象信息来预测第四天(今天)是否会下雨,因此需要将数据按天划分为四个一组:前三天的数据作为输入(input),第四天是否下雨作为标签(label)。
原理图
h0 与 x1 作为输入 经过 RNN Cell 得到输出h1,再将h1和x2作为输入 经过RNN Cell得到h2输出,一直循环下去,h0可以是CNN的输出,这样就将CNN与RNN接到一起,实现图片转文字,也可以将h0设置为与 h1维度相同的向量,初始为0,作为输入;
其中RNN Cell 本质上是一个线性层加上激活函数,即 $h_t = \tanh(W_{ih}x_t + b_{ih} + W_{hh}h_{t-1} + b_{hh})$;图中各个时间步的RNN Cell都是同一个(共享同一组权重)
如何使用RNN Cell
import torch
# Minimal RNNCell demo: feed a random sequence through the cell one time step
# at a time, carrying the hidden state forward by hand.
batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2

cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)

# Sequence-first layout: (seq_len, batch_size, input_size).
dataset = torch.randn(seq_len, batch_size, input_size)
# Initial hidden state h0: an all-zero tensor of shape (batch_size, hidden_size).
hidden = torch.zeros(batch_size, hidden_size)

# Iterating over the first dimension yields one (batch_size, input_size)
# slice per time step. The loop variable is deliberately not called `input`
# so the builtin is not shadowed at module level.
for idx, x_t in enumerate(dataset):
    print('=' * 20, idx, '=' * 20)
    print('Input size:', x_t.shape)
    # The cell returns the new hidden state, which is fed back in next step.
    hidden = cell(x_t, hidden)
    print('outputs size:', hidden.shape)
    print(hidden)
如何使用RNN
使用 torch.nn.RNN 时,时间步上的循环由模块内部自动完成,不需要像使用 RNNCell 那样手写 for 循环
import torch
# Minimal torch.nn.RNN demo: the module iterates over the time steps itself.
batch_size, seq_len = 1, 3
input_size, hidden_size = 4, 2
num_layers = 1

cell = torch.nn.RNN(input_size, hidden_size, num_layers)

# Sequence-first input: (seq_len, batch_size, input_size).
inputs = torch.randn(seq_len, batch_size, input_size)

# Start from an all-zero hidden state of shape
# (num_layers, batch_size, hidden_size); the call returns the per-step
# outputs and the final hidden state.
out, hidden = cell(inputs, torch.zeros(num_layers, batch_size, hidden_size))

print('outputs size:', out.shape)
print('output:', out)
print('hidden size:', hidden.shape)
print('hidden:', hidden)
多层RNN结构
同样颜色的RNN Cell 是同一个
例子
第一步:将字符向量化
第二步:训练,如何计算loss
将RNN在每个时间步的输出看作各个字符类别上的分布,因此每个时间步都是一个分类问题:对每一步的输出与对应标签计算交叉熵loss,再把各时间步的loss相加
code:RNNCell版
import torch
# Toy data for the RNNCell version: characters are encoded as indices into
# idx2char and the input sequence is fed to the network as one-hot vectors.
batch_size = 1
input_size = hidden_size = 4

idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]

# Row i of the lookup table is the one-hot encoding of character index i.
one_hot_lookup = [
    [1 if col == row else 0 for col in range(len(idx2char))]
    for row in range(len(idx2char))
]
x_one_hot = [one_hot_lookup[char_idx] for char_idx in x_data]

# inputs: (seq_len, batch_size, input_size); labels: (seq_len, 1).
inputs = torch.Tensor(x_one_hot).reshape(-1, batch_size, input_size)
labels = torch.LongTensor(y_data).unsqueeze(1)
class Model(torch.nn.Module):
    """Single-step recurrent model wrapping one ``torch.nn.RNNCell``.

    The caller drives the time loop: each call to the model consumes one
    time step and returns the next hidden state.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Number of features in the hidden state.
        batch_size: Batch size used when building the initial hidden state.
    """

    def __init__(self, input_size, hidden_size, batch_size):
        super(Model, self).__init__()
        # batch_size is only needed when constructing the initial state h0.
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnncell = torch.nn.RNNCell(input_size=self.input_size,
                                        hidden_size=self.hidden_size)

    def forward(self, input, hidden):
        """Advance one time step and return the new hidden state.

        Args:
            input: Input for the current step, passed to the RNN cell.
            hidden: Hidden state from the previous step.
        """
        return self.rnncell(input, hidden)

    def init_hidden(self):
        """Return an all-zero initial hidden state h0.

        The tensor has shape ``(batch_size, hidden_size)`` and is allocated
        with the same dtype and device as the cell's weights, so it stays
        usable after the module is moved or cast (``.to(...)``, ``.double()``).
        """
        return self.rnncell.weight_hh.new_zeros(self.batch_size,
                                                self.hidden_size)
# Training setup for the RNNCell version: the hidden state itself is used as
# the class scores, so it is passed straight to the cross-entropy loss.
net = Model(input_size, hidden_size, batch_size)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

if __name__ == '__main__':
    for epoch in range(15):
        # The sequence loss is the sum of the per-step losses.
        loss = 0
        optimizer.zero_grad()
        hidden = net.init_hidden()
        print('Predicted string:', end='')
        # Walk the sequence one time step at a time. The loop variable is not
        # named `input` so the builtin is not shadowed.
        for x_t, label in zip(inputs, labels):
            hidden = net(x_t, hidden)
            loss += criterion(hidden, label)
            # Index of the highest score is the predicted character.
            _, idx = hidden.max(dim=1)
            print(idx2char[idx.item()], end='')
        # Backpropagate once through the whole unrolled sequence.
        loss.backward()
        optimizer.step()
        print(',Epoch [%d/15] loss=%.4f' % (epoch + 1, loss.item()))
code:RNN版
import torch
# Toy data for the torch.nn.RNN version: the whole sequence is passed to the
# network in a single call, so the labels stay one-dimensional.
batch_size, num_layers = 1, 1
input_size = hidden_size = 4
seq_len = 5

idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]

# Row i of the lookup table is the one-hot encoding of character index i.
one_hot_lookup = [
    [1 if col == row else 0 for col in range(len(idx2char))]
    for row in range(len(idx2char))
]
x_one_hot = [one_hot_lookup[char_idx] for char_idx in x_data]

# inputs: (seq_len, batch_size, input_size); labels: (seq_len,).
inputs = torch.Tensor(x_one_hot).reshape(seq_len, batch_size, input_size)
labels = torch.LongTensor(y_data)
class Model(torch.nn.Module):
    """Whole-sequence recurrent model wrapping ``torch.nn.RNN``.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Number of features in the hidden state.
        batch_size: Stored on the instance for callers that read it; the
            forward pass takes the batch size from the input itself.
        num_layers: Number of stacked recurrent layers.
    """

    def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
        super(Model, self).__init__()
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnn = torch.nn.RNN(input_size=self.input_size,
                                hidden_size=self.hidden_size,
                                num_layers=num_layers)

    def forward(self, input):
        """Run the RNN over a sequence and return the flattened outputs.

        Args:
            input: Tensor of shape ``(seq_len, batch, input_size)``.

        Returns:
            Tensor of shape ``(seq_len * batch, hidden_size)``, i.e. one row
            per time step and batch element.
        """
        # Build h0 from the actual batch dimension, with the input's dtype
        # and device, so the model is not tied to one fixed batch size.
        hidden = input.new_zeros(self.num_layers, input.size(1),
                                 self.hidden_size)
        out, _ = self.rnn(input, hidden)
        return out.view(-1, self.hidden_size)
# Training setup for the torch.nn.RNN version: the network processes the
# whole sequence in one call, so a single loss covers every time step.
net = Model(input_size, hidden_size, batch_size, num_layers)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)
# Single source of truth for the epoch count, shared by the loop and the
# progress message so the two cannot disagree.
num_epochs = 50

if __name__ == '__main__':
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The prediction shown comes from the outputs computed before this
        # epoch's parameter update.
        _, idx = outputs.max(dim=1)
        idx = idx.tolist()
        print('Predicted:', ''.join([idx2char[x] for x in idx]), end='')
        print(',Epoch [%d/%d] loss=%.4f' % (epoch + 1, num_epochs, loss.item()))
改进:
独热向量具有一些缺点,比如密度低,维度高,硬编码。因此引入Embedding的概念
网络结构
import torch
# Toy data for the Embedding version: the network receives raw character
# indices and no one-hot encoding is built.
batch_size = 1
seq_len = 5
num_layers = 2

# Sizes of the successive representations.
input_size, embedding_size, hidden_size, num_class = 4, 10, 8, 4

idx2char = ['e', 'h', 'l', 'o']
x_data = [[1, 0, 2, 2, 3]]
y_data = [3, 1, 2, 3, 2]

# inputs: (batch_size, seq_len) index tensor; labels: (seq_len,).
inputs = torch.LongTensor(x_data)
labels = torch.LongTensor(y_data)
class Model2(torch.nn.Module):
    """Embedding -> multi-layer RNN -> linear classifier over characters.

    Every constructor argument is optional. An argument left as ``None``
    falls back to the module-level setting of the same role
    (``input_size``, ``embedding_size``, ``hidden_size``, ``num_layers``,
    ``num_class``), so ``Model2()`` behaves as before.

    Args:
        vocab_size: Number of distinct input indices (embedding rows).
        embed_dim: Size of each embedding vector.
        hidden_dim: Number of features in the RNN hidden state.
        n_layers: Number of stacked recurrent layers.
        n_classes: Number of output classes.
    """

    def __init__(self, vocab_size=None, embed_dim=None, hidden_dim=None,
                 n_layers=None, n_classes=None):
        super(Model2, self).__init__()
        if vocab_size is None:
            vocab_size = input_size
        if embed_dim is None:
            embed_dim = embedding_size
        if hidden_dim is None:
            hidden_dim = hidden_size
        if n_layers is None:
            n_layers = num_layers
        if n_classes is None:
            n_classes = num_class

        self.emb = torch.nn.Embedding(vocab_size, embed_dim)
        self.rnn = torch.nn.RNN(input_size=embed_dim,
                                hidden_size=hidden_dim,
                                num_layers=n_layers,
                                batch_first=True)
        self.fc = torch.nn.Linear(hidden_dim, n_classes)

    def forward(self, x):
        """Return class scores for every position of every sequence.

        Args:
            x: Integer index tensor of shape ``(batch, seq_len)``.

        Returns:
            Tensor of shape ``(batch * seq_len, n_classes)``.
        """
        x = self.emb(x)
        # Sizes are read from the layers themselves rather than from module
        # globals, and h0 follows the dtype/device of the embedded input.
        hidden = x.new_zeros(self.rnn.num_layers, x.size(0),
                             self.rnn.hidden_size)
        x, _ = self.rnn(x, hidden)
        x = self.fc(x)
        return x.view(-1, self.fc.out_features)
# Training setup for the Embedding version: the network maps the index
# sequence to per-position class scores in a single call.
net = Model2()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

if __name__ == '__main__':
    for epoch in range(15):
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The prediction shown comes from the outputs computed before this
        # epoch's parameter update.
        _, idx = outputs.max(dim=1)
        idx = idx.tolist()
        print('Predicted:', ''.join([idx2char[x] for x in idx]), end='')
        print(',Epoch [%d/15] loss=%.4f' % (epoch + 1, loss.item()))