Creating the data
x has shape (200, 100)
w has shape (100, 1)
Mini-batches are obtained with data_iter
import torch
from torch.utils import data
import torch.nn as nn
n_examples = 200
n_features = 100
true_w = torch.randn(n_features, 1)       # ground-truth weight
true_b = torch.tensor(0.54)               # ground-truth bias
x_ = torch.randn(n_examples, n_features)  # feature matrix
y_ = x_ @ true_w + true_b                 # linear labels
y_ += torch.normal(0, 0.01, y_.shape)     # add small Gaussian noise
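The labels follow y = Xw + b plus small Gaussian noise, so a quick shape check (a minimal sanity-check sketch using only the tensors defined above) should show 200 rows of 100 features and 200 scalar targets:
print(x_.shape, y_.shape)  # expected: torch.Size([200, 100]) torch.Size([200, 1])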
def data_iter(x, y, batch_size):
    n_example = len(x)
    # shuffle the sample indices so every epoch sees the batches in a new order
    indices = torch.randperm(n_example)
    for i in range(0, n_example, batch_size):
        batch_indices = indices[i:min(i + batch_size, n_example)]
        yield x[batch_indices], y[batch_indices]
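The hand-written data_iter is equivalent in spirit to wrapping the tensors in TensorDataset and DataLoader from torch.utils.data (already imported above as data). A minimal sketch of that alternative, not used in the code below:
dataset = data.TensorDataset(x_, y_)                            # pair features with labels
loader = data.DataLoader(dataset, batch_size=20, shuffle=True)  # shuffled mini-batches
for xb, yb in loader:
    pass  # xb has shape (20, 100), yb has shape (20, 1)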
Apply weight decay only to the weight parameter w; the bias b does not need it.
Method 1
Pass a list of dictionaries (parameter groups) to the optimizer to specify different settings for different parameters.
wd = 3
net = nn.Linear(100, 1)
loss_function = nn.MSELoss()
# two parameter groups: the weight gets weight decay, the bias does not
optimizer = torch.optim.SGD([{'params': net.weight, 'weight_decay': wd},
                             {'params': net.bias}],
                            lr=0.03)
epochs = 3
for epoch in range(epochs):
    net.train()
    losses = 0.0
    for x, y in data_iter(x_, y_, batch_size=20):
        y_hat = net(x)
        loss = loss_function(y_hat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses += loss.item()
    print(losses)  # accumulated loss over the epoch
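For plain SGD, weight_decay=wd is equivalent to adding the L2 penalty (wd/2) * ||w||^2 to the loss, since the penalty's gradient is wd * w. A hedged sketch of that manual version, reusing the names defined above (net2 and plain_sgd are introduced here purely for illustration):
net2 = nn.Linear(100, 1)
plain_sgd = torch.optim.SGD(net2.parameters(), lr=0.03)  # no weight_decay argument
for epoch in range(epochs):
    for x, y in data_iter(x_, y_, batch_size=20):
        y_hat = net2(x)
        # add the L2 penalty on the weight by hand; the bias is left unpenalized
        loss = loss_function(y_hat, y) + (wd / 2) * (net2.weight ** 2).sum()
        plain_sgd.zero_grad()
        loss.backward()
        plain_sgd.step()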
Method 2
Method 2 uses two optimizers: the first handles the optimization of the weight w, the second the bias b. This is more cumbersome, since the gradients have to be zeroed twice and two separate update steps have to be taken.
wd = 3
net = nn.Linear(100, 1)
loss_function = nn.MSELoss()
# separate optimizers: weight decay only on the weight, none on the bias
optimizer_w = torch.optim.SGD([net.weight], lr=0.03, weight_decay=wd)
optimizer_b = torch.optim.SGD([net.bias], lr=0.03)
epochs = 3
for epoch in range(epochs):
    net.train()
    losses = 0.0
    for x, y in data_iter(x_, y_, batch_size=20):
        y_hat = net(x)
        loss = loss_function(y_hat, y)
        # both optimizers have to be cleared and stepped separately
        optimizer_w.zero_grad()
        optimizer_b.zero_grad()
        loss.backward()
        optimizer_w.step()
        optimizer_b.step()
        losses += loss.item()
    print(losses)  # accumulated loss over the epoch
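In larger models the parameter groups are usually built automatically, for example by excluding every bias from weight decay via named_parameters(). A minimal sketch of that pattern under the same setup (the group-building loop below is illustrative, not part of the original notes):
decay, no_decay = [], []
for name, param in net.named_parameters():
    # route biases to the group without weight decay; everything else gets decayed
    (no_decay if name.endswith('bias') else decay).append(param)
optimizer = torch.optim.SGD([{'params': decay, 'weight_decay': wd},
                             {'params': no_decay}], lr=0.03)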