Gradient derivation for a single-output perceptron
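For reference, a sketch of the math that autograd computes below: with $o = \sigma(xw^\top)$ and target $t$, the loss is $E = (o - t)^2$, and since $\sigma'(z) = \sigma(z)(1 - \sigma(z))$,

$\frac{\partial E}{\partial w_j} = 2(o - t)\, o(1 - o)\, x_j$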
import torch
from torch.nn import functional as F
x = torch.randn(1,10)
w = torch.randn(1,10,requires_grad=True)
o = torch.sigmoid(x@w.t())
loss = F.mse_loss(o,torch.ones(1,1)) # mse_loss(input, target): pass the prediction first so the target does not carry grad
loss.backward() # partial derivative of loss w.r.t. w
print(loss.shape) # scalar
print(w.grad)
torch.Size([])
tensor([[ 0.1487, -0.2792, -0.1376, 0.0631, 0.1375, -0.0981, 0.4706, -0.1833,
-0.0650, 0.0343]])
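A minimal sketch verifying the autograd result against the formula above (the seed is only for reproducibility of this illustration):

import torch
from torch.nn import functional as F
torch.manual_seed(0) # illustrative seed
x = torch.randn(1,10)
w = torch.randn(1,10,requires_grad=True)
o = torch.sigmoid(x@w.t())
t = torch.ones(1,1)
loss = F.mse_loss(o,t)
loss.backward()
manual = 2*(o - t)*o*(1 - o)*x # d(loss)/dw per the formula above, broadcast to (1,10)
print(torch.allclose(w.grad, manual)) # True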
Multi-output perceptron
x = torch.randn(1,10)
w = torch.randn(2,10,requires_grad=True)
o = torch.sigmoid(x@w.t())
loss = F.mse_loss(o,torch.ones(1,2))
loss.backward()
print(loss)
print(w.grad)
tensor(0.1992, grad_fn=<MseLossBackward0>)
tensor([[-0.0304, -0.1594, -0.1914, -0.2953, 0.0601, 0.0641, -0.1157, -0.1107,
0.1565, 0.0722],
[-0.0006, -0.0033, -0.0039, -0.0061, 0.0012, 0.0013, -0.0024, -0.0023,
0.0032, 0.0015]])
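As a sketch, the same check for the multi-output case, reusing x, w, o from the snippet above: with reduction='mean' over the 2 outputs, the 1/2 cancels the 2 from the square, so d(loss)/dw[k,j] = (o_k - 1)*o_k*(1 - o_k)*x_j:

manual = ((o - 1)*o*(1 - o)).t() @ x # (2,1) @ (1,10) -> (2,10)
print(torch.allclose(w.grad, manual)) # True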
Chain rule: when a derivative cannot be taken in one step, differentiate through the intermediate variables and multiply the pieces along the chain.
x = torch.tensor(1.)
w1 = torch.tensor(2.,requires_grad=True)
b1 = torch.tensor(1.)
w2 = torch.tensor(2.,requires_grad=True)
b2 = torch.tensor(1.)
y1 = x*w1 + b1
y2 = y1*w2 + b2
dy2_dy1 = torch.autograd.grad(y2,[y1],retain_graph=True)[0]
dy1_dw1 = torch.autograd.grad(y1,[w1],retain_graph=True)[0] # [0] extracts the tensor from the returned tuple
dy2_dw1 = torch.autograd.grad(y2,[w1],retain_graph=True)[0]
l = dy2_dy1*dy1_dw1 # chain rule: should equal dy2_dw1
o = dy2_dw1 # direct derivative
print(l)
print(o)
tensor(2.)
tensor(2.)
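Sanity-checking the numbers by hand: dy2/dy1 = w2 = 2 and dy1/dw1 = x = 1, so the chain rule gives dy2/dw1 = 2 * 1 = 2, matching the direct autograd result.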
Simply put, gradient descent is a method for finding where an objective function attains its minimum.
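Formally, each step moves the parameters against the gradient, $x_{t+1} = x_t - \eta\,\nabla f(x_t)$ with learning rate $\eta$; when $\nabla f = 0$ the iteration has reached a stationary point (here, a local minimum).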
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import pyplot as plt
import torch
def himmelblau(x): # the test function: Himmelblau's function
    return (x[0]**2 + x[1] - 11)**2 + (x[0] + x[1]**2 - 7)**2
# plotting (this part can be skipped)
x = np.arange(-6,6,0.1)
y = np.arange(-6,6,0.1)
print('x,y range:',x.shape,y.shape)
X,Y = np.meshgrid(x,y) # quickly generate the coordinate matrices
print('x,y maps:',X.shape,Y.shape)
Z = himmelblau([X,Y])
fig = plt.figure('himmelblau')
ax = fig.add_subplot(projection='3d') # fig.gca(projection='3d') is deprecated/removed in newer matplotlib
ax.plot_surface(X,Y,Z) # without this the figure would be empty
ax.view_init(60,-30)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()
# gradient descent
# alternative initial values to try: [1.,0.], [-4.,0.], [4.,0.]
x = torch.tensor([0.,0.],requires_grad=True)
optimizer = torch.optim.Adam([x],lr=1e-3) # the parameters to optimize and the learning rate; the optimizer performs each update
for step in range(20000):
    pred = himmelblau(x)
    optimizer.zero_grad() # reset the parameter gradients to zero
    pred.backward() # backprop to compute gradients; when the gradient reaches 0 we are at a local minimum of the function
    optimizer.step() # update the parameters; for plain gradient descent: x' = x - lr*dx, y' = y - lr*dy (Adam adds momentum and adaptive scaling)
    if step % 2000 == 0:
        print('step{}:x={},f(x)={}'.format(step,x.tolist(),pred.item()))
step0:x=[0.0009999999310821295, 0.0009999999310821295],f(x)=170.0
step2000:x=[2.3331806659698486, 1.9540694952011108],f(x)=13.730916023254395
step4000:x=[2.9820079803466797, 2.0270984172821045],f(x)=0.014858869835734367
step6000:x=[2.999983549118042, 2.0000221729278564],f(x)=1.1074007488787174e-08
step8000:x=[2.9999938011169434, 2.0000083446502686],f(x)=1.5572823031106964e-09
step10000:x=[2.999997854232788, 2.000002861022949],f(x)=1.8189894035458565e-10
step12000:x=[2.9999992847442627, 2.0000009536743164],f(x)=1.6370904631912708e-11
step14000:x=[2.999999761581421, 2.000000238418579],f(x)=1.8189894035458565e-12
step16000:x=[3.0, 2.0],f(x)=0.0
step18000:x=[3.0, 2.0],f(x)=0.0 # converged: this is an optimal solution
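Note that Himmelblau's function has four global minima, all with f = 0: approximately (3, 2), (-2.805, 3.131), (-3.779, -3.283) and (3.584, -1.848). Which one gradient descent reaches depends on the initial value; a sketch using one of the commented alternatives above:

x = torch.tensor([-4.,0.],requires_grad=True) # a different starting point
optimizer = torch.optim.Adam([x],lr=1e-3)
for step in range(20000):
    pred = himmelblau(x)
    optimizer.zero_grad()
    pred.backward()
    optimizer.step()
print(x.tolist()) # converges to roughly [-3.779, -3.283]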
Additional notes:
The tolist() function:
Converts an array or matrix into a (nested) Python list; it comes up often when preprocessing datasets.
import numpy as np
a = np.ones(5) # array([1., 1., 1., 1., 1.])
print(a.tolist()) # [1.0, 1.0, 1.0, 1.0, 1.0]
b = [[1, 2, 3], [0, 9, 8, 0]] # ragged rows, so NumPy stores them as an object matrix
c = np.mat(b) # note: newer NumPy versions may require np.mat(b, dtype=object) for ragged input
print(c) # [[list([1, 2, 3]) list([0, 9, 8, 0])]]
print(c.tolist()) # [[[1, 2, 3], [0, 9, 8, 0]]]
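torch tensors provide the same method, which is what x.tolist() relies on in the training loop above; a minimal sketch:

import torch
t = torch.tensor([[3., 2.]])
print(t.tolist()) # [[3.0, 2.0]]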