代码:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Path to the training data file.
path = 'ex1data1.txt'
# Read the file; explicit column names are supplied, so every row is data.
data = pd.read_csv(path, names=['Population', 'Profit'])
# Prepend a constant column of ones so the intercept is learned as theta[0].
data.insert(0, 'ones', 1)
# Feature matrix: every column except the last (the ones column + Population).
X = data.iloc[:, :-1].values
# Target values: the last column (Profit), shaped as a column vector.
# -1 lets NumPy infer the number of rows instead of hard-coding the
# sample count, so the script works for a data file of any length.
y = data.iloc[:, -1].values.reshape(-1, 1)
# Cost function
def costfunction(X,y,theta):
    """Return the halved mean squared error of the linear model ``X @ theta``.

    Args:
        X: design matrix with one row per sample.
        y: target values, shaped so that ``X @ theta - y`` is element-wise.
        theta: parameter column vector.

    Returns:
        ``sum((X @ theta - y) ** 2) / (2 * len(X))`` as a scalar.
    """
    residual = X @ theta - y
    # The 1/2 factor cancels the 2 produced when differentiating the square.
    return np.sum(np.power(residual, 2)) / (2 * len(X))
# Initial parameters: a 2x1 column vector of zeros.
theta = np.zeros(shape=(2, 1))
# Gradient descent
def grandientDescent(X,y,theta,alpha,iters):
    """Run batch gradient descent and record the cost after every step.

    Args:
        X: design matrix with one row per sample.
        y: target values as a column vector.
        theta: starting parameter column vector; it is rebound inside the
            loop rather than modified in place.
        alpha: learning rate (step size).
        iters: number of update steps to perform.

    Returns:
        A tuple ``(theta, costs)``: the parameters after the last step and
        a list with the cost computed after each step.
    """
    m = len(X)  # sample count is loop-invariant
    costs = []
    for _ in range(iters):
        # Step against the gradient of the cost: X^T (X theta - y) / m.
        theta = theta - (X.T @ (X @ theta - y)) * alpha / m
        costs.append(costfunction(X, y, theta))
    return theta, costs
# Hyperparameters: learning rate and number of gradient-descent steps.
alpha, iters = 0.02, 2000
# Fit the model; theta is rebound to the fitted parameters and costs
# receives the per-iteration cost history.
theta, costs = grandientDescent(X, y, theta, alpha=alpha, iters=iters)
# Plot the recorded cost against the iteration index.
fig, ax = plt.subplots()
iteration_index = np.arange(iters)
ax.plot(iteration_index, costs, 'b')
ax.set_xlabel('iters')
ax.set_ylabel('costs')
ax.set_title('cost vs iters')
plt.show()
# Plot the fitted line on top of the training data.
# The line must be evaluated over the Population values (column 1 of X),
# because Population is the x-axis of the scatter plot; sampling the
# Profit range here would draw the line over the wrong interval.
population = X[:, 1]
x = np.linspace(population.min(), population.max(), 100)
# Model prediction: intercept theta[0] plus slope theta[1] times x.
y_ = theta[0, 0] + theta[1, 0] * x
fig, ax = plt.subplots()
ax.scatter(population, y, label='training data')
ax.plot(x, y_, 'r', label='predict')
ax.legend()
ax.set(xlabel='Population', ylabel='profit')
plt.show()
损失函数随迭代次数的变化曲线:
拟合效果(训练数据散点与预测直线):