一元线性回归---实验部分
1. 一元线性回归
1.1 梯度下降法
下面我们就导入数据集,并查看:
代码:
import numpy as np
from numpy import genfromtxt

# Load the comma-separated dataset from disk and peek at the first
# three rows to sanity-check that it parsed correctly.
data = genfromtxt('data.csv', delimiter=',')
data[:3]
结果:
import matplotlib.pyplot as plt

# First column is the feature x, second column is the target y.
xdata, ydata = data[:, 0], data[:, 1]

# Scatter plot of the raw samples.
plt.scatter(xdata, ydata)
plt.show()
结果:
代码:
# Pearson correlation coefficient between xdata and ydata:
#   r = sum((x - x̄)(y - ȳ)) / sqrt(sum((x - x̄)^2) * sum((y - ȳ)^2))
# Vectorized with NumPy instead of the original two explicit passes
# (manual mean accumulation + manual deviation loops).
averX = np.mean(xdata)
averY = np.mean(ydata)
devX = xdata - averX  # per-sample deviations from the mean
devY = ydata - averY
numerator = np.sum(devX * devY)
denominator = np.sqrt(np.sum(devX ** 2) * np.sum(devY ** 2))
rxy = numerator / denominator
print(rxy)
求得结果:
代码:
# Define the hypothesis (linear model) function
def hypothesis(theta0, theta1, x):
    """Predicted value of the linear model: theta0 + theta1 * x."""
    prediction = theta1 * x + theta0
    return prediction
# Define the cost (loss) function
def costFunction(theta0, theta1, xdata, ydata):
    """Mean-squared-error cost J(theta0, theta1) = (1/2m) * sum_i (y_i - h(x_i))^2.

    Bug fix: the original accumulated the raw residual (y - h) WITHOUT
    squaring it, so positive and negative errors cancelled out and the
    "cost" could even be negative — it was not a valid loss.
    """
    m = len(xdata)
    totalError = 0.0
    for i in range(m):
        # h(x_i) = theta0 + theta1 * x_i, the linear prediction
        prediction = theta0 + theta1 * xdata[i]
        totalError += (ydata[i] - prediction) ** 2
    return totalError / (2 * m)
# Define the gradient-descent training routine
def gradient_descent_run(theta0, theta1, learn_rate, xdata, ydata, epochs):
    """Fit y = theta0 + theta1 * x by batch gradient descent.

    Runs `epochs` full passes over the data and returns the final
    (theta0, theta1). Every 10th epoch the current epoch number is
    printed and the current fit is plotted over the data.
    """
    m = len(xdata)  # number of samples
    inv_m = 1 / m   # hoisted averaging factor shared by both gradients
    for epoch in range(epochs):
        grad0 = 0
        grad1 = 0
        # Accumulate the averaged partial derivatives over the whole batch.
        for x, y in zip(xdata, ydata):
            residual = hypothesis(theta0, theta1, x) - y
            grad0 += inv_m * residual
            grad1 += inv_m * residual * x
        # Simultaneous update of both parameters.
        theta0 = theta0 - grad0 * learn_rate
        theta1 = theta1 - grad1 * learn_rate
        # Progress report every 10 epochs: print and plot the current line.
        if epoch % 10 == 0:
            print("epochs={}".format(epoch))
            plt.plot(xdata, ydata, 'b.')
            plt.plot(xdata, theta1 * xdata + theta0, 'r')
            plt.show()
    return theta0, theta1
# Hyper-parameters and initial parameter guesses.
lr = 0.0001   # learning rate
theta0 = 0    # intercept, initialised to zero
theta1 = 0    # slope, initialised to zero
epochs = 50   # maximum number of iterations

# Train and keep the fitted parameters.
theta0, theta1 = gradient_descent_run(theta0, theta1, lr, xdata, ydata, epochs)
结果:



1.2 sklearn一元线性回归
示例:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Load the dataset and reshape each column into an (n, 1) matrix,
# the 2-D layout sklearn estimators expect for fit/predict.
data = np.genfromtxt("data.csv", delimiter=',')
x_data = data[:, 0].reshape(-1, 1)
y_data = data[:, 1].reshape(-1, 1)

# Fit an ordinary least-squares line.
model = LinearRegression()
model.fit(x_data, y_data)

# Plot the raw points together with the fitted line.
plt.scatter(x_data, y_data)
plt.plot(x_data, model.predict(x_data), 'r')
plt.show()
结果: