0
点赞
收藏
分享

微信扫一扫

Ex1_机器学习_吴恩达课程练习

进击的铁雾 2022-01-20 阅读 75

Ex1_机器学习_吴恩达课程

文章目录

0. Pre-condition

# Programming exercise 1 for week 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ex1_function as func  # Custom helper module (the functions listed in section 00)

00. Self-created Functions

  • computeCost(X, y, theta):计算损失

    $$J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)^2$$

    # Compute the cost (halved mean of the squared prediction errors)
    def computeCost(X, y, theta):
        """Return sum((X . theta^T - y)^2) / (2 * len(X)).

        Args:
            X: Sample matrix with one row per training example.
            y: Target values; must be broadcast-compatible with
                ``np.dot(X, np.transpose(theta))`` (the callers in this file
                pass an ``(m, 1)`` column).
            theta: Parameters; the callers in this file pass a ``(1, n)`` row.

        Returns:
            The scalar cost for the given parameters.
        """
        predictions = np.dot(X, np.transpose(theta))
        squared_errors = np.power(predictions - y, 2)
        sample_count = len(X)
        return np.sum(squared_errors) / (2 * sample_count)
    
  • gradientDescent(X, y, theta, alpha, iters):梯度下降实现

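$$h_\theta(x) = \theta^{T}x = \theta_0 x_0 + \theta_1 x_1 + \dots + \theta_n x_n$$
$$\theta_j := \theta_j - \alpha\frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)}$$

$$\theta := \theta - \frac{\alpha}{m}\left(X\theta^{T} - y\right)^{T}X$$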

# Use gradient descent to fit the parameters
def gradientDescent(X, y, theta, alpha, iters):
    """Run batch gradient descent for linear regression.

    Each iteration applies the vectorized simultaneous update
    ``theta := theta - (alpha / m) * (X @ theta.T - y).T @ X`` and records the
    cost reported by ``computeCost`` for the updated parameters.

    The explicit ``@`` operator is used so that both ``np.matrix`` and plain
    2-D ``ndarray`` inputs are multiplied as matrices (``*`` is only a matrix
    product for ``np.matrix``).

    Args:
        X: Sample matrix with one row per training example; ``X.shape[0]`` is
            used as the sample count ``m``.
        y: Targets, expected as an ``(m, 1)`` column so that
            ``X @ theta.T - y`` is well-formed.
        theta: Initial parameters, expected as a ``(1, n)`` row.
        alpha: Learning rate.
        iters: Number of update steps to perform.

    Returns:
        A tuple ``(theta, cost)``: the parameters after the last update and a
        1-D array of length ``iters`` holding the cost after each update.
        With ``iters == 0`` the input ``theta`` is returned unchanged together
        with an empty array.
    """
    sample_count = X.shape[0]
    cost = np.zeros(iters)
    for i in range(iters):
        # Every component of theta is updated from the same residual, so the
        # update is simultaneous without needing a temporary copy.
        residual = X @ theta.T - y
        step = (alpha / sample_count) * residual.T
        theta = theta - step @ X
        cost[i] = computeCost(X, y, theta)
    return theta, cost
  • featureNormalize(df):特征正规化

    (注意:此函数会对传入 DataFrame 的所有列(包括标签列)做标准化,标签是否应一并处理仍待考虑。)

    # Feature normalization (z-score standardization of every column)
    def featureNormalize(df):
        """Standardize each column of ``df`` in place and return ``df``.

        Every column is replaced by ``(column - mean) / std`` using that
        column's own mean and population standard deviation (``ddof=0``).

        Args:
            df: A pandas DataFrame of numeric columns. All columns are
                normalized, including a label column if one is present.
                Column labels are assumed to be unique.

        Returns:
            The same DataFrame object, with its columns overwritten by the
            standardized values. A constant column has zero standard
            deviation and yields NaN/inf values.
        """
        # Request column-wise statistics explicitly. np.mean(df) / np.std(df)
        # forward axis=None to pandas, which recent pandas releases interpret
        # as a reduction over the whole frame (a single scalar) instead of
        # one value per column. ddof=0 matches NumPy's default for np.std.
        mean = df.mean(axis=0)
        std = df.std(axis=0, ddof=0)
        # Replace whole columns rather than assigning through .iloc so that
        # integer columns can become float without in-place dtype conflicts.
        for name in df.columns:
            df[name] = (df[name] - mean[name]) / std[name]
        return df
    
  • normalEquation(X, y):正规方程求 theta

$$\theta = (X^{T}X)^{-1}X^{T}y$$

# Normal equation: closed-form solution for theta
def normalEquation(X, y):
    """Return ``pinv(X.T @ X) @ X.T @ y``.

    The pseudo-inverse is used in place of a plain inverse of ``X.T @ X``.

    Args:
        X: Sample matrix with one row per training example (must expose
            ``.T``, e.g. an ndarray or ``np.matrix``).
        y: Target values, compatible with ``X.T`` for matrix multiplication.

    Returns:
        The fitted parameters; for an ``(m, n)`` X and ``(m, 1)`` y this is
        an ``(n, 1)`` column.
    """
    gram = X.T @ X
    projector = np.linalg.pinv(gram) @ X.T
    return projector @ y

1. Simple function

# 1. Simple function: build and display the 5x5 identity matrix

A = np.identity(5)
print(A)

2. Linear Regression with one variable

# 2. Linear regression with one variable

path_data1 = 'ex1data1.txt'
df_data1 = pd.read_csv(path_data1, names=['Population', 'Profit'])
print(df_data1.describe())  # summary statistics for each column
print(df_data1.head(10))    # first 10 rows (head() defaults to 5)
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
df_data1.info()             # index, column dtypes and memory usage

2.1 Plotting data

# 2.1 Plot the data: scatter of Profit against Population

df_data1.plot.scatter(
    x='Population',
    y='Profit',
    figsize=(8, 5),
    title='Predictions on Profit based on Population',
)

2.2 Gradient Descent

# 2.2 Gradient descent

# Prepend a column of ones so the intercept term is part of the matrix product.
df_data1.insert(0, 'ONE', 1)
# Row count, column count, feature matrix, target column, initial parameters,
# learning rate and iteration count.
row, col = df_data1.shape
X = np.matrix(df_data1.iloc[:, :col - 1])   # every column except the last
y = np.matrix(df_data1.iloc[:, col - 1:])   # the last column, kept 2-D
theta = np.matrix([0, 0])
alpha, iters = 0.01, 1500
# Run gradient descent.
res_theta, res_cost = func.gradientDescent(X, y, theta, alpha, iters)

2.3 Debugging

No code.

2.4 Visualizing

# 2.4 Visualization

# Figure 1: fitted regression line drawn over the training data
x_data = np.linspace(df_data1['Population'].min(), df_data1['Population'].max(), 100)
hypo = res_theta[0, 0] + (res_theta[0, 1] * x_data)  # hypothesis: theta0 + theta1 * x
fig, fig_prediction = plt.subplots(figsize=(8, 5))
fig_prediction.plot(x_data, hypo, 'r', label='Prediction')
fig_prediction.scatter(df_data1['Population'], df_data1['Profit'], label='Training data')
fig_prediction.legend(loc=2)  # loc=2 places the legend in the upper-left corner
fig_prediction.set(
    xlabel='Population',
    ylabel='Profit',
    title='Predictions on Profit based on Population data',
)

# Figure 2: cost recorded after each gradient-descent iteration
fig, fig_cost = plt.subplots(figsize=(8, 5))
x_cost = np.arange(iters)  # 0, 1, ..., iters - 1
fig_cost.plot(x_cost, res_cost, 'r')
fig_cost.set(
    xlabel='Iteration',
    ylabel='Cost',
    title='Value of cost of every iteration during training',
)

2.5 Optional lib_scikit-learn

As an optional extra, fit the same linear regression with the scikit-learn library instead of the hand-written gradient descent.

# 2.5 Optional: scikit-learn

from sklearn import linear_model

# Model fitting.
# X and y were built as np.matrix objects. scikit-learn's input validation
# rejects np.matrix in current releases (TypeError asking for np.asarray), so
# plain ndarray views are passed to fit() and predict().
model = linear_model.LinearRegression()
model.fit(np.asarray(X), np.asarray(y))
# Visualization
x = np.asarray(X)[:, 1]  # the Population column (column 0 is the inserted ones)
f = model.predict(np.asarray(X)).flatten()
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(df_data1['Population'], df_data1['Profit'], label='Training Data')
ax.legend(loc=2)
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')

3. Linear Regression with multiple variables

# 3. Linear regression with multiple variables

path_data2 = 'ex1data2.txt'
# The file has no header row; the column labels are supplied here.
df_data2 = pd.read_csv(
    path_data2,
    header=None,
    names=['Size', 'Bedrooms', 'Price'],
)

3.1 Feature Normalization

# 3.1 Feature normalization

# Rebind df_data2 to the frame returned by the helper. The whole frame is
# passed in, so the Price column is normalized together with the features.
df_data2 = func.featureNormalize(df_data2)

3.2 Gradient Descent

# 3.2 Gradient descent

# Prepend the column of ones, then split the frame into features and target.
df_data2.insert(0, 'ONE', 1)
row2, col2 = df_data2.shape
X2 = np.matrix(df_data2.iloc[:, :col2 - 1])   # every column except the last
y2 = np.matrix(df_data2.iloc[:, col2 - 1:])   # the last column, kept 2-D
theta2 = np.matrix([0, 0, 0])
alpha2 = 0.03  # original author's note: max value 1
iters2 = 1000
res_theta2, res_cost2 = func.gradientDescent(X2, y2, theta2, alpha2, iters2)

# Visualization: cost recorded after each iteration
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(np.arange(iters2), res_cost2, 'r')
ax.set(
    xlabel='Iterations',
    ylabel='Cost',
    title='Value of cost in every iteration during training',
)
plt.show()

3.3 Normal Equation

# 3.3 Normal equation

# Closed-form solve on the same X2 / y2 that were fed to gradient descent;
# the result is stored in `res` and not used further in this snippet.
res = func.normalEquation(X2, y2)
举报

相关推荐

0 条评论