Ex1_Machine Learning_Andrew Ng's Course
Table of Contents
- Ex1_Machine Learning_Andrew Ng's Course
- 0. Pre-condition
- 00. Self-created Functions
- 1. Simple function
- 2. Linear Regression with one variable
- 3. Linear Regression with multiple variables
0. Pre-condition
# Programming exercise 1 for week 2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ex1_function as func  # A self-created module of helper functions (see Section 00)
00. Self-created Functions
- computeCost(X, y, theta): compute the cost

# Compute the cost
def computeCost(X, y, theta):
    inner = np.power(np.dot(X, np.transpose(theta)) - y, 2)
    return np.sum(inner) / (2 * len(X))
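This implements the standard squared-error cost from the course, with hypothesis $h_\theta(x) = \theta^T x$:

$$J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right)^2$$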
- gradientDescent(X, y, theta, alpha, iters): gradient descent implementation

# Use gradient descent to update the parameters
def gradientDescent(X, y, theta, alpha, iters):
    row = X.shape[0]
    cost = np.zeros(iters)
    parameters = int(theta.flatten().shape[1])
    temp = np.matrix(np.zeros(theta.shape))
    for i in range(iters):
        # Vectorized update
        temp = theta - (alpha / row) * (X * theta.T - y).T * X
        # Equivalent non-vectorized update
        # error = np.dot(X, theta.T) - y
        # for j in range(parameters):
        #     term = np.multiply(error, X[:, j])
        #     temp[0, j] = theta[0, j] - ((alpha / row) * np.sum(term))
        theta = temp
        cost[i] = computeCost(X, y, theta)
    return theta, cost
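Each iteration performs the simultaneous update

$$\theta_j := \theta_j - \frac{\alpha}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)}$$

which the vectorized line writes in matrix form (theta here is a row vector) as $\theta := \theta - \frac{\alpha}{m} (X\theta^T - y)^T X$.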
- featureNormalize(df): feature normalization
(Note: whether the label column should also be normalized here is still an open question.)

# Feature Normalization
def featureNormalize(df):
    col = df.shape[1]
    mean = np.mean(df.iloc[:, 0: col])
    std = np.std(df.iloc[:, 0: col])
    df.iloc[:, 0: col] -= mean
    df.iloc[:, 0: col] /= std
    return df
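Since normalizing the label is still an open question (see the note above), here is a minimal sketch of a variant that applies the same z-score transform $x' = (x - \mu)/\sigma$ to the feature columns only. featureNormalizeFeaturesOnly is a hypothetical helper of mine, not part of the original module, and it assumes the label sits in the last column:

# Sketch: z-score normalize the feature columns only, keep the label as-is
def featureNormalizeFeaturesOnly(df):
    features = df.columns[:-1]  # assumption: the label is the last column
    # Note: pandas' std() uses ddof=1, while np.std() above uses ddof=0
    df[features] = (df[features] - df[features].mean()) / df[features].std()
    return df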
- normalEquation(X, y): solve for theta via the normal equation

# Normal Equation
def normalEquation(X, y):
    # theta = np.linalg.pinv(np.transpose(X).dot(X)).dot(np.transpose(X)).dot(y)
    theta = np.linalg.pinv(X.T @ X) @ X.T @ y
    return theta
1. Simple function
# 1. Simple function
A = np.eye(5)
print(A)
2. Linear Regression with one variable
# 2. Linear regression with one variable
path_data1 = 'ex1data1.txt'
df_data1 = pd.read_csv(path_data1, names=['Population', 'Profit'])
print(df_data1.describe())  # Summary statistics of the data
print(df_data1.head(10))    # First n rows (default is 5)
print(df_data1.info())      # Index, dtypes and memory usage
2.1 Plotting data
# 2.1 Plot the data
df_data1.plot(kind='scatter', x='Population', y='Profit', figsize=(8, 5),
title='Predictions on Profit based on Population')
2.2 Gradient Descent
# 2.2 Gradient Descent
# For convenience of computation, insert a column of ones before the first column
df_data1.insert(0, 'ONE', 1)
# Rows, columns, inputs, targets, parameters, learning rate, iterations
row = df_data1.shape[0]
col = df_data1.shape[1]
X = np.matrix(df_data1.iloc[:, 0: col - 1])
y = np.matrix(df_data1.iloc[:, col - 1: col])
theta = np.matrix([0, 0])
alpha = 0.01
iters = 1500
# Run gradient descent
res_theta, res_cost = func.gradientDescent(X, y, theta, alpha, iters)
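As a quick sanity check (an addition of mine, not part of the original exercise code), print the learned parameters and the final cost; for ex1data1 with these settings the fit should come out at roughly theta ≈ [-3.63, 1.17]:

# Sanity check: the cost should decrease monotonically over the iterations
print('theta:', res_theta)          # expected: roughly [[-3.63, 1.17]]
print('final cost:', res_cost[-1])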
2.3 Debugging
No code.
2.4 Visualizing
# 2.4 Visualization
# Figure: the linear regression prediction
x_data = np.linspace(df_data1.Population.min(), df_data1.Population.max(), 100)
hypo = res_theta[0, 0] + (res_theta[0, 1] * x_data)  # hypothesis function
fig, fig_prediction = plt.subplots(figsize=(8, 5))
fig_prediction.plot(x_data, hypo, 'r', label='Prediction')
fig_prediction.scatter(df_data1['Population'], df_data1['Profit'], label='Training data')
fig_prediction.legend(loc=2)  # legend() shows the legend; loc=2 places it in the upper-left corner
fig_prediction.set_xlabel('Population')
fig_prediction.set_ylabel('Profit')
fig_prediction.set_title('Predictions on Profit based on Population data')
# Figure: how the cost changes over the iterations
fig, fig_cost = plt.subplots(figsize=(8, 5))
x_cost = np.arange(iters)  # np.arange() returns evenly spaced values
fig_cost.plot(x_cost, res_cost, 'r')
fig_cost.set_xlabel('Iteration')
fig_cost.set_ylabel('Cost')
fig_cost.set_title('Value of cost of every iteration during training')
2.5 Optional lib_scikit-learn
Use an additional library, scikit-learn, to perform the same linear regression.
# 2.5 Optional lib: scikit-learn
from sklearn import linear_model
# Model fitting
model = linear_model.LinearRegression()
model.fit(X, y)
# Visualization
x = np.array(X[:, 1].A1)
f = model.predict(X).flatten()
fig, ax = plt.subplots(figsize=(8,5))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(df_data1['Population'], df_data1['Profit'], label='Training Data')
ax.legend(loc=2)
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
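For comparison (my own addition; intercept_ and coef_ are standard attributes of sklearn's LinearRegression), the fitted parameters can be checked against the gradient descent result:

# Since X already contains the ONE column, its weight is absorbed into the
# intercept, so the coefficient for that column stays at 0.
print('sklearn intercept:', model.intercept_)
print('sklearn coefficients:', model.coef_)
print('gradient descent theta:', res_theta)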
3. Linear Regression with multiple variables
# 3. Linear regression with multiple variables
path_data2 = 'ex1data2.txt'
df_data2 = pd.read_csv(path_data2, names=['Size', 'Bedrooms', 'Price'])
3.1 Feature Normalization
# 3.1 Feature Normalization
df_data2 = func.featureNormalize(df_data2)
3.2 Gradient Descent
# 3.2 Gradient Descent
df_data2.insert(0, 'ONE', 1)
row2 = df_data2.shape[0]
col2 = df_data2.shape[1]
X2 = np.matrix(df_data2.iloc[:, 0: col2 - 1])
y2 = np.matrix(df_data2.iloc[:, col2 - 1: col2])
theta2 = np.matrix([0, 0, 0])
alpha2 = 0.03  # learning rate (with normalized features, values up to about 1 still converge)
iters2 = 1000
res_theta2, res_cost2 = func.gradientDescent(X2, y2, theta2, alpha2, iters2)
# Visualization
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(np.arange(iters2), res_cost2, 'r')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Value of cost in every iteration during training')
plt.show()
3.3 Normal Equation
# 3.3 Normal Equation
res = func.normalEquation(X2, y2)
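As a closing check (my own addition), the closed-form solution should roughly agree with the converged gradient descent parameters; normalEquation returns a column vector, so it is transposed for comparison:

print('normal equation theta:', res.T)
print('gradient descent theta:', res_theta2)
print('cost via normal equation:', func.computeCost(X2, y2, res.T))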