# 使用python 机器学习给出代码设计指数回归模型研究不同宽度,厚度,重量,车间温度,退火类型(O态,H2态),冷却类型(自然冷却,单面风机,双面风机)下, 物料温度随时间增长而指数衰减的规律-CFANZ编程社区

# 根据以下代码 使用python 机器学习 给出代码 设计指数回归模型 研究不同宽度,厚度,重量,车间温度,退火类型(O态,H2态),冷却类型(自然冷却,单面风机,双面风机)下, 物料温度随时间增长而指数衰减的规律

# 导入所需的库
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Select a font that can render Chinese text (e.g. SimHei or Microsoft YaHei)
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,  # work around the minus-sign display problem
})

# Generate the synthetic data set
def fun_data(num_materials=40, num_measurements=10, time_interval_hours=6, seed=None):
    """Simulate exponentially decaying temperature readings for several materials.

    Every material gets one random set of features (width, thickness, weight,
    workshop temperature, annealing type, cooling type), a random initial
    temperature and a random decay rate. Its temperature at time ``t`` is
    ``initial_temperature * exp(-decay_rate * t)``. The decay rate is drawn
    independently of the material features.

    Args:
        num_materials: Number of simulated materials.
        num_measurements: Number of temperature readings per material.
        time_interval_hours: Spacing between consecutive readings.
        seed: Optional seed. When given, a private ``RandomState`` is used so
            the result is reproducible without touching NumPy's global state.
            When ``None``, the global ``np.random`` state is used.

    Returns:
        A DataFrame with one row per (material, measurement) and the columns
        Material_ID, Measurement_Time, Width, Thickness, Weight,
        Workshop_Temperature, Annealing_Type, Cooling_Type and Temperature.
        Measurement_Time is float64.
    """
    columns = ['Material_ID', 'Measurement_Time', 'Width', 'Thickness', 'Weight',
               'Workshop_Temperature', 'Annealing_Type', 'Cooling_Type', 'Temperature']

    # Both the np.random module and a RandomState expose uniform() and choice().
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Measurement time points: 0, interval, 2 * interval, ...
    measurement_times = np.arange(num_measurements) * time_interval_hours

    # Per-material frames are collected here and concatenated once at the end;
    # concatenating onto an empty frame inside the loop is quadratic and relies
    # on deprecated pandas dtype handling for empty frames.
    frames = []
    for material_id in range(1, num_materials + 1):
        # Material features. The order of the random draws is kept fixed so
        # that a given random state always yields the same data.
        width = rng.uniform(5, 20)  # width in [5, 20)
        thickness = rng.uniform(1, 5)  # thickness in [1, 5)
        weight = rng.uniform(10, 100)  # weight in [10, 100)
        workshop_temperature = rng.uniform(20, 30)  # workshop temperature in [20, 30)
        annealing_type = rng.choice(['O态', 'H2态'])  # random annealing type
        cooling_type = rng.choice(['自然冷却', '单面风机', '双面风机'])  # random cooling type

        # Exponentially decaying temperature curve
        initial_temperature = rng.uniform(100, 200)  # initial temperature in [100, 200)
        decay_rate = rng.uniform(0.01, 0.1)  # decay rate in [0.01, 0.1)
        temperature_data = initial_temperature * np.exp(-decay_rate * measurement_times)

        # Scalar values are broadcast to the length of the array-valued columns.
        frames.append(pd.DataFrame({
            'Material_ID': material_id,
            'Measurement_Time': measurement_times,
            'Width': width,
            'Thickness': thickness,
            'Weight': weight,
            'Workshop_Temperature': workshop_temperature,
            'Annealing_Type': annealing_type,
            'Cooling_Type': cooling_type,
            'Temperature': temperature_data,
        }))

    if frames:
        data = pd.concat(frames, ignore_index=True)
    else:
        # pd.concat rejects an empty list; return an empty frame with the same columns.
        data = pd.DataFrame(columns=columns)

    # Convert the time column to float once, after all rows are assembled.
    data['Measurement_Time'] = data['Measurement_Time'].astype("float64")

    return data

# Generate the data set used by the rest of the script
data = fun_data()

# 导入所需的库
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Select a font that can render Chinese text (e.g. SimHei or Microsoft YaHei)
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,  # work around the minus-sign display problem
})

# Generate the synthetic data set
def fun_data(num_materials=40, num_measurements=10, time_interval_hours=6, seed=None):
    """Simulate exponentially decaying temperature readings for several materials.

    Every material gets one random set of features (width, thickness, weight,
    workshop temperature, annealing type, cooling type), a random initial
    temperature and a random decay rate. Its temperature at time ``t`` is
    ``initial_temperature * exp(-decay_rate * t)``. The decay rate is drawn
    independently of the material features.

    Args:
        num_materials: Number of simulated materials.
        num_measurements: Number of temperature readings per material.
        time_interval_hours: Spacing between consecutive readings.
        seed: Optional seed. When given, a private ``RandomState`` is used so
            the result is reproducible without touching NumPy's global state.
            When ``None``, the global ``np.random`` state is used.

    Returns:
        A DataFrame with one row per (material, measurement) and the columns
        Material_ID, Measurement_Time, Width, Thickness, Weight,
        Workshop_Temperature, Annealing_Type, Cooling_Type and Temperature.
        Measurement_Time is float64.
    """
    columns = ['Material_ID', 'Measurement_Time', 'Width', 'Thickness', 'Weight',
               'Workshop_Temperature', 'Annealing_Type', 'Cooling_Type', 'Temperature']

    # Both the np.random module and a RandomState expose uniform() and choice().
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Measurement time points: 0, interval, 2 * interval, ...
    measurement_times = np.arange(num_measurements) * time_interval_hours

    # Per-material frames are collected here and concatenated once at the end;
    # concatenating onto an empty frame inside the loop is quadratic and relies
    # on deprecated pandas dtype handling for empty frames.
    frames = []
    for material_id in range(1, num_materials + 1):
        # Material features. The order of the random draws is kept fixed so
        # that a given random state always yields the same data.
        width = rng.uniform(5, 20)  # width in [5, 20)
        thickness = rng.uniform(1, 5)  # thickness in [1, 5)
        weight = rng.uniform(10, 100)  # weight in [10, 100)
        workshop_temperature = rng.uniform(20, 30)  # workshop temperature in [20, 30)
        annealing_type = rng.choice(['O态', 'H2态'])  # random annealing type
        cooling_type = rng.choice(['自然冷却', '单面风机', '双面风机'])  # random cooling type

        # Exponentially decaying temperature curve
        initial_temperature = rng.uniform(100, 200)  # initial temperature in [100, 200)
        decay_rate = rng.uniform(0.01, 0.1)  # decay rate in [0.01, 0.1)
        temperature_data = initial_temperature * np.exp(-decay_rate * measurement_times)

        # Scalar values are broadcast to the length of the array-valued columns.
        frames.append(pd.DataFrame({
            'Material_ID': material_id,
            'Measurement_Time': measurement_times,
            'Width': width,
            'Thickness': thickness,
            'Weight': weight,
            'Workshop_Temperature': workshop_temperature,
            'Annealing_Type': annealing_type,
            'Cooling_Type': cooling_type,
            'Temperature': temperature_data,
        }))

    if frames:
        data = pd.concat(frames, ignore_index=True)
    else:
        # pd.concat rejects an empty list; return an empty frame with the same columns.
        data = pd.DataFrame(columns=columns)

    # Convert the time column to float once, after all rows are assembled.
    data['Measurement_Time'] = data['Measurement_Time'].astype("float64")

    return data

# Generate the data
data = fun_data()

# Preview the data. Printed explicitly: a bare expression shows nothing when
# this file is run as a script.
print(data.head())

# Descriptive statistics
print(data.describe())

# Distribution of the numeric columns (histograms)
data.hist(figsize=(12, 10))
plt.show()

# Temperature distribution per annealing type and cooling type (box plot)
sns.boxplot(x='Annealing_Type', y='Temperature', hue='Cooling_Type', data=data)
plt.show()

# Correlation between temperature and the other features (heat map).
# Only numeric columns are passed on: DataFrame.corr() raises on the
# string-valued Annealing_Type / Cooling_Type columns in pandas >= 2.0.
sns.heatmap(data.select_dtypes(include='number').corr(), annot=True, cmap='coolwarm')
plt.show()

# Select the feature variables and the target variable
X = data[['Width', 'Thickness', 'Weight', 'Workshop_Temperature', 'Measurement_Time', 'Annealing_Type', 'Cooling_Type']]
y = data['Temperature']

# One-hot encode the categorical variables. Every level is kept, so the dummy
# coefficients are only determined up to a constant shared with the intercept.
X = pd.get_dummies(X, columns=['Annealing_Type', 'Cooling_Type'])

# Split into training and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Baseline: linear regression on the raw temperature
model = LinearRegression()

# Train the model
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print('MSE:', mse)
print('RMSE:', rmse)
print('R2:', r2)

# Model coefficients
coefficients = pd.DataFrame(model.coef_, index=X.columns, columns=['Coefficient'])
print(coefficients)

# Model intercept
intercept = model.intercept_
print('Intercept:', intercept)

# Scatter plot of true values against predictions
plt.scatter(y_test, y_pred)
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.show()

# Residual plot
residuals = y_test - y_pred
plt.scatter(y_pred, residuals)
plt.xlabel('Predictions')
plt.ylabel('Residuals')
plt.show()

# Exponential regression: taking the log of the target turns
# T = T0 * exp(-k * t) into ln(T) = ln(T0) - k * t, which is linear in time.
y_log = np.log(y)

# Split again; the same random_state gives the same rows as the first split
X_train, X_test, y_train_log, y_test_log = train_test_split(X, y_log, test_size=0.2, random_state=42)

# Refit the model on the log-transformed target
model.fit(X_train, y_train_log)

# Predict on the test set (log scale)
y_pred_log = model.predict(X_test)

# Evaluate on the log scale
mse_log = mean_squared_error(y_test_log, y_pred_log)
rmse_log = np.sqrt(mse_log)
r2_log = r2_score(y_test_log, y_pred_log)
print('MSE (log):', mse_log)
print('RMSE (log):', rmse_log)
print('R2 (log):', r2_log)

# Evaluate on the original temperature scale as well, so the numbers are
# comparable with the baseline model above.
y_test_orig = np.exp(y_test_log)
y_pred_orig = np.exp(y_pred_log)
rmse_log_orig = np.sqrt(mean_squared_error(y_test_orig, y_pred_orig))
r2_log_orig = r2_score(y_test_orig, y_pred_orig)
print('RMSE (log model, original scale):', rmse_log_orig)
print('R2 (log model, original scale):', r2_log_orig)

# Coefficients of the log model
coefficients_log = pd.DataFrame(model.coef_, index=X.columns, columns=['Coefficient (log)'])
print(coefficients_log)

# The time coefficient of the log model is -k, with k the fitted decay rate
# shared by all materials.
decay_rate_estimate = -coefficients_log.loc['Measurement_Time', 'Coefficient (log)']
print('Estimated decay rate:', decay_rate_estimate)

# Intercept of the log model
intercept_log = model.intercept_
print('Intercept (log):', intercept_log)

# Scatter plot of true values against predictions (back-transformed with exp)
plt.scatter(y_test_orig, y_pred_orig)
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.show()

# Residual plot (back-transformed with exp)
residuals_log = y_test_orig - y_pred_orig
plt.scatter(y_pred_orig, residuals_log)
plt.xlabel('Predictions')
plt.ylabel('Residuals')
plt.show()