代码
https://blog.51cto.com/u_16055028/7673781
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
In [2]:
# Simulation setup.
# Seed NumPy's global random generator so the draws below are repeatable.
np.random.seed(seed=0)
# n_samples: number of simulated rows.
# n_features: number of raw input variables (material type, cooling method, time).
n_samples, n_features = 1000, 3
In [3]:
# Candidate labels for the two categorical inputs.
material_labels = ['宽料', '窄料', '厚料', '小卷料']
cooling_labels = ['自然冷却', '1号风机', '2号风机', '1号和2号风机']
# Randomly assign a material type and a cooling method to every sample.
# The order of the random draws (materials, cooling methods, time, noise) is
# kept fixed so that a seeded run always produces the same data.
materials = np.random.choice(material_labels, size=n_samples)
cooling_methods = np.random.choice(cooling_labels, size=n_samples)
# Random time values drawn uniformly between 0 and 10.
time = np.random.uniform(low=0, high=10, size=n_samples)
# Synthetic material temperature: linear in time plus Gaussian noise.
# This is only an example; a real application would generate it from a
# physical model.
noise = np.random.normal(loc=0, scale=2, size=n_samples)
temperature = 20 + 5 * time + noise
In [26]:
# One-hot encode the two categorical columns.
# Each column gets its own encoder. Re-fitting one shared encoder would
# overwrite the material categories with the cooling-method categories, so no
# fitted object would remain that can transform a new material label.
material_encoder = OneHotEncoder(sparse_output=False)
materials_encoded = material_encoder.fit_transform(materials.reshape(-1, 1))
cooling_encoder = OneHotEncoder(sparse_output=False)
cooling_methods_encoded = cooling_encoder.fit_transform(cooling_methods.reshape(-1, 1))
# Backward-compatible alias: `encoder` used to end up fitted on the
# cooling-method column, so it keeps referring to that encoder.
encoder = cooling_encoder
In [27]:
# Assemble the feature matrix: one-hot material columns, then one-hot
# cooling-method columns, then time as the last column.
time_column = time.reshape(-1, 1)
X = np.hstack((materials_encoded, cooling_methods_encoded, time_column))
In [6]:
# Fit the data and inspect the models (six approaches).
# 1. Linear regression: one coefficient per column of X plus an intercept.
linear_model = LinearRegression().fit(X, temperature)
print("Linear Regression Coefficients:", linear_model.coef_, sep="\n")
print("Linear Regression Intercept:", linear_model.intercept_)
# 2. Polynomial regression: linear regression on degree-2 expanded features.
from sklearn.preprocessing import PolynomialFeatures
poly_features = PolynomialFeatures(degree=2)
# Fit the expansion on X and apply it. The fitted transformer stays in
# `poly_features` so the same expansion can be applied to other inputs.
X_poly = poly_features.fit(X).transform(X)
poly_model = LinearRegression().fit(X_poly, temperature)
# NOTE(review): the recorded output shows coefficients around 1e10-1e11.
# Presumably the one-hot columns are collinear with the bias term and with
# their own squares; confirm before interpreting individual coefficients.
print("Polynomial Regression Coefficients:", poly_model.coef_, sep="\n")
print("Polynomial Regression Intercept:", poly_model.intercept_)
# 3. Decision tree regression: report one importance value per column of X.
tree_model = DecisionTreeRegressor().fit(X, temperature)
print("Decision Tree Feature Importances:", tree_model.feature_importances_, sep="\n")
# 4. Random forest regression: report one importance value per column of X.
rf_model = RandomForestRegressor().fit(X, temperature)
print("Random Forest Feature Importances:", rf_model.feature_importances_, sep="\n")
# 5. Support vector regression with the estimator's default settings.
svm_model = SVR().fit(X, temperature)
# `svm_model.coef_` is deliberately not printed: accessing it raised
# "AttributeError: coef_ is only available when using a linear kernel".
# The support vectors and dual coefficients are shown instead.
print(
    "Support Vector Machine Coefficients:",
    svm_model.support_vectors_,
    svm_model.dual_coef_,
    sep="\n",
)
# 6. Neural network: multi-layer perceptron with two hidden layers of 10 units.
nn_model = MLPRegressor(max_iter=1000, hidden_layer_sizes=(10, 10))
nn_model.fit(X, temperature)
# A neural network has no directly interpretable parameters; it is usually
# judged by its predictive ability instead.
# This is only example code; a real application may need a more complex
# network architecture.
Linear Regression Coefficients:
[ 0.09738255 0.09813695 -0.09434166 -0.10117784 -0.04251578 0.01205836
-0.02354086 0.05399828 4.97497016]
Linear Regression Intercept: 20.07918549347334
Polynomial Regression Coefficients:
[-3.55663651e+07 -2.46440447e+11 -2.70021102e+11 -7.88059614e+10
-8.11398359e+10 -2.91102428e+10 -1.20136820e+10 1.13551818e+10
-1.73739022e+10 7.09367018e+10 5.24750931e+10 2.68453159e+07
1.29473196e+07 -2.17156558e+07 2.86211077e+10 1.16705735e+10
8.76292665e+09 1.67218805e+10 -1.05961698e+10 7.10109434e+10
7.38255522e+00 -4.82728086e-01 3.36659124e+10 1.67153782e+10
1.38077314e+10 2.17666852e+10 -1.05961698e+10 -8.16416041e+10
0.00000000e+00 -4.89668096e+09 -2.18472152e+10 -2.47548620e+10
-1.67959082e+10 -1.05961698e+10 -7.58178180e+10 -8.38659259e+09
-2.53371268e+10 -2.82447736e+10 -2.02858198e+10 -1.05961698e+10
-8.29216045e+09 0.00000000e+00 0.00000000e+00 0.00000000e+00
-6.03405320e+10 -8.43818705e+09 0.00000000e+00 0.00000000e+00
-6.03405320e+10 -2.88994040e+10 0.00000000e+00 -6.03405320e+10
-8.12927384e+09 -6.03405320e+10 -2.38037109e-03]
Polynomial Regression Intercept: 202782216111.90613
Decision Tree Feature Importances:
[4.81361663e-04 7.29456493e-04 1.01830280e-03 7.48103052e-04
6.76530127e-04 5.90113556e-04 6.57851309e-04 7.32742416e-04
9.94365539e-01]
Random Forest Feature Importances:
[6.87663836e-04 7.56478794e-04 8.67089363e-04 8.31802663e-04
7.98470513e-04 7.20358675e-04 5.81217235e-04 7.52520218e-04
9.94004399e-01]
Out[6]:
MLPRegressor
MLPRegressor(hidden_layer_sizes=(10, 10), max_iter=1000)
In [44]:
# Build one new sample for prediction.
# Given a material type, a cooling method and a time, the trained models can
# predict the temperature. The row must have the same 9-column layout as X:
# one-hot material, one-hot cooling method, then time.


def _one_hot_row(label, observed_labels):
    """Return a (1, n_categories) one-hot row selecting *label*.

    The categories are the sorted unique values of *observed_labels*, which is
    the column order OneHotEncoder produces when it determines categories
    automatically from the same training column.

    Raises:
        ValueError: if *label* does not occur in *observed_labels*.
    """
    categories = np.unique(observed_labels)
    hits = categories == label
    if not hits.any():
        raise ValueError(
            f"unknown category {label!r}; expected one of {categories.tolist()}"
        )
    return hits.astype(int).reshape(1, -1)


# Derive the encodings from the labels instead of hard-coding the vectors, so
# the result no longer depends on remembering the category order.
new_material = _one_hot_row('宽料', materials)  # -> [[0, 1, 0, 0]]
new_cooling_method = _one_hot_row('自然冷却', cooling_methods)  # -> [[0, 0, 0, 1]]
new_time = np.array([5])
# Why the shared `encoder` is not used here:
# - it was last fitted on the cooling-method column, so `encoder.transform`
#   cannot encode a material label;
# - calling `fit_transform` on the single new value learns only one category
#   per column, which gave a 3-column row and
#   "ValueError: X has 3 features, but * is expecting 9 features as input."
print(materials_encoded.shape, cooling_methods_encoded.shape, time.shape, X.shape)
new_input = np.column_stack((new_material, new_cooling_method, new_time))
print(new_material.shape, new_cooling_method.shape, new_time.shape, new_input.shape)
(1000, 4) (1000, 4) (1000,) (1000, 9)
(1, 4) (1, 4) (1,) (1, 9)
In [45]:
# Predict the temperature of the new sample with each fitted model.
# The polynomial model is fed the new sample after it has passed through the
# fitted `poly_features` expansion.
new_input_poly = poly_features.transform(new_input)
linear_prediction = linear_model.predict(new_input)
poly_prediction = poly_model.predict(new_input_poly)
tree_prediction = tree_model.predict(new_input)
rf_prediction = rf_model.predict(new_input)
svm_prediction = svm_model.predict(new_input)
nn_prediction = nn_model.predict(new_input)
# Print the predictions, one line per model.
for label, prediction in (
    ("Linear Regression Prediction:", linear_prediction),
    ("Polynomial Regression Prediction:", poly_prediction),
    ("Decision Tree Regression Prediction:", tree_prediction),
    ("Random Forest Regression Prediction:", rf_prediction),
    ("Support Vector Machine Regression Prediction:", svm_prediction),
    ("Neural Network Regression Prediction:", nn_prediction),
):
    print(label, prediction)
Linear Regression Prediction: [45.10617151]
Polynomial Regression Prediction: [45.1194458]
Decision Tree Regression Prediction: [43.32600977]
Random Forest Regression Prediction: [42.97839963]
Support Vector Machine Regression Prediction: [45.25602304]
Neural Network Regression Prediction: [45.09695762]
In [ ]: