import numpy as np
import matplotlib.pyplot as plt
import sklearn
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.cluster import KMeans
# Load the data set from the CSV file next to the script.
data = pd.read_csv("./data.csv", encoding="utf-8")
# Preview of the first rows; the result is only displayed in an interactive
# session and is discarded when run as a plain script.
data.head()
# The two columns used as plot axes.
x1 = data["V1"]
x2 = data["V2"]
# Ground-truth labels, used only to evaluate the clustering afterwards.
y = data["labels"]
# Feature matrix: every column except the label column.
x3 = data.drop(columns=["labels"])
# Build the KMeans model: 3 clusters, random centroid initialisation and a
# fixed seed so runs are reproducible. n_init is pinned explicitly so the
# number of restarts does not depend on the installed scikit-learn version
# (10 is the historical default and what 'auto' resolves to for init='random').
KM = KMeans(n_clusters=3, init='random', n_init=10, random_state=0)
# Fit the model on the feature matrix (the labels are not used here).
KM.fit(x3)
# Inspect the learned cluster centres.
centers = KM.cluster_centers_
print(centers)
# Cluster index assigned to every sample.
y_predict = KM.predict(x3)
print(y_predict)
# Count samples per predicted cluster and per true label.
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
print(pd.Series(y_predict).value_counts())
print(y.value_counts())
# Result correction: KMeans cluster indices are arbitrary, so they are
# remapped onto the label ids used in `y` (cluster 0 -> 1, 1 -> 2, 2 -> 0).
# NOTE(review): this mapping is hard-coded; presumably it was chosen by
# comparing the value counts printed earlier. Re-check it if the data or the
# random_state changes.
label_map = np.array([1, 2, 0])
# Vectorised lookup: position i of label_map holds the label for cluster i.
y_corrected = label_map[y_predict]
print(pd.Series(y_corrected).value_counts())
# Accuracy of the remapped cluster assignments against the true labels.
accuray = accuracy_score(y, y_corrected)
print(accuray)
# One figure with three side-by-side panels: true labels, corrected cluster
# assignments, and the raw data with the cluster centres.
flg1 = plt.figure(figsize=(12, 8))
class_ids = (0, 1, 2)

# Panel 1: samples coloured by their true label.
plt.subplot(1, 3, 1)
for cls in class_ids:
    mask = y == cls
    plt.scatter(x1[mask], x2[mask], label='class {}'.format(cls))
plt.legend(loc='upper left')  # show the per-class labels
plt.title("true")
plt.xlabel('x')
plt.ylabel('y')

# Panel 2: samples coloured by the corrected cluster assignment.
# asarray makes the element-wise comparison work for a list as well.
predicted = np.asarray(y_corrected)
plt.subplot(1, 3, 2)
for cls in class_ids:
    mask = predicted == cls
    plt.scatter(x1[mask], x2[mask], label='class {}'.format(cls))
plt.legend(loc='upper left')
plt.title("train_date")
plt.xlabel('x')
plt.ylabel('y')

# Panel 3: unlabelled data plus the cluster centres.
plt.subplot(1, 3, 3)
plt.scatter(x1, x2, label='data')
# The centre columns follow the feature order of x3, so look V1/V2 up by
# name instead of assuming they are the first two columns.
v1_col = x3.columns.get_loc("V1")
v2_col = x3.columns.get_loc("V2")
# Size 100, red 'x' markers for the centres.
plt.scatter(centers[:, v1_col], centers[:, v2_col], 100,
            marker='x', c='r', label='centers')
plt.title("original_data")
# Both scatters are labelled so this legend has entries to show.
plt.legend(loc='upper left')
plt.xlabel('x')
plt.ylabel('y')
plt.show()
# Results: