# -*- coding: utf-8 -*-
import numpy as np
from sklearn.cluster import KMeans #导入Kmeans算法包
from sklearn.metrics import silhouette_score #计算轮廓系数
import matplotlib.pyplot as plt #画图工具
# Demo: run K-means on a small 2-D toy data set for several cluster counts
# and compare the resulting clusterings via the silhouette coefficient.

# Subplot 1 of a 3x2 grid: scatter plot of the raw, unlabeled points.
plt.subplot(3, 2, 1)
x1 = np.array([1, 2, 3, 1, 5, 6, 5, 5, 6, 7, 8, 9, 7, 9])
x2 = np.array([1, 3, 2, 2, 8, 6, 7, 6, 7, 1, 2, 1, 1, 3])
# Pair the coordinates into an (n_samples, 2) array. np.array(zip(...)) only
# works on Python 2; on Python 3 zip() is a lazy iterator, so it would produce
# a 0-d object array and the subsequent reshape would fail.
x = np.column_stack((x1, x2))
plt.xlim([0, 10])
plt.ylim([0, 10])
plt.title('Instances')
plt.scatter(x1, x2)

# One color/marker pair per cluster label; 8 entries because the largest
# cluster count tried below is 8.
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'b']
markers = ['o', 's', 'D', 'v', '^', 'p', '*', '+']
clusters = [2, 3, 4, 5, 8]
subplot_counter = 1
sc_scores = []
for t in clusters:
    # Each cluster count gets the next cell of the 3x2 grid (cells 2..6).
    subplot_counter += 1
    plt.subplot(3, 2, subplot_counter)
    kmeans_model = KMeans(n_clusters=t).fit(x)
    # Draw every point with the color and marker of its assigned cluster.
    for i, l in enumerate(kmeans_model.labels_):
        plt.plot(x1[i], x2[i], color=colors[l], marker=markers[l], ls='None')
    plt.xlim([0, 10])
    plt.ylim([0, 10])
    # Silhouette coefficient of this clustering; kept for the summary plot.
    sc_score = silhouette_score(x, kmeans_model.labels_, metric='euclidean')
    sc_scores.append(sc_score)
    plt.title('K=%s,silhouette coefficient=%0.03f' % (t, sc_score))

# Separate figure: silhouette coefficient as a function of the cluster count.
plt.figure()
plt.plot(clusters, sc_scores, '*-')
plt.xlabel('Numbers of clusters')
plt.ylabel('Silhouette Coefficient score')
plt.show()