1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
# Elbow method: pick k at the clear inflection point of the inertia curve,
# i.e. the place where its curvature is largest.
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Single fit with k=2 to look at the inertia value of one model.
model = KMeans(n_clusters=2)
model.fit(Xstd)
model.inertia_  # only displayed when run interactively (e.g. a notebook cell)

# Candidate cluster counts; shared by the fitting loop and the x-axis of the
# plot so the two can never drift apart.
xs = list(range(2, 12))

# Inertia of the fitted model for every candidate k, in the same order as xs.
result_list = []
for i in xs:
    model = KMeans(n_clusters=i, random_state=1)
    model.fit(Xstd)
    result_list.append(model.inertia_)

# Inertia against k: the "elbow" of this curve is the suggested k.
plt.plot(xs, result_list)
plt.show()
# Silhouette coefficient: the larger the score, the better.
from sklearn import metrics

# Fit one model per candidate k and print "k : silhouette score".
for i in range(2, 12):
    model = KMeans(n_clusters=i, random_state=1)
    model.fit(Xstd)
    labels = model.labels_
    res = metrics.silhouette_score(Xstd, labels)
    print(i, ':', res)
# Show the clustering result for K=3.
model = KMeans(n_clusters=3, random_state=1)
model.fit(Xstd)
labels = model.labels_
# Store the cluster assignment next to the original columns so both
# groupings can be plotted from the same frame.
iris['labels'] = labels


def _plot_sepal_by_group(frame, column):
    """Plot sepal length against sepal width, one series per value of *column*.

    Args:
        frame: Table with 'Sepal.Length', 'Sepal.Width' and *column* columns.
        column: Name of the column whose distinct values define the series.
    """
    # sorted() gives a stable series order; iterating a bare set of strings
    # follows hash order, which can change from one run to the next.
    for value in sorted(set(frame[column])):
        subset = frame[frame[column] == value]
        plt.plot(subset['Sepal.Length'], subset['Sepal.Width'], 'o', label=value)
    plt.legend()
    plt.show()


sorted(set(iris['Species']))  # only displayed when run interactively

# Same scatter plot twice: grouped by the true species, then by the k-means
# cluster labels, for a visual comparison.
_plot_sepal_by_group(iris, 'Species')
_plot_sepal_by_group(iris, 'labels')
|