├── data
├── hh.png
└── iris.data
├── README.md
├── FCM.py
└── K_means.py
/data/hh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Luxlios/Clustering/HEAD/data/hh.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ### Clustering
2 | 手写实现 K 均值(K_means)和模糊 C 均值(FCM)聚类算法,对 Iris 鸢尾花数据集进行聚类,并使用 K_means 对图像进行聚类分割。
3 |
4 | 数据集:[Iris](http://archive.ics.uci.edu/ml/datasets/Iris)
5 |
6 | ### Requirement
7 | `pandas`
8 | `numpy`
9 | `opencv-python`(导入名为 `cv2`)
10 | `matplotlib`
11 |
12 | ### Content
13 | - [算法流程图](#算法流程图)
14 | - [结果展示](#结果展示)
15 | - [K_means](#K_mean算法在Iris数据集聚类和图片聚类分割结果)
16 | - [FCM](#FCM算法在Iris数据集聚类结果)
17 |
18 | ### 算法流程图
19 |
20 |

21 |
22 | 左边为K均值K_means算法流程图,右边为模糊C均值FCM算法流程图
23 |
24 | ### 结果展示
25 | #### K_mean算法在Iris数据集聚类和图片聚类分割结果
26 | 鸢尾花数据集有四维特征,这里只展示前三维。
27 |
28 |

29 |
30 | 原图片如下。
31 |
32 |

33 |
34 | 聚类分割结果如下,簇数分别为4、6、8。
35 |
36 |

37 |
38 |
39 | #### FCM算法在Iris数据集聚类结果
40 | 鸢尾花数据集有四维特征,这里只展示前三维。同时,由于FCM算法得到的是隶属度矩阵,为了展示,取每个样本隶属度最大的作为其聚类结果。
41 |
42 |

43 |
44 |
--------------------------------------------------------------------------------
/FCM.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2021/11/21 13:33
3 | # @Author : Luxlios
4 | # @File : FCM.py
5 | # @Software : PyCharm
6 |
7 | import pandas as pd
8 | import numpy as np
9 | import matplotlib.pyplot as plt
10 |
11 | # 数据标准化函数
def generalization(data):
    """Min-max scale every column of ``data`` to the range [0, 1], in place.

    Args:
        data: 2-D NumPy array with one sample per row and one feature per
            column. The array is modified in place, so scaled values are
            cast to the array's own dtype on assignment.

    Returns:
        The same array object that was passed in, after scaling.
    """
    if data.shape[0] == 0:
        # Nothing to scale; np.min / np.max would raise on an empty column.
        return data
    for col in range(data.shape[1]):
        column = data[:, col]
        low = np.min(column)
        span = np.max(column) - low
        if span == 0:
            # Constant column: map it to 0 instead of dividing by zero.
            data[:, col] = 0
        else:
            data[:, col] = (column - low) / span
    return data
20 |
21 |
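22 | # FCM clustering algorithm (soft clustering)
23 | # Strategy: initialize the membership matrix, compute the cluster centers, evaluate the cost function, then update the membership matrix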
def FCM(data, K, alpha, threshold=10):
    """Cluster ``data`` with fuzzy C-means (soft clustering).

    The membership matrix is initialised randomly with every row summing to
    1. Each iteration recomputes the cluster centers from the current
    memberships and then recomputes the memberships from the sample-to-center
    Euclidean distances. Iteration stops once the summed absolute change of
    the membership matrix is no longer greater than ``threshold``.

    Args:
        data: 2-D array-like of shape (samples, features); converted to float.
        K: number of clusters (at least 1).
        alpha: fuzziness exponent; must be greater than 1.
        threshold: stop when ``sum(|u_new - u|)`` is not greater than this
            value. Defaults to 10, the value previously hard-coded here.

    Returns:
        ``(center, u)``: ``center`` has shape (K, features) and is the set of
        centers computed at the start of the final iteration; ``u`` has shape
        (samples, K) and is the final membership matrix.

    Raises:
        ValueError: if ``alpha <= 1`` or ``K < 1``.
    """
    if alpha <= 1:
        raise ValueError('alpha must be greater than 1')
    if K < 1:
        raise ValueError('K must be at least 1')

    # Arrays built from a pandas frame with a string column are object dtype;
    # convert once so all later arithmetic runs on native floats.
    data = np.asarray(data, dtype=float)
    num = data.shape[0]
    exponent = 2.0 / (alpha - 1)

    # Random memberships, normalised so every row sums to 1.
    u = np.random.random([num, K])
    u = u / np.sum(u, axis=1)[:, np.newaxis]

    while True:
        # Center update: mean of the samples weighted by u ** alpha.
        weights = u ** alpha
        center = np.dot(weights.T, data) / np.sum(weights, axis=0)[:, np.newaxis]

        # L2 distance from every sample to every center, one center at a time.
        distance = np.empty([num, K])
        for j in range(K):
            distance[:, j] = np.linalg.norm(data - center[j], axis=1)

        # Membership update: u_ij = 1 / sum_k (d_ij / d_ik) ** exponent.
        # Dividing each row by its smallest distance first keeps the powers
        # inside (0, 1], so they cannot overflow.
        nearest = distance.min(axis=1, keepdims=True)
        on_center = nearest[:, 0] == 0
        scaled = distance / np.where(nearest == 0, 1.0, nearest)
        with np.errstate(divide='ignore', invalid='ignore'):
            inverse = scaled ** (-exponent)
            u_new = inverse / inverse.sum(axis=1, keepdims=True)
        if on_center.any():
            # A sample lying exactly on one or more centers would give 0/0;
            # share its membership equally among those centers instead.
            hits = (distance[on_center] == 0).astype(float)
            u_new[on_center] = hits / hits.sum(axis=1, keepdims=True)

        delta = np.sum(np.abs(u_new - u))
        u = u_new
        # "not >" rather than "<=" so that a NaN delta also ends the loop.
        if not delta > threshold:
            break

    return center, u
58 |
59 | # 展示聚类结果的函数
60 | # iris数据集有四维,只选用前三维数据进行展示
def show(data, center, _class, k, name):
    """Draw a 3-D scatter plot of a clustering result and display it.

    Only the first three feature columns are plotted. Cluster centers are
    drawn with an 'x' marker and samples with a '.' marker, coloured by
    cluster label. Colours repeat when there are more labels than colours.

    Args:
        data: 2-D array of samples; columns 0-2 are used as x, y, z.
        center: sequence of cluster centers; the first ``k`` are drawn.
        _class: integer cluster label for every sample.
        k: number of cluster centers to draw.
        name: plot title.
    """
    color = ['r', 'g', 'b', 'c', 'y', 'm']
    # Convert up front so object-dtype input is plotted as plain floats.
    points = np.asarray(data)[:, :3].astype(float)
    labels = np.asarray(_class).astype(int)

    picture = plt.subplot(111, projection='3d')
    for i in range(k):
        # Modulo keeps the colour lookup valid when k exceeds the palette.
        picture.scatter(center[i][0], center[i][1], center[i][2],
                        c=color[i % len(color)], marker='x')
    # One scatter call per label instead of one per sample.
    for label in np.unique(labels):
        members = points[labels == label]
        picture.scatter(members[:, 0], members[:, 1], members[:, 2],
                        c=color[label % len(color)], marker='.')
    plt.title(name)
    plt.show()
72 |
if __name__ == '__main__':
    import os

    # Read the Iris data set. The path is built with os.path.join so it works
    # on every platform and contains no backslash escape sequences.
    # header=None: the file has no header row, so the first line is data.
    data = pd.read_csv(os.path.join('.', 'data', 'iris.data'), header=None)
    data = np.array(data)
    # The label column makes the whole array object dtype; cast the four
    # feature columns to float before doing arithmetic on them.
    x = data[:, [0, 1, 2, 3]].astype(float)  # features
    y = data[:, 4]  # labels
    x = generalization(x)  # min-max scaling
    center2, u = FCM(x, 3, 2)
    # Take the cluster with the largest membership as each sample's class.
    class2 = np.argmax(u, axis=1)
    show(x, center2, class2, 3, 'FCM')
84 |
--------------------------------------------------------------------------------
/K_means.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2021/11/21 13:26
3 | # @Author : Luxlios
4 | # @File : K_means.py
5 | # @Software : PyCharm
6 |
7 | import pandas as pd
8 | import numpy as np
9 | import cv2
10 | import matplotlib.pyplot as plt
11 |
12 | # 数据标准化函数
def generalization(data):
    """Min-max scale every column of ``data`` to the range [0, 1], in place.

    Args:
        data: 2-D NumPy array with one sample per row and one feature per
            column. The array is modified in place, so scaled values are
            cast to the array's own dtype on assignment.

    Returns:
        The same array object that was passed in, after scaling.
    """
    if data.shape[0] == 0:
        # Nothing to scale; np.min / np.max would raise on an empty column.
        return data
    for col in range(data.shape[1]):
        column = data[:, col]
        low = np.min(column)
        span = np.max(column) - low
        if span == 0:
            # Constant column: map it to 0 instead of dividing by zero.
            data[:, col] = 0
        else:
            data[:, col] = (column - low) / span
    return data
21 |
22 | # Kmeans聚类算法(硬聚类)
def K_means(data, K, tol=1e-4, max_iter=300):
    """Cluster ``data`` with K-means (hard clustering).

    ``K`` samples are picked at random as the initial centers. Each iteration
    assigns every sample to its nearest center (Euclidean distance) and then
    moves every center to the mean of its assigned samples. Iteration stops
    when no center moves by more than ``tol`` (L1 distance) or after
    ``max_iter`` iterations.

    Args:
        data: 2-D array-like of shape (samples, features). Non-floating input
            is converted to float.
        K: number of clusters (at least 1).
        tol: largest L1 shift of a center that still counts as "unchanged".
        max_iter: upper bound on the number of iterations.

    Returns:
        ``(center, _class)``: ``center`` has shape (K, features); ``_class``
        is an integer array of shape (samples,) holding the index of each
        sample's nearest center from the last assignment step.

    Raises:
        ValueError: if ``data`` has no rows or ``K < 1``.
    """
    data = np.asarray(data)
    if not np.issubdtype(data.dtype, np.floating):
        # Object or integer input would make the mean update slow or lossy.
        data = data.astype(float)
    num = data.shape[0]
    if num == 0:
        raise ValueError('data must contain at least one sample')
    if K < 1:
        raise ValueError('K must be at least 1')

    # Pick distinct samples as seeds whenever there are enough of them.
    seeds = np.random.choice(num, size=K, replace=K > num)
    center = data[seeds]  # fancy indexing returns a copy
    _class = np.zeros(num, dtype=int)

    for _ in range(max_iter):
        # Distance of every sample to every center, one center at a time.
        distance = np.empty([num, K])
        for j in range(K):
            distance[:, j] = np.linalg.norm(data - center[j], axis=1)
        _class = np.argmin(distance, axis=1).astype(int)

        moved = False
        for j in range(K):
            cluster = data[_class == j]
            if cluster.shape[0] == 0:
                # Empty cluster: keep the old center rather than a NaN mean.
                continue
            center_new = np.mean(cluster, axis=0)
            if np.sum(np.abs(center[j] - center_new)) > tol:
                moved = True
            # Always store the mean so the returned centers are up to date.
            center[j] = center_new
        if not moved:
            break

    return center, _class
58 |
59 |
60 | # 展示聚类结果的函数
61 | # iris数据集有四维,只选用前三维数据进行展示
def show(data, center, _class, k, name):
    """Draw a 3-D scatter plot of a clustering result on a new figure.

    Only the first three feature columns are plotted. Cluster centers are
    drawn with an 'x' marker and samples with a '.' marker, coloured by
    cluster label. Colours repeat when there are more labels than colours.
    The figure is created but not displayed; the caller is expected to call
    ``plt.show()``.

    Args:
        data: 2-D array of samples; columns 0-2 are used as x, y, z.
        center: sequence of cluster centers; the first ``k`` are drawn.
        _class: integer cluster label for every sample.
        k: number of cluster centers to draw.
        name: plot title.
    """
    plt.figure()
    color = ['r', 'g', 'b', 'c', 'y', 'm']
    # Convert up front so object-dtype input is plotted as plain floats.
    points = np.asarray(data)[:, :3].astype(float)
    labels = np.asarray(_class).astype(int)

    picture = plt.subplot(111, projection='3d')
    for i in range(k):
        # Modulo keeps the colour lookup valid when k exceeds the palette.
        picture.scatter(center[i][0], center[i][1], center[i][2],
                        c=color[i % len(color)], marker='x')
    # One scatter call per label instead of one per sample.
    for label in np.unique(labels):
        members = points[labels == label]
        picture.scatter(members[:, 0], members[:, 1], members[:, 2],
                        c=color[label % len(color)], marker='.')
    plt.title(name)
73 |
if __name__ == '__main__':
    import os

    # Paths are built with os.path.join so they work on every platform and
    # contain no backslash escape sequences.
    data_dir = os.path.join('.', 'data')

    # --- Iris clustering ---
    # header=None: the file has no header row, so the first line is data.
    data = pd.read_csv(os.path.join(data_dir, 'iris.data'), header=None)
    data = np.array(data)
    # The label column makes the whole array object dtype; cast the four
    # feature columns to float before doing arithmetic on them.
    x = data[:, [0, 1, 2, 3]].astype(float)  # features
    y = data[:, 4]  # labels
    x = generalization(x)  # min-max scaling
    center1, class1 = K_means(x, 3)
    show(x, center1, class1, 3, 'K_means')

    # --- Image segmentation by clustering pixel colours ---
    image_path = os.path.join(data_dir, 'hh.png')
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('cannot read image: ' + image_path)
    _img = img[:, :, ::-1]  # reverse the channel axis (BGR -> RGB) for display
    plt.figure()
    plt.subplot(111)
    plt.imshow(_img)
    plt.title('hh')
    plt.axis('off')

    # Flatten the image into one row per pixel, three channel values each.
    pixel = img.reshape(-1, 3)
    pixel = np.float32(pixel)
    # Cluster the pixels with 4, 6 and 8 clusters.
    center3, class3 = K_means(pixel, 4)
    center4, class4 = K_means(pixel, 6)
    center5, class5 = K_means(pixel, 8)

    # Reshape the per-pixel labels back to the image grid and show them.
    k_picture3 = class3.reshape(img.shape[0], img.shape[1])
    k_picture4 = class4.reshape(img.shape[0], img.shape[1])
    k_picture5 = class5.reshape(img.shape[0], img.shape[1])
    plt.figure()
    for position, labels, title in ((131, k_picture3, 'k=4'),
                                    (132, k_picture4, 'k=6'),
                                    (133, k_picture5, 'k=8')):
        plt.subplot(position)
        plt.imshow(labels)
        plt.title(title)
        plt.axis('off')
    plt.show()
108 |
--------------------------------------------------------------------------------
/data/iris.data:
--------------------------------------------------------------------------------
1 | 5.1,3.5,1.4,0.2,Iris-setosa
2 | 4.9,3.0,1.4,0.2,Iris-setosa
3 | 4.7,3.2,1.3,0.2,Iris-setosa
4 | 4.6,3.1,1.5,0.2,Iris-setosa
5 | 5.0,3.6,1.4,0.2,Iris-setosa
6 | 5.4,3.9,1.7,0.4,Iris-setosa
7 | 4.6,3.4,1.4,0.3,Iris-setosa
8 | 5.0,3.4,1.5,0.2,Iris-setosa
9 | 4.4,2.9,1.4,0.2,Iris-setosa
10 | 4.9,3.1,1.5,0.1,Iris-setosa
11 | 5.4,3.7,1.5,0.2,Iris-setosa
12 | 4.8,3.4,1.6,0.2,Iris-setosa
13 | 4.8,3.0,1.4,0.1,Iris-setosa
14 | 4.3,3.0,1.1,0.1,Iris-setosa
15 | 5.8,4.0,1.2,0.2,Iris-setosa
16 | 5.7,4.4,1.5,0.4,Iris-setosa
17 | 5.4,3.9,1.3,0.4,Iris-setosa
18 | 5.1,3.5,1.4,0.3,Iris-setosa
19 | 5.7,3.8,1.7,0.3,Iris-setosa
20 | 5.1,3.8,1.5,0.3,Iris-setosa
21 | 5.4,3.4,1.7,0.2,Iris-setosa
22 | 5.1,3.7,1.5,0.4,Iris-setosa
23 | 4.6,3.6,1.0,0.2,Iris-setosa
24 | 5.1,3.3,1.7,0.5,Iris-setosa
25 | 4.8,3.4,1.9,0.2,Iris-setosa
26 | 5.0,3.0,1.6,0.2,Iris-setosa
27 | 5.0,3.4,1.6,0.4,Iris-setosa
28 | 5.2,3.5,1.5,0.2,Iris-setosa
29 | 5.2,3.4,1.4,0.2,Iris-setosa
30 | 4.7,3.2,1.6,0.2,Iris-setosa
31 | 4.8,3.1,1.6,0.2,Iris-setosa
32 | 5.4,3.4,1.5,0.4,Iris-setosa
33 | 5.2,4.1,1.5,0.1,Iris-setosa
34 | 5.5,4.2,1.4,0.2,Iris-setosa
35 | 4.9,3.1,1.5,0.1,Iris-setosa
36 | 5.0,3.2,1.2,0.2,Iris-setosa
37 | 5.5,3.5,1.3,0.2,Iris-setosa
38 | 4.9,3.1,1.5,0.1,Iris-setosa
39 | 4.4,3.0,1.3,0.2,Iris-setosa
40 | 5.1,3.4,1.5,0.2,Iris-setosa
41 | 5.0,3.5,1.3,0.3,Iris-setosa
42 | 4.5,2.3,1.3,0.3,Iris-setosa
43 | 4.4,3.2,1.3,0.2,Iris-setosa
44 | 5.0,3.5,1.6,0.6,Iris-setosa
45 | 5.1,3.8,1.9,0.4,Iris-setosa
46 | 4.8,3.0,1.4,0.3,Iris-setosa
47 | 5.1,3.8,1.6,0.2,Iris-setosa
48 | 4.6,3.2,1.4,0.2,Iris-setosa
49 | 5.3,3.7,1.5,0.2,Iris-setosa
50 | 5.0,3.3,1.4,0.2,Iris-setosa
51 | 7.0,3.2,4.7,1.4,Iris-versicolor
52 | 6.4,3.2,4.5,1.5,Iris-versicolor
53 | 6.9,3.1,4.9,1.5,Iris-versicolor
54 | 5.5,2.3,4.0,1.3,Iris-versicolor
55 | 6.5,2.8,4.6,1.5,Iris-versicolor
56 | 5.7,2.8,4.5,1.3,Iris-versicolor
57 | 6.3,3.3,4.7,1.6,Iris-versicolor
58 | 4.9,2.4,3.3,1.0,Iris-versicolor
59 | 6.6,2.9,4.6,1.3,Iris-versicolor
60 | 5.2,2.7,3.9,1.4,Iris-versicolor
61 | 5.0,2.0,3.5,1.0,Iris-versicolor
62 | 5.9,3.0,4.2,1.5,Iris-versicolor
63 | 6.0,2.2,4.0,1.0,Iris-versicolor
64 | 6.1,2.9,4.7,1.4,Iris-versicolor
65 | 5.6,2.9,3.6,1.3,Iris-versicolor
66 | 6.7,3.1,4.4,1.4,Iris-versicolor
67 | 5.6,3.0,4.5,1.5,Iris-versicolor
68 | 5.8,2.7,4.1,1.0,Iris-versicolor
69 | 6.2,2.2,4.5,1.5,Iris-versicolor
70 | 5.6,2.5,3.9,1.1,Iris-versicolor
71 | 5.9,3.2,4.8,1.8,Iris-versicolor
72 | 6.1,2.8,4.0,1.3,Iris-versicolor
73 | 6.3,2.5,4.9,1.5,Iris-versicolor
74 | 6.1,2.8,4.7,1.2,Iris-versicolor
75 | 6.4,2.9,4.3,1.3,Iris-versicolor
76 | 6.6,3.0,4.4,1.4,Iris-versicolor
77 | 6.8,2.8,4.8,1.4,Iris-versicolor
78 | 6.7,3.0,5.0,1.7,Iris-versicolor
79 | 6.0,2.9,4.5,1.5,Iris-versicolor
80 | 5.7,2.6,3.5,1.0,Iris-versicolor
81 | 5.5,2.4,3.8,1.1,Iris-versicolor
82 | 5.5,2.4,3.7,1.0,Iris-versicolor
83 | 5.8,2.7,3.9,1.2,Iris-versicolor
84 | 6.0,2.7,5.1,1.6,Iris-versicolor
85 | 5.4,3.0,4.5,1.5,Iris-versicolor
86 | 6.0,3.4,4.5,1.6,Iris-versicolor
87 | 6.7,3.1,4.7,1.5,Iris-versicolor
88 | 6.3,2.3,4.4,1.3,Iris-versicolor
89 | 5.6,3.0,4.1,1.3,Iris-versicolor
90 | 5.5,2.5,4.0,1.3,Iris-versicolor
91 | 5.5,2.6,4.4,1.2,Iris-versicolor
92 | 6.1,3.0,4.6,1.4,Iris-versicolor
93 | 5.8,2.6,4.0,1.2,Iris-versicolor
94 | 5.0,2.3,3.3,1.0,Iris-versicolor
95 | 5.6,2.7,4.2,1.3,Iris-versicolor
96 | 5.7,3.0,4.2,1.2,Iris-versicolor
97 | 5.7,2.9,4.2,1.3,Iris-versicolor
98 | 6.2,2.9,4.3,1.3,Iris-versicolor
99 | 5.1,2.5,3.0,1.1,Iris-versicolor
100 | 5.7,2.8,4.1,1.3,Iris-versicolor
101 | 6.3,3.3,6.0,2.5,Iris-virginica
102 | 5.8,2.7,5.1,1.9,Iris-virginica
103 | 7.1,3.0,5.9,2.1,Iris-virginica
104 | 6.3,2.9,5.6,1.8,Iris-virginica
105 | 6.5,3.0,5.8,2.2,Iris-virginica
106 | 7.6,3.0,6.6,2.1,Iris-virginica
107 | 4.9,2.5,4.5,1.7,Iris-virginica
108 | 7.3,2.9,6.3,1.8,Iris-virginica
109 | 6.7,2.5,5.8,1.8,Iris-virginica
110 | 7.2,3.6,6.1,2.5,Iris-virginica
111 | 6.5,3.2,5.1,2.0,Iris-virginica
112 | 6.4,2.7,5.3,1.9,Iris-virginica
113 | 6.8,3.0,5.5,2.1,Iris-virginica
114 | 5.7,2.5,5.0,2.0,Iris-virginica
115 | 5.8,2.8,5.1,2.4,Iris-virginica
116 | 6.4,3.2,5.3,2.3,Iris-virginica
117 | 6.5,3.0,5.5,1.8,Iris-virginica
118 | 7.7,3.8,6.7,2.2,Iris-virginica
119 | 7.7,2.6,6.9,2.3,Iris-virginica
120 | 6.0,2.2,5.0,1.5,Iris-virginica
121 | 6.9,3.2,5.7,2.3,Iris-virginica
122 | 5.6,2.8,4.9,2.0,Iris-virginica
123 | 7.7,2.8,6.7,2.0,Iris-virginica
124 | 6.3,2.7,4.9,1.8,Iris-virginica
125 | 6.7,3.3,5.7,2.1,Iris-virginica
126 | 7.2,3.2,6.0,1.8,Iris-virginica
127 | 6.2,2.8,4.8,1.8,Iris-virginica
128 | 6.1,3.0,4.9,1.8,Iris-virginica
129 | 6.4,2.8,5.6,2.1,Iris-virginica
130 | 7.2,3.0,5.8,1.6,Iris-virginica
131 | 7.4,2.8,6.1,1.9,Iris-virginica
132 | 7.9,3.8,6.4,2.0,Iris-virginica
133 | 6.4,2.8,5.6,2.2,Iris-virginica
134 | 6.3,2.8,5.1,1.5,Iris-virginica
135 | 6.1,2.6,5.6,1.4,Iris-virginica
136 | 7.7,3.0,6.1,2.3,Iris-virginica
137 | 6.3,3.4,5.6,2.4,Iris-virginica
138 | 6.4,3.1,5.5,1.8,Iris-virginica
139 | 6.0,3.0,4.8,1.8,Iris-virginica
140 | 6.9,3.1,5.4,2.1,Iris-virginica
141 | 6.7,3.1,5.6,2.4,Iris-virginica
142 | 6.9,3.1,5.1,2.3,Iris-virginica
143 | 5.8,2.7,5.1,1.9,Iris-virginica
144 | 6.8,3.2,5.9,2.3,Iris-virginica
145 | 6.7,3.3,5.7,2.5,Iris-virginica
146 | 6.7,3.0,5.2,2.3,Iris-virginica
147 | 6.3,2.5,5.0,1.9,Iris-virginica
148 | 6.5,3.0,5.2,2.0,Iris-virginica
149 | 6.2,3.4,5.4,2.3,Iris-virginica
150 | 5.9,3.0,5.1,1.8,Iris-virginica
151 |
152 |
--------------------------------------------------------------------------------