# -*- coding = utf-8 -*-
# @Time : 2021/11/21 13:33
# @Author : Luxlios
# @File : FCM.py
# @Software : PyCharm

import os

import numpy as np
import pandas as pd


def generalization(data):
    """Min-max scale every column of ``data`` into the range [0, 1].

    Args:
        data: 2-D array-like of numbers, one sample per row. Object-dtype
            arrays holding numbers (e.g. columns sliced out of a mixed
            DataFrame) are accepted and converted to float.

    Returns:
        A new float ndarray with the same shape as ``data``. The input is
        left untouched (the previous implementation overwrote it in place,
        which also truncated the result for integer inputs).
    """
    scaled = np.array(data, dtype=float)  # always a copy
    if scaled.size == 0:
        return scaled
    col_min = scaled.min(axis=0)
    col_range = scaled.max(axis=0) - col_min
    # A constant column has zero range; dividing by 1 instead maps it to 0.0
    # rather than producing NaN through 0/0.
    col_range = np.where(col_range > 0, col_range, 1.0)
    return (scaled - col_min) / col_range


def _membership_from_distance(distance, exponent):
    """Turn a (num, K) sample-to-center distance matrix into memberships.

    Implements ``u[i, j] = 1 / sum_k (d[i, j] / d[i, k]) ** exponent``.
    Samples that coincide with one or more centers (distance exactly 0) get
    their membership split evenly between those centers.
    """
    zero = distance == 0
    on_center = zero.any(axis=1)
    # Rows containing a zero distance are filled with dummy ones here and
    # overwritten below, so no 0/0 is ever evaluated.
    safe = np.where(on_center[:, np.newaxis], 1.0, distance)
    # Dividing by the row minimum keeps every base <= 1, so raising it to a
    # large exponent cannot overflow; the factor cancels in the normalisation.
    relative = (safe.min(axis=1, keepdims=True) / safe) ** exponent
    u = relative / relative.sum(axis=1, keepdims=True)
    u[on_center] = zero[on_center] / zero[on_center].sum(axis=1, keepdims=True)
    return u


# Fuzzy C-means clustering (soft clustering).
# Strategy: initialise the membership matrix, compute the cluster centers,
# then update the membership matrix, and repeat until it stops changing.
def FCM(data, K, alpha, tol=1e-5, max_iter=300):
    """Cluster ``data`` into ``K`` fuzzy clusters.

    Args:
        data: 2-D array-like, shape (num, features).
        K: number of clusters (>= 1).
        alpha: fuzziness exponent; must be greater than 1.
        tol: stop once the largest absolute change of any membership value
            between two iterations is below this value.
        max_iter: upper bound on the number of iterations.

    Returns:
        ``(center, u)``: ``center`` has shape (K, features) and ``u`` has
        shape (num, K) with every row summing to 1.

    Raises:
        ValueError: if ``data`` is empty, ``K < 1``, ``alpha <= 1`` or
            ``max_iter < 1``.
    """
    data = np.asarray(data, dtype=float)
    num = data.shape[0]
    if num == 0:
        raise ValueError('data must contain at least one sample')
    if K < 1:
        raise ValueError('K must be at least 1')
    if alpha <= 1:
        raise ValueError('alpha must be greater than 1')
    if max_iter < 1:
        raise ValueError('max_iter must be at least 1')

    # Random initial memberships, normalised so that every row sums to 1.
    u = np.random.random([num, K])
    u = u / u.sum(axis=1, keepdims=True)

    exponent = 2.0 / (alpha - 1)
    for _ in range(max_iter):
        # Update rules obtained by minimising the membership-weighted sum of
        # squared distances with Lagrange multipliers.
        weights = u ** alpha
        center = np.dot(weights.T, data) / weights.sum(axis=0)[:, np.newaxis]

        # Euclidean distance between every sample and every center.
        distance = np.linalg.norm(
            data[:, np.newaxis, :] - center[np.newaxis, :, :], axis=2)

        u_new = _membership_from_distance(distance, exponent)
        shift = np.abs(u_new - u).max()
        u = u_new
        # The former test ``sum(|u_new - u|) > 10`` scaled with num * K and
        # ended the iteration long before the memberships had settled.
        if shift < tol:
            break
    # Return the cluster centers and the membership matrix.
    return center, u


# Plot a clustering result.
# The iris data set has four features; only the first three are displayed.
def show(data, center, _class, k, name):
    """Draw a 3-D scatter plot of the clustered samples and their centers.

    Args:
        data: (num, >=3) array of samples; the first three columns are drawn.
        center: (k, >=3) array of cluster centers.
        _class: length-num integer labels in ``range(k)``.
        k: number of clusters.
        name: plot title.
    """
    # Imported lazily so the numeric routines stay usable without matplotlib.
    import matplotlib.pyplot as plt

    data = np.asarray(data, dtype=float)
    center = np.asarray(center, dtype=float)
    _class = np.asarray(_class)
    color = ['r', 'g', 'b', 'c', 'y', 'm']
    picture = plt.subplot(111, projection='3d')
    for i in range(k):
        shade = color[i % len(color)]  # cycle instead of failing for k > 6
        picture.scatter(center[i, 0], center[i, 1], center[i, 2],
                        c=shade, marker='x')
        members = data[_class == i]
        # One call per cluster instead of one call per sample.
        picture.scatter(members[:, 0], members[:, 1], members[:, 2],
                        c=shade, marker='.')
    plt.title(name)
    plt.show()


if __name__ == '__main__':
    # Load the iris data set; header=None because the file has no header row
    # (otherwise the first sample would be taken as column names).
    data = pd.read_csv(os.path.join('data', 'iris.data'), header=None)
    data = np.array(data)
    x = data[:, [0, 1, 2, 3]].astype(float)  # features
    y = data[:, 4]  # labels
    x = generalization(x)  # min-max scaling
    center2, u = FCM(x, 3, 2)
    # Take the cluster with the largest membership as the hard label.
    class2 = np.argmax(u, axis=1)
    show(x, center2, class2, 3, 'FCM')
import os

import numpy as np
import pandas as pd


# K-means clustering (hard clustering).
def K_means(data, K, tol=1e-4, max_iter=300):
    """Cluster ``data`` into ``K`` groups with Lloyd's algorithm.

    Args:
        data: 2-D array-like, shape (num, features).
        K: number of clusters, ``1 <= K <= num``.
        tol: stop once no center moves by more than this L1 distance
            between two iterations.
        max_iter: upper bound on the number of iterations.

    Returns:
        ``(center, _class)``: ``center`` is a float array of shape
        (K, features) and ``_class`` an integer array of length num holding
        the index of the nearest center for every sample.

    Raises:
        ValueError: if ``K`` is outside ``[1, num]`` or ``max_iter < 1``.
    """
    data = np.asarray(data, dtype=float)
    num = data.shape[0]
    if not 1 <= K <= num:
        raise ValueError('K must be between 1 and the number of samples')
    if max_iter < 1:
        raise ValueError('max_iter must be at least 1')

    # Pick K *distinct* samples as initial centers. Drawing with replacement
    # could select the same sample twice and leave a cluster empty.
    seeds = np.random.choice(num, size=K, replace=False)
    center = data[seeds]  # fancy indexing already returns a copy

    _class = np.zeros(num, dtype=int)  # cluster index of every sample
    distance = np.empty((num, K))
    for _ in range(max_iter):
        # Euclidean distance from every sample to every center, one center at
        # a time so the temporary stays at (num, features).
        for j in range(K):
            distance[:, j] = np.linalg.norm(data - center[j], axis=1)
        _class = np.argmin(distance, axis=1)

        center_new = center.copy()
        for j in range(K):
            cluster = data[_class == j]
            # An empty cluster keeps its previous center instead of becoming
            # NaN through the mean of zero rows.
            if len(cluster):
                center_new[j] = cluster.mean(axis=0)

        # Centers are always replaced by the new means. The former rule only
        # moved a center when it had shifted by more than 10, which can never
        # happen on data scaled to [0, 1].
        shift = np.abs(center_new - center).sum(axis=1).max()
        center = center_new
        if shift <= tol:
            break
    # Return the cluster centers and the label of every sample.
    return center, _class


# Plot a clustering result.
# The iris data set has four features; only the first three are displayed.
def show(data, center, _class, k, name):
    """Draw a 3-D scatter plot of the clustered samples in a new figure.

    Args:
        data: (num, >=3) array of samples; the first three columns are drawn.
        center: (k, >=3) array of cluster centers.
        _class: length-num integer labels in ``range(k)``.
        k: number of clusters.
        name: plot title.

    The figure is created but not displayed; the caller invokes
    ``plt.show()`` once all figures are ready.
    """
    # Imported lazily so the numeric routines stay usable without matplotlib.
    import matplotlib.pyplot as plt

    data = np.asarray(data, dtype=float)
    center = np.asarray(center, dtype=float)
    _class = np.asarray(_class)
    plt.figure()
    color = ['r', 'g', 'b', 'c', 'y', 'm']
    picture = plt.subplot(111, projection='3d')
    for i in range(k):
        shade = color[i % len(color)]  # cycle instead of failing for k > 6
        picture.scatter(center[i, 0], center[i, 1], center[i, 2],
                        c=shade, marker='x')
        members = data[_class == i]
        # One call per cluster instead of one call per sample.
        picture.scatter(members[:, 0], members[:, 1], members[:, 2],
                        c=shade, marker='.')
    plt.title(name)


if __name__ == '__main__':
    import cv2
    import matplotlib.pyplot as plt

    # Cluster the iris data set; header=None because the file has no header
    # row (otherwise the first sample would be taken as column names).
    data = pd.read_csv(os.path.join('data', 'iris.data'), header=None)
    data = np.array(data)
    x = data[:, [0, 1, 2, 3]].astype(float)  # features
    y = data[:, 4]  # labels
    # generalization() is the min-max scaler defined earlier in K_means.py.
    x = generalization(x)
    center1, class1 = K_means(x, 3)
    show(x, center1, class1, 3, 'K_means')

    # Image segmentation by clustering pixel colours.
    image_path = os.path.join('data', 'hh.png')
    img = cv2.imread(image_path)
    if img is None:  # cv2.imread signals failure by returning None
        raise FileNotFoundError('cannot read image: ' + image_path)
    _img = img[:, :, ::-1]  # reverse the channel order (BGR -> RGB) for display
    plt.figure()
    plt.subplot(111), plt.imshow(_img), plt.title('hh'), plt.axis('off')

    # Flatten the image into one row per pixel.
    pixel = np.float32(img.reshape(-1, 3))

    # Run K-means with 4, 6 and 8 clusters.
    cluster_counts = (4, 6, 8)
    segmentations = []
    for count in cluster_counts:
        _, labels = K_means(pixel, count)
        segmentations.append(labels.reshape(img.shape[0], img.shape[1]))

    # Show the three label maps side by side.
    plt.figure()
    for position, (count, segmented) in enumerate(
            zip(cluster_counts, segmentations), start=1):
        plt.subplot(1, 3, position)
        plt.imshow(segmented)
        plt.title('k=%d' % count)
        plt.axis('off')
    plt.show()
20 | 21 |
22 | 左边为K均值K_means算法流程图,右边为模糊C均值FCM算法流程图 23 | 24 | ### 结果展示 25 | #### K_mean算法在Iris数据集聚类和图片聚类分割结果 26 | 鸢尾花数据集有四维特征,这里只展示前三维。 27 |
28 | 29 |
30 | 原图片如下。 31 |
32 | 33 |
34 | 聚类分割结果如下,簇数分别为4、6、8。 35 |
36 | 37 |
38 | 39 | #### FCM算法在Iris数据集聚类结果 40 | 鸢尾花数据集有四维特征,这里只展示前三维。同时,由于FCM算法得到的是隶属度矩阵,为了展示,取每个样本隶属度最大的作为其聚类结果。 41 |
42 | 43 |
44 | -------------------------------------------------------------------------------- /data/hh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Luxlios/Clustering/60925f742d22af0d172ba31d8a1ccb1128b1f93a/data/hh.png -------------------------------------------------------------------------------- /data/iris.data: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 
5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 
6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica 151 | 152 | --------------------------------------------------------------------------------