├── KNN数据集 └── iris.csv ├── README.md ├── bpnn_V1数据集 ├── iris_test.csv └── iris_training.csv ├── bpnn_V2数据集 ├── iris_test.csv └── iris_training.csv ├── iris_data_classification_bpnn_V1.py ├── iris_data_classification_bpnn_V2.py ├── iris_data_classification_knn.py ├── iris_data_cluster_sklearn.py ├── iris_data_decision_tree_sklearn.py ├── iris_data_knn_sklearn.py ├── sklearn数据集 └── iris.csv ├── 原始数据集 └── iris.csv └── 格式处理数据集 └── iris.csv /KNN数据集/iris.csv: -------------------------------------------------------------------------------- 1 | 150,4,setosa,versicolor,virginica 2 | 6.4,2.8,5.6,2.2,2 3 | 5,2.3,3.3,1,1 4 | 4.9,2.5,4.5,1.7,2 5 | 4.9,3.1,1.5,0.1,0 6 | 5.7,3.8,1.7,0.3,0 7 | 4.4,3.2,1.3,0.2,0 8 | 5.4,3.4,1.5,0.4,0 9 | 6.9,3.1,5.1,2.3,2 10 | 6.7,3.1,4.4,1.4,1 11 | 5.1,3.7,1.5,0.4,0 12 | 5.2,2.7,3.9,1.4,1 13 | 6.9,3.1,4.9,1.5,1 14 | 5.8,4,1.2,0.2,0 15 | 5.4,3.9,1.7,0.4,0 16 | 7.7,3.8,6.7,2.2,2 17 | 6.3,3.3,4.7,1.6,1 18 | 6.8,3.2,5.9,2.3,2 19 | 7.6,3,6.6,2.1,2 20 | 6.4,3.2,5.3,2.3,2 21 | 5.7,4.4,1.5,0.4,0 22 | 6.7,3.3,5.7,2.1,2 23 | 6.4,2.8,5.6,2.1,2 24 | 5.4,3.9,1.3,0.4,0 25 | 6.1,2.6,5.6,1.4,2 26 | 7.2,3,5.8,1.6,2 27 | 5.2,3.5,1.5,0.2,0 28 | 5.8,2.6,4,1.2,1 29 | 5.9,3,5.1,1.8,2 30 | 5.4,3,4.5,1.5,1 31 | 6.7,3,5,1.7,1 32 | 6.3,2.3,4.4,1.3,1 33 | 5.1,2.5,3,1.1,1 34 | 6.4,3.2,4.5,1.5,1 35 | 6.8,3,5.5,2.1,2 36 | 6.2,2.8,4.8,1.8,2 37 | 6.9,3.2,5.7,2.3,2 38 | 6.5,3.2,5.1,2,2 39 | 5.8,2.8,5.1,2.4,2 40 | 5.1,3.8,1.5,0.3,0 41 | 4.8,3,1.4,0.3,0 42 | 7.9,3.8,6.4,2,2 43 | 5.8,2.7,5.1,1.9,2 44 | 6.7,3,5.2,2.3,2 45 | 5.1,3.8,1.9,0.4,0 46 | 4.7,3.2,1.6,0.2,0 47 | 6,2.2,5,1.5,2 48 | 4.8,3.4,1.6,0.2,0 49 | 7.7,2.6,6.9,2.3,2 50 | 4.6,3.6,1,0.2,0 51 | 7.2,3.2,6,1.8,2 52 | 5,3.3,1.4,0.2,0 53 | 6.6,3,4.4,1.4,1 54 | 6.1,2.8,4,1.3,1 55 | 5,3.2,1.2,0.2,0 56 | 7,3.2,4.7,1.4,1 57 | 6,3,4.8,1.8,2 58 | 7.4,2.8,6.1,1.9,2 59 | 5.8,2.7,5.1,1.9,2 60 | 6.2,3.4,5.4,2.3,2 61 | 5,2,3.5,1,1 62 | 5.6,2.5,3.9,1.1,1 63 | 6.7,3.1,5.6,2.4,2 64 | 6.3,2.5,5,1.9,2 65 | 6.4,3.1,5.5,1.8,2 66 | 
6.2,2.2,4.5,1.5,1 67 | 7.3,2.9,6.3,1.8,2 68 | 4.4,3,1.3,0.2,0 69 | 7.2,3.6,6.1,2.5,2 70 | 6.5,3,5.5,1.8,2 71 | 5,3.4,1.5,0.2,0 72 | 4.7,3.2,1.3,0.2,0 73 | 6.6,2.9,4.6,1.3,1 74 | 5.5,3.5,1.3,0.2,0 75 | 7.7,3,6.1,2.3,2 76 | 6.1,3,4.9,1.8,2 77 | 4.9,3.1,1.5,0.1,0 78 | 5.5,2.4,3.8,1.1,1 79 | 5.7,2.9,4.2,1.3,1 80 | 6,2.9,4.5,1.5,1 81 | 6.4,2.7,5.3,1.9,2 82 | 5.4,3.7,1.5,0.2,0 83 | 6.1,2.9,4.7,1.4,1 84 | 6.5,2.8,4.6,1.5,1 85 | 5.6,2.7,4.2,1.3,1 86 | 6.3,3.4,5.6,2.4,2 87 | 4.9,3.1,1.5,0.1,0 88 | 6.8,2.8,4.8,1.4,1 89 | 5.7,2.8,4.5,1.3,1 90 | 6,2.7,5.1,1.6,1 91 | 5,3.5,1.3,0.3,0 92 | 6.5,3,5.2,2,2 93 | 6.1,2.8,4.7,1.2,1 94 | 5.1,3.5,1.4,0.3,0 95 | 4.6,3.1,1.5,0.2,0 96 | 6.5,3,5.8,2.2,2 97 | 4.6,3.4,1.4,0.3,0 98 | 4.6,3.2,1.4,0.2,0 99 | 7.7,2.8,6.7,2,2 100 | 5.9,3.2,4.8,1.8,1 101 | 5.1,3.8,1.6,0.2,0 102 | 4.9,3,1.4,0.2,0 103 | 4.9,2.4,3.3,1,1 104 | 4.5,2.3,1.3,0.3,0 105 | 5.8,2.7,4.1,1,1 106 | 5,3.4,1.6,0.4,0 107 | 5.2,3.4,1.4,0.2,0 108 | 5.3,3.7,1.5,0.2,0 109 | 5,3.6,1.4,0.2,0 110 | 5.6,2.9,3.6,1.3,1 111 | 4.8,3.1,1.6,0.2,0 112 | 6.3,2.7,4.9,1.8,2 113 | 5.7,2.8,4.1,1.3,1 114 | 5,3,1.6,0.2,0 115 | 6.3,3.3,6,2.5,2 116 | 5,3.5,1.6,0.6,0 117 | 5.5,2.6,4.4,1.2,1 118 | 5.7,3,4.2,1.2,1 119 | 4.4,2.9,1.4,0.2,0 120 | 4.8,3,1.4,0.1,0 121 | 5.5,2.4,3.7,1,1 122 | 5.9,3,4.2,1.5,1 123 | 6.9,3.1,5.4,2.1,2 124 | 5.1,3.3,1.7,0.5,0 125 | 6,3.4,4.5,1.6,1 126 | 5.5,2.5,4,1.3,1 127 | 6.2,2.9,4.3,1.3,1 128 | 5.5,4.2,1.4,0.2,0 129 | 6.3,2.8,5.1,1.5,2 130 | 5.6,3,4.1,1.3,1 131 | 6.7,2.5,5.8,1.8,2 132 | 7.1,3,5.9,2.1,2 133 | 4.3,3,1.1,0.1,0 134 | 5.6,2.8,4.9,2,2 135 | 5.5,2.3,4,1.3,1 136 | 6,2.2,4,1,1 137 | 5.1,3.5,1.4,0.2,0 138 | 5.7,2.6,3.5,1,1 139 | 4.8,3.4,1.9,0.2,0 140 | 5.1,3.4,1.5,0.2,0 141 | 5.7,2.5,5,2,2 142 | 5.4,3.4,1.7,0.2,0 143 | 5.6,3,4.5,1.5,1 144 | 6.3,2.9,5.6,1.8,2 145 | 6.3,2.5,4.9,1.5,1 146 | 5.8,2.7,3.9,1.2,1 147 | 6.1,3,4.6,1.4,1 148 | 5.2,4.1,1.5,0.1,0 149 | 6.7,3.1,4.7,1.5,1 150 | 6.7,3.3,5.7,2.5,2 151 | 6.4,2.9,4.3,1.3,1 152 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # iris_classification_BPNeuralNetwork 2 | 3 | > 本文用Python实现了BP神经网络分类算法,根据鸢尾花的4个特征,实现3种鸢尾花的分类。 4 | > 算法参考文章:[纯Python实现鸢尾属植物数据集神经网络模型](https://yq.aliyun.com/articles/614411?utm_content=m_1000007130#) 5 | 6 | iris_data_classification_bpnn_V1.py 需使用 **bpnn_V1数据集** 文件夹中的数据 7 | 8 | iris_data_classification_bpnn_V2.py 需使用 **bpnn_V2数据集** 文件夹中的数据 9 | 10 | iris_data_classification_knn.py 需使用 **原始数据集** 文件夹中的数据 11 | 12 | iris_data_cluster_sklearn.py 需使用 **sklearn数据集** 文件夹中的数据 13 | 14 | 不同数据集里数据都是一样的,只是为了程序使用方便而做了一些格式的变动。 15 | 16 | --- 17 | 18 | **2020.07.21更新:** 增加了分类结果可视化result_visualization。 19 | 20 | **2020.07.09更新:** 完善代码中取数据部分的操作。 21 | 22 | --- 23 | 24 | ## 1.数据准备 25 | 鸢尾花数据集包含4种特征,萼片长度(Sepal Length)、萼片宽度(Sepal Width)、花瓣长度(Petal Length)和花瓣宽度(Petal Width),以及3种鸢尾花Versicolor、Virginica和Setosa。 26 | 27 | 数据集共151行,5列: 28 | - 第1行是数据说明,“150”表示共150条数据;“4”表示特征数;“setosa、versicolor、virginica”是三类花的名字 29 | - 第2行至第151行是150条数据 30 | - 第1至4列是Sepal Length、Sepal Width、Petal Length、Petal 31 | Width 4个特征 32 | - 第5列是花的类别,用0、1、2表示 33 | ![iris数据集](https://img-blog.csdnimg.cn/20191227134604250.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L21pY2hhZWxfZjIwMDg=,size_16,color_FFFFFF,t_70)为方便起见,需要对数据集稍作处理: 34 | 35 | 1. 将150条数据分隔为两个文件,前120条另存为`iris_training.csv`,即训练集;后30条另存为`iris_test.csv`,即测试集; 36 | 2. 训练集和测试集都删去第1行; 37 | 3. 
训练集和测试集都删去原来的最后1列,并新增加3列,目的是用3列来表示鸢尾花的分类:如果原来最后一列是0,则新增加的3列为(1,0,0);如果原来最后一列是1,则新增加的3列为(0,1,0);如果原来最后一列是2,则新增加的3列为(0,0,1)。 38 | ![iris训练集](https://img-blog.csdnimg.cn/20191227141640748.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L21pY2hhZWxfZjIwMDg=,size_16,color_FFFFFF,t_70) 39 | ## 2.算法实现 40 | [纯Python实现鸢尾属植物数据集神经网络模型](https://yq.aliyun.com/articles/614411?utm_content=m_1000007130#) 这篇文章中讲解得更为详细。本人对代码做了略微的修改,并增加了评估模型准确率的`predict()`函数。 41 | 42 | **以下代码对应的是iris_data_classification_bpnn_V2.py文件** 43 | 44 | ```python 45 | import pandas as pd 46 | import numpy as np 47 | import datetime 48 | import matplotlib.pyplot as plt 49 | from pandas.plotting import radviz 50 | ''' 51 | 构建一个具有1个隐藏层的神经网络,隐层的大小为10 52 | 输入层为4个特征,输出层为3个分类 53 | (1,0,0)为第一类,(0,1,0)为第二类,(0,0,1)为第三类 54 | ''' 55 | 56 | 57 | # 1.初始化参数 58 | def initialize_parameters(n_x, n_h, n_y): 59 | np.random.seed(2) 60 | 61 | # 权重和偏置矩阵 62 | w1 = np.random.randn(n_h, n_x) * 0.01 63 | b1 = np.zeros(shape=(n_h, 1)) 64 | w2 = np.random.randn(n_y, n_h) * 0.01 65 | b2 = np.zeros(shape=(n_y, 1)) 66 | 67 | # 通过字典存储参数 68 | parameters = {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2} 69 | 70 | return parameters 71 | 72 | 73 | # 2.前向传播 74 | def forward_propagation(X, parameters): 75 | w1 = parameters['w1'] 76 | b1 = parameters['b1'] 77 | w2 = parameters['w2'] 78 | b2 = parameters['b2'] 79 | 80 | # 通过前向传播来计算a2 81 | z1 = np.dot(w1, X) + b1 # 这个地方需注意矩阵加法:虽然(w1*X)和b1的维度不同,但可以相加 82 | a1 = np.tanh(z1) # 使用tanh作为第一层的激活函数 83 | z2 = np.dot(w2, a1) + b2 84 | a2 = 1 / (1 + np.exp(-z2)) # 使用sigmoid作为第二层的激活函数 85 | 86 | # 通过字典存储参数 87 | cache = {'z1': z1, 'a1': a1, 'z2': z2, 'a2': a2} 88 | 89 | return a2, cache 90 | 91 | 92 | # 3.计算代价函数 93 | def compute_cost(a2, Y, parameters): 94 | m = Y.shape[1] # Y的列数即为总的样本数 95 | 96 | # 采用交叉熵(cross-entropy)作为代价函数 97 | logprobs = np.multiply(np.log(a2), Y) + np.multiply((1 - Y), np.log(1 - a2)) 98 | cost = - np.sum(logprobs) / m 99 | 100 | return cost 101 | 


# 4. Backward propagation (gradients of the cost function)
def backward_propagation(parameters, cache, X, Y):
    """Compute the gradients of the cross-entropy cost for the two-layer net.

    Args:
        parameters: dict holding at least 'w2', the output-layer weights.
        cache: dict holding the activations 'a1' (tanh hidden layer) and
            'a2' (sigmoid output layer) from the forward pass.
        X: input matrix with one sample per column.
        Y: target matrix with one sample per column; ``Y.shape[1]`` is used
            as the sample count.

    Returns:
        dict with the gradients 'dw1', 'db1', 'dw2' and 'db2'.
    """
    m = Y.shape[1]

    w2 = parameters['w2']
    a1 = cache['a1']
    a2 = cache['a2']

    # Output layer: sigmoid + cross-entropy collapses to (prediction - target).
    dz2 = a2 - Y
    dw2 = (1 / m) * np.dot(dz2, a1.T)
    db2 = (1 / m) * np.sum(dz2, axis=1, keepdims=True)

    # Hidden layer: the derivative of tanh is 1 - tanh^2, i.e. 1 - a1^2.
    dz1 = np.multiply(np.dot(w2.T, dz2), 1 - np.power(a1, 2))
    dw1 = (1 / m) * np.dot(dz1, X.T)
    db1 = (1 / m) * np.sum(dz1, axis=1, keepdims=True)

    return {'dw1': dw1, 'db1': db1, 'dw2': dw2, 'db2': db2}


# 5. Parameter update
def update_parameters(parameters, grads, learning_rate=0.4):
    """Apply one gradient-descent step and return a NEW parameter dict.

    Args:
        parameters: dict with 'w1', 'b1', 'w2', 'b2'.
        grads: dict with the matching gradients 'dw1', 'db1', 'dw2', 'db2'.
        learning_rate: step size of the update.

    Returns:
        dict with the updated 'w1', 'b1', 'w2', 'b2'; the input dict is not
        modified.
    """
    return {
        name: parameters[name] - grads['d' + name] * learning_rate
        for name in ('w1', 'b1', 'w2', 'b2')
    }


# 6. Model evaluation
def predict(parameters, x_test, y_test):
    """Run the trained network on a test set and print its accuracy.

    Every output unit is thresholded independently at 0.5, and a sample
    counts as correct only when its whole output column equals the target
    column.

    Args:
        parameters: dict with 'w1', 'b1', 'w2', 'b2'.
        x_test: feature matrix, one sample per column.
        y_test: one-hot target matrix, one sample per column.

    Returns:
        Integer 0/1 matrix of thresholded predictions, same shape as the
        network output.
    """
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']

    # Same forward pass as in training: tanh hidden layer, sigmoid output.
    a1 = np.tanh(np.dot(w1, x_test) + b1)
    a2 = 1 / (1 + np.exp(-(np.dot(w2, a1) + b2)))

    # Threshold each output unit at 0.5 (vectorised, no Python loops).
    output = (a2 > 0.5).astype(int)

    print('预测结果:')
    print(output)
    print('真实结果:')
    print(y_test)

    n_cols = y_test.shape[1]
    correct = np.all(output == y_test, axis=0)

    # Print the column index of every misclassified sample.
    for k in np.flatnonzero(~correct):
        print(int(k))

    # Guard the empty test set instead of dividing by zero.
    acc = int(correct.sum()) / n_cols * 100 if n_cols else 0.0
    print('准确率:%.2f%%' % acc)

    return output


# Build and train the neural network
def nn_model(X, Y, n_h, n_input, n_output, num_iterations=10000, print_cost=False, learning_rate=0.4):
    """Train the one-hidden-layer network with batch gradient descent.

    Args:
        X: training features, one sample per column.
        Y: training targets, one sample per column.
        n_h: number of hidden-layer nodes.
        n_input: number of input-layer nodes.
        n_output: number of output-layer nodes.
        num_iterations: number of gradient-descent iterations.
        print_cost: when true, print the cost every 1000 iterations.
        learning_rate: step size forwarded to ``update_parameters``
            (defaults to the previously hard-coded 0.4).

    Returns:
        dict with the trained 'w1', 'b1', 'w2', 'b2'.
    """
    np.random.seed(3)

    # 1. Initialise the parameters
    parameters = initialize_parameters(n_input, n_h, n_output)

    # Gradient-descent loop
    for i in range(0, num_iterations):
        # 2. Forward propagation
        a2, cache = forward_propagation(X, parameters)
        # 3. Cost
        cost = compute_cost(a2, Y, parameters)
        # 4. Backward propagation
        grads = backward_propagation(parameters, cache, X, Y)
        # 5. Parameter update
        parameters = update_parameters(parameters, grads, learning_rate)

        # Report the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print('迭代第%i次,代价函数为:%f' % (i, cost))

    return parameters


def decode_species(one_hot):
    """Translate one-hot label columns into species names.

    The encoding follows the data set and the header of this listing:
    (1,0,0) is setosa, (0,1,0) is versicolor and (0,0,1) is virginica.
    Any other column (all zeros, several ones, ...) becomes 'unknown'.

    Args:
        one_hot: array-like whose first three rows hold the label of one
            sample per column.

    Returns:
        list of species names, one per column.
    """
    names = {
        (1, 0, 0): 'setosa',
        (0, 1, 0): 'versicolor',
        (0, 0, 1): 'virginica',
    }
    matrix = np.asarray(one_hot)
    return [
        names.get(tuple(int(v) for v in matrix[:3, col]), 'unknown')
        for col in range(matrix.shape[1])
    ]


# Result visualisation
# Four feature dimensions plus one class dimension make five in total,
# which is why a RadViz plot is used.
def result_visualization(x_test, y_test, result):
    """Show RadViz plots of the true and the predicted classes.

    Args:
        x_test: feature matrix with four rows, one sample per column.
        y_test: one-hot ground-truth matrix, one sample per column.
        result: one-hot prediction matrix as returned by ``predict``.
    """
    feature_cols = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']

    # Undo the one-hot encoding. Unrecognised columns are kept as 'unknown'
    # for both frames, so they always have one label per sample.
    df_real = pd.DataFrame(np.asarray(x_test, dtype=float).T, columns=feature_cols)
    df_real['Species'] = decode_species(y_test)

    df_prediction = pd.DataFrame(np.asarray(x_test, dtype=float).T, columns=feature_cols)
    df_prediction['Species'] = decode_species(result)

    # Plot; the fourth colour is there for a possible 'unknown' class.
    plt.figure('真实分类')
    radviz(df_real, 'Species', color=['blue', 'green', 'red', 'yellow'])
    plt.figure('预测分类')
    radviz(df_prediction, 'Species', color=['blue', 'green', 'red', 'yellow'])
    plt.show()


if __name__ == "__main__":
    # Load the training data
    data_set = pd.read_csv('D:\\iris_training.csv', header=None)

    # Option 1 for slicing the data:
    X = data_set.iloc[:, 0:4].values.T  # first four columns are the features; .T transposes
    Y = data_set.iloc[:, 4:].values.T  # last three columns are the one-hot label

    # Option 2 for slicing the data (NOTE: DataFrame.ix no longer exists in pandas >= 1.0):
    # X = data_set.ix[:, 0:3].values.T
    # Y = data_set.ix[:, 4:6].values.T

    # Option 3 for slicing the data:
    # X = data_set.loc[:, 0:3].values.T
    # Y = data_set.loc[:, 4:6].values.T

    # Option 4 for slicing the data:
    # X = data_set[data_set.columns[0:4]].values.T
    # Y = data_set[data_set.columns[4:7]].values.T
    Y = Y.astype('uint8')

    # Start training
    start_time = datetime.datetime.now()
    # 4 input nodes, 10 hidden nodes, 3 output nodes, 10000 iterations
    parameters = nn_model(X, Y, n_h=10, n_input=4, n_output=3, num_iterations=10000, print_cost=True)
    end_time = datetime.datetime.now()
    print("用时:" + str((end_time - start_time).seconds) + 's' + str(round((end_time - start_time).microseconds / 1000)) + 'ms')

    # Evaluate the model on the test set
data_test = pd.read_csv('D:\\iris_test.csv', header=None) 296 | x_test = data_test.iloc[:, 0:4].values.T 297 | y_test = data_test.iloc[:, 4:].values.T 298 | y_test = y_test.astype('uint8') 299 | 300 | result = predict(parameters, x_test, y_test) 301 | 302 | # 分类结果可视化 303 | result_visualization(x_test, y_test, result) 304 | 305 | ``` 306 | 最终结果: 307 | ![结果](https://img-blog.csdnimg.cn/20191227152325990.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L21pY2hhZWxfZjIwMDg=,size_16,color_FFFFFF,t_70) 308 | 309 | 分类的可视化效果,左侧为测试集的真实分类,右侧为模型的预测分类结果,采用的是**RadViz图**: 310 | ![分类可视化](https://img-blog.csdnimg.cn/20200721132114814.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L21pY2hhZWxfZjIwMDg=,size_16,color_FFFFFF,t_70#pic_center) 311 | 312 | 每次运行时准确率可能都不一样,可以通过调整**学习率、隐节点数、迭代次数**等参数来改善模型的效果。 313 | 314 | ## 3.总结 315 | 算法的实现总共分为6步: 316 | 317 | 1. 初始化参数 318 | 2. 前向传播 319 | 3. 计算代价函数 320 | 4. 反向传播 321 | 5. 更新参数 322 | 6. 
模型评估 323 | 324 | 325 | -------------------------------------------------------------------------------- /bpnn_V1数据集/iris_test.csv: -------------------------------------------------------------------------------- 1 | SepalLength,SepalWidth,PetalLength,PetalWidth,species 2 | 5.9,3,4.2,1.5,1 3 | 6.9,3.1,5.4,2.1,2 4 | 5.1,3.3,1.7,0.5,0 5 | 6,3.4,4.5,1.6,1 6 | 5.5,2.5,4,1.3,1 7 | 6.2,2.9,4.3,1.3,1 8 | 5.5,4.2,1.4,0.2,0 9 | 6.3,2.8,5.1,1.5,2 10 | 5.6,3,4.1,1.3,1 11 | 6.7,2.5,5.8,1.8,2 12 | 7.1,3,5.9,2.1,2 13 | 4.3,3,1.1,0.1,0 14 | 5.6,2.8,4.9,2,2 15 | 5.5,2.3,4,1.3,1 16 | 6,2.2,4,1,1 17 | 5.1,3.5,1.4,0.2,0 18 | 5.7,2.6,3.5,1,1 19 | 4.8,3.4,1.9,0.2,0 20 | 5.1,3.4,1.5,0.2,0 21 | 5.7,2.5,5,2,2 22 | 5.4,3.4,1.7,0.2,0 23 | 5.6,3,4.5,1.5,1 24 | 6.3,2.9,5.6,1.8,2 25 | 6.3,2.5,4.9,1.5,1 26 | 5.8,2.7,3.9,1.2,1 27 | 6.1,3,4.6,1.4,1 28 | 5.2,4.1,1.5,0.1,0 29 | 6.7,3.1,4.7,1.5,1 30 | 6.7,3.3,5.7,2.5,2 31 | 6.4,2.9,4.3,1.3,1 32 | -------------------------------------------------------------------------------- /bpnn_V1数据集/iris_training.csv: -------------------------------------------------------------------------------- 1 | SepalLength,SepalWidth,PetalLength,PetalWidth,species 2 | 6.4,2.8,5.6,2.2,2 3 | 5,2.3,3.3,1,1 4 | 4.9,2.5,4.5,1.7,2 5 | 4.9,3.1,1.5,0.1,0 6 | 5.7,3.8,1.7,0.3,0 7 | 4.4,3.2,1.3,0.2,0 8 | 5.4,3.4,1.5,0.4,0 9 | 6.9,3.1,5.1,2.3,2 10 | 6.7,3.1,4.4,1.4,1 11 | 5.1,3.7,1.5,0.4,0 12 | 5.2,2.7,3.9,1.4,1 13 | 6.9,3.1,4.9,1.5,1 14 | 5.8,4,1.2,0.2,0 15 | 5.4,3.9,1.7,0.4,0 16 | 7.7,3.8,6.7,2.2,2 17 | 6.3,3.3,4.7,1.6,1 18 | 6.8,3.2,5.9,2.3,2 19 | 7.6,3,6.6,2.1,2 20 | 6.4,3.2,5.3,2.3,2 21 | 5.7,4.4,1.5,0.4,0 22 | 6.7,3.3,5.7,2.1,2 23 | 6.4,2.8,5.6,2.1,2 24 | 5.4,3.9,1.3,0.4,0 25 | 6.1,2.6,5.6,1.4,2 26 | 7.2,3,5.8,1.6,2 27 | 5.2,3.5,1.5,0.2,0 28 | 5.8,2.6,4,1.2,1 29 | 5.9,3,5.1,1.8,2 30 | 5.4,3,4.5,1.5,1 31 | 6.7,3,5,1.7,1 32 | 6.3,2.3,4.4,1.3,1 33 | 5.1,2.5,3,1.1,1 34 | 6.4,3.2,4.5,1.5,1 35 | 6.8,3,5.5,2.1,2 36 | 6.2,2.8,4.8,1.8,2 37 | 6.9,3.2,5.7,2.3,2 38 | 6.5,3.2,5.1,2,2 39 | 
5.8,2.8,5.1,2.4,2 40 | 5.1,3.8,1.5,0.3,0 41 | 4.8,3,1.4,0.3,0 42 | 7.9,3.8,6.4,2,2 43 | 5.8,2.7,5.1,1.9,2 44 | 6.7,3,5.2,2.3,2 45 | 5.1,3.8,1.9,0.4,0 46 | 4.7,3.2,1.6,0.2,0 47 | 6,2.2,5,1.5,2 48 | 4.8,3.4,1.6,0.2,0 49 | 7.7,2.6,6.9,2.3,2 50 | 4.6,3.6,1,0.2,0 51 | 7.2,3.2,6,1.8,2 52 | 5,3.3,1.4,0.2,0 53 | 6.6,3,4.4,1.4,1 54 | 6.1,2.8,4,1.3,1 55 | 5,3.2,1.2,0.2,0 56 | 7,3.2,4.7,1.4,1 57 | 6,3,4.8,1.8,2 58 | 7.4,2.8,6.1,1.9,2 59 | 5.8,2.7,5.1,1.9,2 60 | 6.2,3.4,5.4,2.3,2 61 | 5,2,3.5,1,1 62 | 5.6,2.5,3.9,1.1,1 63 | 6.7,3.1,5.6,2.4,2 64 | 6.3,2.5,5,1.9,2 65 | 6.4,3.1,5.5,1.8,2 66 | 6.2,2.2,4.5,1.5,1 67 | 7.3,2.9,6.3,1.8,2 68 | 4.4,3,1.3,0.2,0 69 | 7.2,3.6,6.1,2.5,2 70 | 6.5,3,5.5,1.8,2 71 | 5,3.4,1.5,0.2,0 72 | 4.7,3.2,1.3,0.2,0 73 | 6.6,2.9,4.6,1.3,1 74 | 5.5,3.5,1.3,0.2,0 75 | 7.7,3,6.1,2.3,2 76 | 6.1,3,4.9,1.8,2 77 | 4.9,3.1,1.5,0.1,0 78 | 5.5,2.4,3.8,1.1,1 79 | 5.7,2.9,4.2,1.3,1 80 | 6,2.9,4.5,1.5,1 81 | 6.4,2.7,5.3,1.9,2 82 | 5.4,3.7,1.5,0.2,0 83 | 6.1,2.9,4.7,1.4,1 84 | 6.5,2.8,4.6,1.5,1 85 | 5.6,2.7,4.2,1.3,1 86 | 6.3,3.4,5.6,2.4,2 87 | 4.9,3.1,1.5,0.1,0 88 | 6.8,2.8,4.8,1.4,1 89 | 5.7,2.8,4.5,1.3,1 90 | 6,2.7,5.1,1.6,1 91 | 5,3.5,1.3,0.3,0 92 | 6.5,3,5.2,2,2 93 | 6.1,2.8,4.7,1.2,1 94 | 5.1,3.5,1.4,0.3,0 95 | 4.6,3.1,1.5,0.2,0 96 | 6.5,3,5.8,2.2,2 97 | 4.6,3.4,1.4,0.3,0 98 | 4.6,3.2,1.4,0.2,0 99 | 7.7,2.8,6.7,2,2 100 | 5.9,3.2,4.8,1.8,1 101 | 5.1,3.8,1.6,0.2,0 102 | 4.9,3,1.4,0.2,0 103 | 4.9,2.4,3.3,1,1 104 | 4.5,2.3,1.3,0.3,0 105 | 5.8,2.7,4.1,1,1 106 | 5,3.4,1.6,0.4,0 107 | 5.2,3.4,1.4,0.2,0 108 | 5.3,3.7,1.5,0.2,0 109 | 5,3.6,1.4,0.2,0 110 | 5.6,2.9,3.6,1.3,1 111 | 4.8,3.1,1.6,0.2,0 112 | 6.3,2.7,4.9,1.8,2 113 | 5.7,2.8,4.1,1.3,1 114 | 5,3,1.6,0.2,0 115 | 6.3,3.3,6,2.5,2 116 | 5,3.5,1.6,0.6,0 117 | 5.5,2.6,4.4,1.2,1 118 | 5.7,3,4.2,1.2,1 119 | 4.4,2.9,1.4,0.2,0 120 | 4.8,3,1.4,0.1,0 121 | 5.5,2.4,3.7,1,1 122 | -------------------------------------------------------------------------------- /bpnn_V2数据集/iris_test.csv: 
-------------------------------------------------------------------------------- 1 | 5.9,3,4.2,1.5,0,1,0 2 | 6.9,3.1,5.4,2.1,0,0,1 3 | 5.1,3.3,1.7,0.5,1,0,0 4 | 6,3.4,4.5,1.6,0,1,0 5 | 5.5,2.5,4,1.3,0,1,0 6 | 6.2,2.9,4.3,1.3,0,1,0 7 | 5.5,4.2,1.4,0.2,1,0,0 8 | 6.3,2.8,5.1,1.5,0,0,1 9 | 5.6,3,4.1,1.3,0,1,0 10 | 6.7,2.5,5.8,1.8,0,0,1 11 | 7.1,3,5.9,2.1,0,0,1 12 | 4.3,3,1.1,0.1,1,0,0 13 | 5.6,2.8,4.9,2,0,0,1 14 | 5.5,2.3,4,1.3,0,1,0 15 | 6,2.2,4,1,0,1,0 16 | 5.1,3.5,1.4,0.2,1,0,0 17 | 5.7,2.6,3.5,1,0,1,0 18 | 4.8,3.4,1.9,0.2,1,0,0 19 | 5.1,3.4,1.5,0.2,1,0,0 20 | 5.7,2.5,5,2,0,0,1 21 | 5.4,3.4,1.7,0.2,1,0,0 22 | 5.6,3,4.5,1.5,0,1,0 23 | 6.3,2.9,5.6,1.8,0,0,1 24 | 6.3,2.5,4.9,1.5,0,1,0 25 | 5.8,2.7,3.9,1.2,0,1,0 26 | 6.1,3,4.6,1.4,0,1,0 27 | 5.2,4.1,1.5,0.1,1,0,0 28 | 6.7,3.1,4.7,1.5,0,1,0 29 | 6.7,3.3,5.7,2.5,0,0,1 30 | 6.4,2.9,4.3,1.3,0,1,0 31 | -------------------------------------------------------------------------------- /bpnn_V2数据集/iris_training.csv: -------------------------------------------------------------------------------- 1 | 6.4,2.8,5.6,2.2,0,0,1 2 | 5,2.3,3.3,1,0,1,0 3 | 4.9,2.5,4.5,1.7,0,0,1 4 | 4.9,3.1,1.5,0.1,1,0,0 5 | 5.7,3.8,1.7,0.3,1,0,0 6 | 4.4,3.2,1.3,0.2,1,0,0 7 | 5.4,3.4,1.5,0.4,1,0,0 8 | 6.9,3.1,5.1,2.3,0,0,1 9 | 6.7,3.1,4.4,1.4,0,1,0 10 | 5.1,3.7,1.5,0.4,1,0,0 11 | 5.2,2.7,3.9,1.4,0,1,0 12 | 6.9,3.1,4.9,1.5,0,1,0 13 | 5.8,4,1.2,0.2,1,0,0 14 | 5.4,3.9,1.7,0.4,1,0,0 15 | 7.7,3.8,6.7,2.2,0,0,1 16 | 6.3,3.3,4.7,1.6,0,1,0 17 | 6.8,3.2,5.9,2.3,0,0,1 18 | 7.6,3,6.6,2.1,0,0,1 19 | 6.4,3.2,5.3,2.3,0,0,1 20 | 5.7,4.4,1.5,0.4,1,0,0 21 | 6.7,3.3,5.7,2.1,0,0,1 22 | 6.4,2.8,5.6,2.1,0,0,1 23 | 5.4,3.9,1.3,0.4,1,0,0 24 | 6.1,2.6,5.6,1.4,0,0,1 25 | 7.2,3,5.8,1.6,0,0,1 26 | 5.2,3.5,1.5,0.2,1,0,0 27 | 5.8,2.6,4,1.2,0,1,0 28 | 5.9,3,5.1,1.8,0,0,1 29 | 5.4,3,4.5,1.5,0,1,0 30 | 6.7,3,5,1.7,0,1,0 31 | 6.3,2.3,4.4,1.3,0,1,0 32 | 5.1,2.5,3,1.1,0,1,0 33 | 6.4,3.2,4.5,1.5,0,1,0 34 | 6.8,3,5.5,2.1,0,0,1 35 | 6.2,2.8,4.8,1.8,0,0,1 36 | 6.9,3.2,5.7,2.3,0,0,1 37 | 
6.5,3.2,5.1,2,0,0,1 38 | 5.8,2.8,5.1,2.4,0,0,1 39 | 5.1,3.8,1.5,0.3,1,0,0 40 | 4.8,3,1.4,0.3,1,0,0 41 | 7.9,3.8,6.4,2,0,0,1 42 | 5.8,2.7,5.1,1.9,0,0,1 43 | 6.7,3,5.2,2.3,0,0,1 44 | 5.1,3.8,1.9,0.4,1,0,0 45 | 4.7,3.2,1.6,0.2,1,0,0 46 | 6,2.2,5,1.5,0,0,1 47 | 4.8,3.4,1.6,0.2,1,0,0 48 | 7.7,2.6,6.9,2.3,0,0,1 49 | 4.6,3.6,1,0.2,1,0,0 50 | 7.2,3.2,6,1.8,0,0,1 51 | 5,3.3,1.4,0.2,1,0,0 52 | 6.6,3,4.4,1.4,0,1,0 53 | 6.1,2.8,4,1.3,0,1,0 54 | 5,3.2,1.2,0.2,1,0,0 55 | 7,3.2,4.7,1.4,0,1,0 56 | 6,3,4.8,1.8,0,0,1 57 | 7.4,2.8,6.1,1.9,0,0,1 58 | 5.8,2.7,5.1,1.9,0,0,1 59 | 6.2,3.4,5.4,2.3,0,0,1 60 | 5,2,3.5,1,0,1,0 61 | 5.6,2.5,3.9,1.1,0,1,0 62 | 6.7,3.1,5.6,2.4,0,0,1 63 | 6.3,2.5,5,1.9,0,0,1 64 | 6.4,3.1,5.5,1.8,0,0,1 65 | 6.2,2.2,4.5,1.5,0,1,0 66 | 7.3,2.9,6.3,1.8,0,0,1 67 | 4.4,3,1.3,0.2,1,0,0 68 | 7.2,3.6,6.1,2.5,0,0,1 69 | 6.5,3,5.5,1.8,0,0,1 70 | 5,3.4,1.5,0.2,1,0,0 71 | 4.7,3.2,1.3,0.2,1,0,0 72 | 6.6,2.9,4.6,1.3,0,1,0 73 | 5.5,3.5,1.3,0.2,1,0,0 74 | 7.7,3,6.1,2.3,0,0,1 75 | 6.1,3,4.9,1.8,0,0,1 76 | 4.9,3.1,1.5,0.1,1,0,0 77 | 5.5,2.4,3.8,1.1,0,1,0 78 | 5.7,2.9,4.2,1.3,0,1,0 79 | 6,2.9,4.5,1.5,0,1,0 80 | 6.4,2.7,5.3,1.9,0,0,1 81 | 5.4,3.7,1.5,0.2,1,0,0 82 | 6.1,2.9,4.7,1.4,0,1,0 83 | 6.5,2.8,4.6,1.5,0,1,0 84 | 5.6,2.7,4.2,1.3,0,1,0 85 | 6.3,3.4,5.6,2.4,0,0,1 86 | 4.9,3.1,1.5,0.1,1,0,0 87 | 6.8,2.8,4.8,1.4,0,1,0 88 | 5.7,2.8,4.5,1.3,0,1,0 89 | 6,2.7,5.1,1.6,0,1,0 90 | 5,3.5,1.3,0.3,1,0,0 91 | 6.5,3,5.2,2,0,0,1 92 | 6.1,2.8,4.7,1.2,0,1,0 93 | 5.1,3.5,1.4,0.3,1,0,0 94 | 4.6,3.1,1.5,0.2,1,0,0 95 | 6.5,3,5.8,2.2,0,0,1 96 | 4.6,3.4,1.4,0.3,1,0,0 97 | 4.6,3.2,1.4,0.2,1,0,0 98 | 7.7,2.8,6.7,2,0,0,1 99 | 5.9,3.2,4.8,1.8,0,1,0 100 | 5.1,3.8,1.6,0.2,1,0,0 101 | 4.9,3,1.4,0.2,1,0,0 102 | 4.9,2.4,3.3,1,0,1,0 103 | 4.5,2.3,1.3,0.3,1,0,0 104 | 5.8,2.7,4.1,1,0,1,0 105 | 5,3.4,1.6,0.4,1,0,0 106 | 5.2,3.4,1.4,0.2,1,0,0 107 | 5.3,3.7,1.5,0.2,1,0,0 108 | 5,3.6,1.4,0.2,1,0,0 109 | 5.6,2.9,3.6,1.3,0,1,0 110 | 4.8,3.1,1.6,0.2,1,0,0 111 | 6.3,2.7,4.9,1.8,0,0,1 112 | 5.7,2.8,4.1,1.3,0,1,0 113 | 
5,3,1.6,0.2,1,0,0 114 | 6.3,3.3,6,2.5,0,0,1 115 | 5,3.5,1.6,0.6,1,0,0 116 | 5.5,2.6,4.4,1.2,0,1,0 117 | 5.7,3,4.2,1.2,0,1,0 118 | 4.4,2.9,1.4,0.2,1,0,0 119 | 4.8,3,1.4,0.1,1,0,0 120 | 5.5,2.4,3.7,1,0,1,0 121 | -------------------------------------------------------------------------------- /iris_data_classification_bpnn_V1.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | import numpy as np 4 | import datetime 5 | from sklearn.preprocessing import OneHotEncoder 6 | from pandas.plotting import radviz 7 | ''' 8 | 构建一个具有1个隐藏层的神经网络,隐层的大小为10 9 | 输入层为2(或4)个特征;输出层1个节点,结果为0或1 10 | 当特征为2个时,表头为:'SepalLength', 'SepalWidth', 'species',迭代1000次,正确率为100% 11 | 当特征为4个时,表头为:'SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'species',迭代1000次,正确率为63.64% 12 | ''' 13 | 14 | 15 | # 画图看原始数据 16 | def draw_plot(X, Y): 17 | # 用来正常显示中文标签 18 | plt.rcParams['font.sans-serif'] = ['SimHei'] 19 | 20 | plt.scatter(X[0, :], X[1, :], c=Y[0, :], s=50, cmap=plt.cm.Spectral) 21 | plt.title('蓝色-Versicolor, 红色-Virginica') 22 | plt.xlabel('花瓣长度') 23 | plt.ylabel('花瓣宽度') 24 | plt.show() 25 | 26 | 27 | # 1.初始化参数 28 | def initialize_parameters(n_x, n_h, n_y): 29 | np.random.seed(2) 30 | 31 | # 权重和偏置矩阵 32 | w1 = np.random.randn(n_h, n_x) * 0.01 33 | b1 = np.zeros(shape=(n_h, 1)) 34 | w2 = np.random.randn(n_y, n_h) * 0.01 35 | b2 = np.zeros(shape=(n_y, 1)) 36 | 37 | # 通过字典存储参数 38 | parameters = {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2} 39 | 40 | return parameters 41 | 42 | 43 | # 2.前向传播 44 | def forward_propagation(X, parameters): 45 | w1 = parameters['w1'] 46 | b1 = parameters['b1'] 47 | w2 = parameters['w2'] 48 | b2 = parameters['b2'] 49 | 50 | # 通过前向传播来计算a2 51 | z1 = np.dot(w1, X) + b1 # 这个地方需注意矩阵加法:虽然(w1*X)和b1的维度不同,但可以相加 52 | a1 = np.tanh(z1) # 使用tanh作为第一层的激活函数 53 | z2 = np.dot(w2, a1) + b2 54 | a2 = 1 / (1 + np.exp(-z2)) # 使用sigmoid作为第二层的激活函数 55 | 56 | # 通过字典存储参数 57 | cache = {'z1': z1, 'a1': a1, 'z2': 
z2, 'a2': a2} 58 | 59 | return a2, cache 60 | 61 | 62 | # 3.计算代价函数 63 | def compute_cost(a2, Y): 64 | m = Y.shape[1] # Y的列数即为总的样本数 65 | 66 | # 采用交叉熵(cross-entropy)作为代价函数 67 | logprobs = np.multiply(np.log(a2), Y) + np.multiply((1 - Y), np.log(1 - a2)) 68 | cost = - np.sum(logprobs) / m 69 | 70 | return cost 71 | 72 | 73 | # 4.反向传播(计算代价函数的导数) 74 | def backward_propagation(parameters, cache, X, Y): 75 | m = Y.shape[1] 76 | 77 | w2 = parameters['w2'] 78 | 79 | a1 = cache['a1'] 80 | a2 = cache['a2'] 81 | 82 | # 反向传播,计算dw1、db1、dw2、db2 83 | dz2 = a2 - Y 84 | dw2 = (1 / m) * np.dot(dz2, a1.T) 85 | db2 = (1 / m) * np.sum(dz2, axis=1, keepdims=True) 86 | dz1 = np.multiply(np.dot(w2.T, dz2), 1 - np.power(a1, 2)) 87 | dw1 = (1 / m) * np.dot(dz1, X.T) 88 | db1 = (1 / m) * np.sum(dz1, axis=1, keepdims=True) 89 | 90 | grads = {'dw1': dw1, 'db1': db1, 'dw2': dw2, 'db2': db2} 91 | 92 | return grads 93 | 94 | 95 | # 5.更新参数 96 | def update_parameters(parameters, grads, learning_rate=0.4): 97 | w1 = parameters['w1'] 98 | b1 = parameters['b1'] 99 | w2 = parameters['w2'] 100 | b2 = parameters['b2'] 101 | 102 | dw1 = grads['dw1'] 103 | db1 = grads['db1'] 104 | dw2 = grads['dw2'] 105 | db2 = grads['db2'] 106 | 107 | # 更新参数 108 | w1 = w1 - dw1 * learning_rate 109 | b1 = b1 - db1 * learning_rate 110 | w2 = w2 - dw2 * learning_rate 111 | b2 = b2 - db2 * learning_rate 112 | 113 | parameters = {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2} 114 | 115 | return parameters 116 | 117 | 118 | # 建立神经网络 119 | def nn_model(X, Y, n_h, n_input, n_output, num_iterations=10000, print_cost=False): 120 | np.random.seed(3) 121 | 122 | n_x = n_input # 输入层节点数 123 | n_y = n_output # 输出层节点数 124 | 125 | # 1.初始化参数 126 | parameters = initialize_parameters(n_x, n_h, n_y) 127 | 128 | # 梯度下降循环 129 | for i in range(0, num_iterations): 130 | # 2.前向传播 131 | a2, cache = forward_propagation(X, parameters) 132 | # 3.计算代价函数 133 | cost = compute_cost(a2, Y) 134 | # 4.反向传播 135 | grads = backward_propagation(parameters, cache, X, Y) 
136 | # 5.更新参数 137 | parameters = update_parameters(parameters, grads) 138 | 139 | # 每1000次迭代,输出一次代价函数 140 | if print_cost and i % 1000 == 0: 141 | print('迭代第%i次,代价函数为:%f' % (i, cost)) 142 | 143 | return parameters 144 | 145 | 146 | # 6.模型评估 147 | def predict(parameters, x_test, y_test): 148 | w1 = parameters['w1'] 149 | b1 = parameters['b1'] 150 | w2 = parameters['w2'] 151 | b2 = parameters['b2'] 152 | 153 | z1 = np.dot(w1, x_test) + b1 154 | a1 = np.tanh(z1) 155 | z2 = np.dot(w2, a1) + b2 156 | a2 = 1 / (1 + np.exp(-z2)) 157 | 158 | # 结果的维度 159 | n_rows = a2.shape[0] 160 | n_cols = a2.shape[1] 161 | 162 | # 预测值结果存储 163 | output = np.empty(shape=(n_rows, n_cols), dtype=int) 164 | 165 | for i in range(n_rows): 166 | for j in range(n_cols): 167 | if a2[i][j] > 0.5: 168 | output[i][j] = 1 169 | else: 170 | output[i][j] = 0 171 | 172 | # 将独热编码反转为标签 173 | output = encoder.inverse_transform(output.T) 174 | output = output.reshape(1, output.shape[0]) 175 | output = output.flatten() 176 | 177 | print('预测结果:', output) 178 | print('真实结果:', y_test) 179 | 180 | count = 0 181 | for k in range(0, n_cols): 182 | if output[k] == y_test[k]: 183 | count = count + 1 184 | else: 185 | print('错误分类样本的序号:', k + 1) 186 | 187 | acc = count / int(a2.shape[1]) * 100 188 | print('准确率:%.2f%%' % acc) 189 | 190 | return output 191 | 192 | 193 | # 7.结果可视化 194 | # 特征有4个维度,类别有1个维度,一共5个维度,故采用了RadViz图 195 | def result_visualization(x_test, y_test, result): 196 | cols = y_test.shape[0] 197 | y = [] 198 | pre = [] 199 | labels = ['setosa', 'versicolor', 'virginica'] 200 | 201 | # 将0、1、2转换成setosa、versicolor、virginica 202 | for i in range(cols): 203 | y.append(labels[y_test[i]]) 204 | pre.append(labels[result[i]]) 205 | 206 | # 将特征和类别矩阵拼接起来 207 | real = np.column_stack((x_test.T, y)) 208 | prediction = np.column_stack((x_test.T, pre)) 209 | 210 | # 转换成DataFrame类型,并添加columns 211 | df_real = pd.DataFrame(real, index=None, columns=['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width', 'Species']) 
212 | df_prediction = pd.DataFrame(prediction, index=None, columns=['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width', 'Species']) 213 | 214 | # 将特征列转换为float类型,否则radviz会报错 215 | df_real[['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']] = df_real[['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']].astype(float) 216 | df_prediction[['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']] = df_prediction[['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']].astype(float) 217 | 218 | # 绘图 219 | plt.figure('真实分类') 220 | radviz(df_real, 'Species', color=['blue', 'green', 'red', 'yellow']) 221 | plt.figure('预测分类') 222 | radviz(df_prediction, 'Species', color=['blue', 'green', 'red', 'yellow']) 223 | plt.show() 224 | 225 | 226 | if __name__ == "__main__": 227 | # 读取数据 228 | iris = pd.read_csv('E:\\GitHub\\iris_classification_BPNeuralNetwork\\bpnn_V1数据集\\iris_training.csv') 229 | X = iris[['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']].values.T # T是转置 230 | Y = iris['species'].values 231 | 232 | # 将标签转换为独热编码 233 | encoder = OneHotEncoder() 234 | Y = encoder.fit_transform(Y.reshape(Y.shape[0], 1)) 235 | Y = Y.toarray().T 236 | Y = Y.astype('uint8') 237 | 238 | # 开始训练 239 | start_time = datetime.datetime.now() 240 | # 输入4个节点,隐层10个节点,输出3个节点,迭代10000次 241 | parameters = nn_model(X, Y, n_h=10, n_input=4, n_output=3, num_iterations=10000, print_cost=True) 242 | end_time = datetime.datetime.now() 243 | print("用时:" + str(round((end_time - start_time).microseconds / 1000)) + 'ms') 244 | 245 | # 对模型进行测试 246 | data_test = pd.read_csv('E:\\GitHub\\iris_classification_BPNeuralNetwork\\bpnn_V1数据集\\iris_test.csv') 247 | x_test = data_test[['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']].values.T 248 | y_test = data_test['species'].values 249 | 250 | result = predict(parameters, x_test, y_test) 251 | 252 | # 分类结果可视化 253 | result_visualization(x_test, y_test, result) 254 | 
# --------------------------------------------------------------------------------
# /iris_data_classification_bpnn_V2.py
# --------------------------------------------------------------------------------
"""Iris classification with a back-propagation neural network (one hidden layer).

Architecture used by the script entry point: 4 input features -> 10 tanh hidden
units -> 3 sigmoid output units.  Labels are one-hot columns: (1,0,0) is the
first class, (0,1,0) the second and (0,0,1) the third.

All matrices are laid out column-wise: one column per sample.
"""
import datetime
import os

import numpy as np
import pandas as pd

# Activations are clipped to [_LOG_EPS, 1 - _LOG_EPS] before taking logarithms so that a
# saturated sigmoid output (exactly 0.0 or 1.0) cannot turn the cost into NaN/inf.
_LOG_EPS = 1e-12

# Column names used for the RadViz data frames.
_FEATURE_COLUMNS = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']

# One-hot column -> species name, exactly as the original visualisation code decoded it.
# NOTE(review): this maps (0,0,1) to 'setosa' and (1,0,0) to 'virginica'; the training CSV
# is not visible here, so confirm this matches the encoding used in the data files.
_ONE_HOT_TO_SPECIES = {
    (0, 0, 1): 'setosa',
    (0, 1, 0): 'versicolor',
    (1, 0, 0): 'virginica',
}


# 1. Parameter initialisation
def initialize_parameters(n_x, n_h, n_y):
    """Create the weight and bias matrices of the two layers.

    The global NumPy RNG is re-seeded with 2 on every call, so the returned
    weights are identical from run to run.

    :param n_x: number of input features
    :param n_h: number of hidden units
    :param n_y: number of output units
    :return: dict with 'w1' (n_h, n_x), 'b1' (n_h, 1), 'w2' (n_y, n_h), 'b2' (n_y, 1)
    """
    np.random.seed(2)

    # Small random weights break symmetry between hidden units; biases start at zero.
    w1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros(shape=(n_h, 1))
    w2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros(shape=(n_y, 1))

    return {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2}


# 2. Forward propagation
def forward_propagation(X, parameters):
    """Run the network forward.

    :param X: feature matrix, one column per sample
    :param parameters: dict produced by ``initialize_parameters``
    :return: ``(a2, cache)`` where ``a2`` is the sigmoid output and ``cache``
             holds 'z1', 'a1', 'z2', 'a2' for back-propagation
    """
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']

    # b1/b2 are column vectors and broadcast across the sample columns.
    z1 = np.dot(w1, X) + b1
    a1 = np.tanh(z1)                 # hidden layer activation: tanh
    z2 = np.dot(w2, a1) + b2
    a2 = 1 / (1 + np.exp(-z2))       # output layer activation: sigmoid

    cache = {'z1': z1, 'a1': a1, 'z2': z2, 'a2': a2}

    return a2, cache


# 3. Cost function
def compute_cost(a2, Y):
    """Cross-entropy cost summed over outputs and averaged over samples.

    :param a2: network output, same layout as ``Y``
    :param Y: one-hot label matrix, one column per sample
    :return: scalar cost
    """
    m = Y.shape[1]  # number of columns == number of samples

    # Clipping leaves every non-saturated activation untouched and keeps log() finite.
    a2 = np.clip(a2, _LOG_EPS, 1 - _LOG_EPS)
    logprobs = np.multiply(np.log(a2), Y) + np.multiply((1 - Y), np.log(1 - a2))

    return - np.sum(logprobs) / m


# 4. Back-propagation (gradients of the cost)
def backward_propagation(parameters, cache, X, Y):
    """Compute the gradients of the cost with respect to every parameter.

    :param parameters: current parameters (only 'w2' is read)
    :param cache: activations stored by ``forward_propagation``
    :param X: feature matrix, one column per sample
    :param Y: one-hot label matrix, one column per sample
    :return: dict with 'dw1', 'db1', 'dw2', 'db2'
    """
    m = Y.shape[1]

    w2 = parameters['w2']
    a1 = cache['a1']
    a2 = cache['a2']

    # Sigmoid output combined with cross-entropy gives dz2 = a2 - Y.
    dz2 = a2 - Y
    dw2 = (1 / m) * np.dot(dz2, a1.T)
    db2 = (1 / m) * np.sum(dz2, axis=1, keepdims=True)
    # Derivative of tanh is 1 - tanh^2, and a1 already holds tanh(z1).
    dz1 = np.multiply(np.dot(w2.T, dz2), 1 - np.power(a1, 2))
    dw1 = (1 / m) * np.dot(dz1, X.T)
    db1 = (1 / m) * np.sum(dz1, axis=1, keepdims=True)

    return {'dw1': dw1, 'db1': db1, 'dw2': dw2, 'db2': db2}


# 5. Parameter update
def update_parameters(parameters, grads, learning_rate=0.4):
    """Apply one gradient-descent step and return a new parameter dict.

    :param parameters: current parameters
    :param grads: gradients from ``backward_propagation``
    :param learning_rate: step size
    :return: updated parameter dict (the input dict is not modified)
    """
    return {
        'w1': parameters['w1'] - grads['dw1'] * learning_rate,
        'b1': parameters['b1'] - grads['db1'] * learning_rate,
        'w2': parameters['w2'] - grads['dw2'] * learning_rate,
        'b2': parameters['b2'] - grads['db2'] * learning_rate,
    }


# Build and train the network
def nn_model(X, Y, n_h, n_input, n_output, num_iterations=10000, print_cost=False, learning_rate=0.4):
    """Train the network with batch gradient descent.

    :param X: training features, one column per sample
    :param Y: one-hot training labels, one column per sample
    :param n_h: number of hidden units
    :param n_input: number of input units
    :param n_output: number of output units
    :param num_iterations: number of gradient-descent iterations
    :param print_cost: print the cost every 1000 iterations when True
    :param learning_rate: step size forwarded to ``update_parameters``
                          (default 0.4, the value previously hard-wired)
    :return: trained parameter dict
    """
    # The former np.random.seed(3) call was dropped: initialize_parameters re-seeds the
    # generator with 2 before drawing anything, so it never had an effect.
    parameters = initialize_parameters(n_input, n_h, n_output)

    for i in range(0, num_iterations):
        a2, cache = forward_propagation(X, parameters)
        cost = compute_cost(a2, Y)
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads, learning_rate)

        if print_cost and i % 1000 == 0:
            print('迭代第%i次,代价函数为:%f' % (i, cost))

    return parameters


# 6. Model evaluation
def predict(parameters, x_test, y_test):
    """Classify the test samples, print a report and return the 0/1 predictions.

    Each output unit is thresholded at 0.5 independently, so a predicted column
    is not guaranteed to be a valid one-hot vector.  A sample counts as correct
    only when every row of its column equals the label column.

    :param parameters: trained parameters
    :param x_test: test features, one column per sample
    :param y_test: one-hot test labels, one column per sample
    :return: integer matrix of 0/1 predictions with the shape of the network output
    """
    a2, _ = forward_propagation(x_test, parameters)
    output = (a2 > 0.5).astype(int)

    print('预测结果:', output)
    print('真实结果:', y_test)

    correct = np.all(output == y_test, axis=0)
    for k in np.flatnonzero(~correct):
        print('错误分类样本的序号:', int(k) + 1)   # sample numbers are reported 1-based

    n_samples = int(y_test.shape[1])
    # Guard the empty test set instead of raising ZeroDivisionError.
    acc = int(correct.sum()) / n_samples * 100 if n_samples else 0.0
    print('准确率:%.2f%%' % acc)

    return output


def _decode_one_hot(codes):
    """Translate one-hot columns into species names ('unknown' when no pattern matches)."""
    columns = np.asarray(codes)[:3].T
    return [_ONE_HOT_TO_SPECIES.get(tuple(int(v) for v in column), 'unknown') for column in columns]


def _species_frame(x_test, species):
    """Build the RadViz data frame: four float feature columns plus a 'Species' column."""
    frame = pd.DataFrame(np.asarray(x_test, dtype=float).T, columns=_FEATURE_COLUMNS)
    frame['Species'] = species
    return frame


# 7. Visualisation
# Four feature dimensions plus the class make five dimensions, hence a RadViz plot.
def result_visualization(x_test, y_test, result):
    """Show two RadViz figures: the true classes and the predicted classes.

    Columns that do not match a known one-hot pattern are labelled 'unknown' in
    both figures (previously an unmatched true label made the label list shorter
    than the feature matrix and the plot crashed).

    :param x_test: test features, one column per sample
    :param y_test: one-hot test labels, one column per sample
    :param result: 0/1 predictions returned by ``predict``
    """
    # Plotting libraries are imported lazily so that training and evaluation can be
    # imported and run on machines without matplotlib or a display.
    import matplotlib.pyplot as plt
    from pandas.plotting import radviz

    df_real = _species_frame(x_test, _decode_one_hot(y_test))
    df_prediction = _species_frame(x_test, _decode_one_hot(result))

    palette = ['blue', 'green', 'red', 'yellow']
    plt.figure('真实分类')
    radviz(df_real, 'Species', color=palette)
    plt.figure('预测分类')
    radviz(df_prediction, 'Species', color=palette)
    plt.show()


if __name__ == "__main__":
    # The data sets live in the 'bpnn_V2数据集' folder next to this script; resolving them
    # relative to the script replaces the absolute path of the original author's machine.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'bpnn_V2数据集')

    # Training data: the first four columns are features, the remaining columns the one-hot
    # label.  Equivalent selections: data_set.loc[:, 0:3] / data_set.loc[:, 4:6].
    data_set = pd.read_csv(os.path.join(data_dir, 'iris_training.csv'), header=None)
    X = data_set.iloc[:, 0:4].values.T
    Y = data_set.iloc[:, 4:].values.T
    Y = Y.astype('uint8')

    # Train: 4 input units, 10 hidden units, 3 output units, 10000 iterations.
    start_time = datetime.datetime.now()
    parameters = nn_model(X, Y, n_h=10, n_input=4, n_output=3, num_iterations=10000, print_cost=True)
    end_time = datetime.datetime.now()
    print("用时:" + str((end_time - start_time).seconds) + 's' + str(round((end_time - start_time).microseconds / 1000)) + 'ms')

    # Evaluate on the held-out test set.
    data_test = pd.read_csv(os.path.join(data_dir, 'iris_test.csv'), header=None)
    x_test = data_test.iloc[:, 0:4].values.T
    y_test = data_test.iloc[:, 4:].values.T
    y_test = y_test.astype('uint8')

    result = predict(parameters, x_test, y_test)

    # Visualise the classification result.
    result_visualization(x_test, y_test, result)
# --------------------------------------------------------------------------------
# /iris_data_classification_knn.py
# --------------------------------------------------------------------------------
import csv
import random
import numpy as np
import operator
import datetime
import os


def open_file(file_name):
    """
    Load a data set whose first row is a header of the form
    ``<n_samples>,<n_features>,<label name>,...`` and whose remaining rows hold
    the feature values followed by an integer label.

    :param file_name: path of the CSV file
    :return: features (n_samples x n_features floats), labels (n_samples ints), label names
    """
    # newline='' is what the csv module expects from the file object it reads.
    with open(file_name, newline='') as csv_file:
        data_file = csv.reader(csv_file)

        # The first row describes the data set rather than a sample.
        header = next(data_file)
        n_samples = int(header[0])
        n_features = int(header[1])
        labels_names = np.array(header[2:])

        features = np.empty((n_samples, n_features))
        # The builtin int replaces np.int, which was removed in NumPy 1.24.
        labels = np.empty((n_samples,), dtype=int)

        # Blank lines (e.g. a trailing newline) yield empty rows and are skipped.
        rows = (row for row in data_file if row)
        for i, row in enumerate(rows):
            features[i] = np.asarray(row[:-1], dtype=np.float64)   # all but the last column
            labels[i] = int(row[-1])                               # last column is the label

    return features, labels, labels_names


def random_number(data_size):
    """
    Return the integers 0 .. data_size-1 in random order.  The shuffle is not
    seeded, so every run produces a different split and therefore a different score.

    :param data_size: size of the data set
    :return: shuffled list of indices
    """
    number_set = list(range(data_size))
    random.shuffle(number_set)
    return number_set


def split_data_set(features_set, labels_set, rate=0.20):
    """
    Randomly split the data set; by default 20% of the samples form the test set.

    :param features_set: feature matrix
    :param labels_set: label vector
    :param rate: fraction of the samples used for testing
    :return: x_train, x_test, y_train, y_test
    """
    train_size = int((1 - rate) * len(features_set))
    data_index = random_number(len(features_set))

    # x is the input (features), y the output (class label).
    train_index = data_index[:train_size]
    test_index = data_index[train_size:]

    x_train = features_set[train_index]
    x_test = features_set[test_index]
    y_train = labels_set[train_index]
    y_test = labels_set[test_index]

    return x_train, x_test, y_train, y_test


def data_distance(x_test, x_train):
    """
    Euclidean distance between two feature vectors.

    :param x_test: feature vector of a test sample
    :param x_train: feature vector of a training sample
    :return: the distance
    """
    # np.sum over axis 0 gives the same result as the builtin sum() used before,
    # without iterating over the array in Python.
    return np.sqrt(np.sum((x_test - x_train) ** 2, axis=0))


def knn(x_train, y_train, x_test, k):
    """
    Classify every test sample by majority vote among its k nearest training samples.

    Ties between equally distant neighbours are resolved by training-set order
    (stable sort); ties in the vote go to the class met first, i.e. the one with
    the nearest neighbour.  When k exceeds the training-set size all training
    samples vote.

    :param x_train: training features
    :param y_train: training labels
    :param x_test: test features
    :param k: number of neighbours (must be at least 1)
    :return: list with one predicted label per test sample
    """
    if k < 1:
        raise ValueError('k must be at least 1')

    x_train = np.asarray(x_train)
    predict_result_set = []

    for x in x_test:
        # Distance from this test sample to every training sample in one vectorised step.
        distances = np.sqrt(np.sum((x_train - x) ** 2, axis=1))
        nearest = np.argsort(distances, kind='stable')[:k]

        class_count = {}
        for index in nearest:
            label = y_train[index]
            class_count[label] = class_count.get(label, 0) + 1

        # max() returns the first entry with the highest count (insertion order).
        predict_result_set.append(max(class_count.items(), key=operator.itemgetter(1))[0])

    return predict_result_set


def predict_score(predict_result_set, y_test):
    """
    Fraction of predictions that equal the true label.

    :param predict_result_set: list of predicted labels
    :param y_test: true labels of the test set
    :return: accuracy in [0, 1]; 0.0 when there are no predictions
    """
    total = len(predict_result_set)
    if total == 0:
        return 0.0

    count = sum(1 for predicted, actual in zip(predict_result_set, y_test) if predicted == actual)
    return count / total


if __name__ == "__main__":
    # 1. Load the data.  The data set lives in the 'KNN数据集' folder next to this script;
    #    resolving it relative to the script replaces the absolute path used originally.
    data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'KNN数据集', 'iris.csv')
    iris_data_set = open_file(data_path)

    # 2. Split into training and test sets.
    x_train, x_test, y_train, y_test = split_data_set(iris_data_set[0], iris_data_set[1])

    # 3. Run KNN.
    start_time = datetime.datetime.now()
    result = knn(x_train, y_train, x_test, 8)
    end_time = datetime.datetime.now()

    # 4. Accuracy.
    acc = predict_score(result, y_test)

    print("正确标签:", y_test)
    print("预测结果:", np.array(result))
    print("准确率:" + str(acc * 100) + "%")
    # total_seconds() covers the whole duration; .microseconds alone drops whole seconds.
    print("用时:" + str((end_time - start_time).total_seconds() * 1000) + 'ms')

# --------------------------------------------------------------------------------
# /iris_data_cluster_sklearn.py
# --------------------------------------------------------------------------------
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN

# NOTE(review): this script expects columns named 'A'..'E'; none of the CSV files shown in
# the repository carries such a header, so the input path is left as the author wrote it.
data = pd.read_csv('D:\\iris.csv', sep=',', encoding='utf-8')
# .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
x = data[['A', 'B', 'C', 'D']].values
real = data['E'].values

# Variant 1: KMeans
# kms = KMeans(n_clusters=3)  # number of clusters
# y = kms.fit_predict(x)

# Variant 2: DBSCAN
dbscan = DBSCAN(eps=0.5, min_samples=13)
dbscan.fit(x)
y = dbscan.labels_

# NOTE(review): cluster ids are compared directly with class labels, and abs() folds the
# DBSCAN noise label -1 onto class 1.  Cluster numbering is arbitrary, so this "accuracy"
# is only meaningful if the ids happen to line up with the labels - consider a
# permutation-invariant score instead.
count = 0
for i in range(len(real)):
    if abs(int(y[i])) == abs(int(real[i])):
        count = count + 1
print('正确:' + str(count))
# Round after scaling to a percentage so no floating-point noise leaks into the output.
acc = round(count / len(real) * 100, 2)
print('正确率:' + str(acc) + '%')

# --------------------------------------------------------------------------------
# /iris_data_decision_tree_sklearn.py
# --------------------------------------------------------------------------------
import os
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt

# NOTE(review): the original read an absolute path on the author's machine.  The repository's
# 'sklearn数据集/iris.csv' (header row, four feature columns, one integer label column) fits
# the column slicing below and is assumed to be the intended input - confirm.
iris_data_set = pd.read_csv(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'sklearn数据集', 'iris.csv'))

# x: the four feature columns
x = iris_data_set.iloc[:, 0:4].values
# y: the label column
y = iris_data_set.iloc[:, -1].values

# Split into training and test sets.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)

# Select hyper-parameters with a cross-validated grid search.
model = DecisionTreeClassifier()
param = {'criterion': ['gini', 'entropy'], 'max_depth': [30, 50, 60, 100], 'min_samples_leaf': [2, 3, 5, 10], 'min_impurity_decrease': [0.1, 0.2, 0.5]}
grid = GridSearchCV(model, param_grid=param, cv=5)
grid.fit(x_train, y_train)
print('最优分类器:', grid.best_estimator_)
print('最优超参数:', grid.best_params_)
print('最优分数:', grid.best_score_)

# Classify the test set with the best estimator found.
model = grid.best_estimator_
y_pre = model.predict(x_test)

print('正确标签:', y_test)
print('预测结果:', y_pre)

print('训练集分数:', model.score(x_train, y_train))
print('测试集分数:', model.score(x_test, y_test))

# Confusion matrix
conf_mat = confusion_matrix(y_test, y_pre)
print('混淆矩阵:')
print(conf_mat)

# Text report of precision, recall, F1, ...
print('分类指标报告:')
print(classification_report(y_test, y_pre))

# Feature importances
print(model.feature_importances_)

# Plot true labels against predictions.
fig = plt.figure()
ax = fig.add_subplot(111)
f1 = ax.scatter(list(range(len(x_test))), y_test, marker='*')
f2 = ax.scatter(list(range(len(x_test))), y_pre, marker='o')
plt.legend(handles=[f1, f2], labels=['True', 'Prediction'])
plt.show()

# --------------------------------------------------------------------------------
# /iris_data_knn_sklearn.py
# --------------------------------------------------------------------------------
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt


# Load the data.
# NOTE(review): the original read an absolute path on the author's machine.  The repository's
# 'sklearn数据集/iris.csv' (header row, four feature columns, one integer label column) fits
# the column slicing below and is assumed to be the intended input - confirm.
iris_data_set = pd.read_csv(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'sklearn数据集', 'iris.csv'))
# x: the four feature columns
x = iris_data_set.iloc[:, 0:4].values
# y: the label column
y = iris_data_set.iloc[:, -1].values

# Split into training and test sets.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)

# Make sure the label arrays are one-dimensional.
y_train = y_train.flatten()
y_test = y_test.flatten()

# Select hyper-parameters with a cross-validated grid search.
knn_model = KNeighborsClassifier()
param_grid = [
    {
        'weights': ['uniform'],
        'n_neighbors': list(range(1, 20))
    },
    {
        'weights': ['distance'],
        'n_neighbors': list(range(1, 20)),
        'p': list(range(1, 6))
    }
]
grid = GridSearchCV(knn_model, param_grid=param_grid, cv=5)
grid.fit(x_train, y_train)
print('最优分类器:', grid.best_estimator_)
print('最优超参数:', grid.best_params_)
print('最优分数:', grid.best_score_)

# Predict with the best estimator found.
knn_model = grid.best_estimator_
y_pre = knn_model.predict(x_test)

print('正确标签:', y_test)
print('预测结果:', y_pre)

print('训练集分数:', knn_model.score(x_train, y_train))
print('测试集分数:', knn_model.score(x_test, y_test))

# Confusion matrix
conf_mat = confusion_matrix(y_test, y_pre)
print('混淆矩阵:')
print(conf_mat)

# Text report of precision, recall, F1, ...
print('分类指标报告:')
print(classification_report(y_test, y_pre))

# Plot true labels against predictions.
fig = plt.figure()
ax = fig.add_subplot(111)
f1 = ax.scatter(list(range(len(x_test))), y_test, marker='*')
f2 = ax.scatter(list(range(len(x_test))), y_pre, marker='o')
plt.legend(handles=[f1, f2], labels=['True', 'Prediction']) 68 | plt.show() 69 | -------------------------------------------------------------------------------- /sklearn数据集/iris.csv: -------------------------------------------------------------------------------- 1 | SepalLength,SepalWidth,PetalLength,PetalWidth,species 2 | 6.4,2.8,5.6,2.2,2 3 | 5,2.3,3.3,1,1 4 | 4.9,2.5,4.5,1.7,2 5 | 4.9,3.1,1.5,0.1,0 6 | 5.7,3.8,1.7,0.3,0 7 | 4.4,3.2,1.3,0.2,0 8 | 5.4,3.4,1.5,0.4,0 9 | 6.9,3.1,5.1,2.3,2 10 | 6.7,3.1,4.4,1.4,1 11 | 5.1,3.7,1.5,0.4,0 12 | 5.2,2.7,3.9,1.4,1 13 | 6.9,3.1,4.9,1.5,1 14 | 5.8,4,1.2,0.2,0 15 | 5.4,3.9,1.7,0.4,0 16 | 7.7,3.8,6.7,2.2,2 17 | 6.3,3.3,4.7,1.6,1 18 | 6.8,3.2,5.9,2.3,2 19 | 7.6,3,6.6,2.1,2 20 | 6.4,3.2,5.3,2.3,2 21 | 5.7,4.4,1.5,0.4,0 22 | 6.7,3.3,5.7,2.1,2 23 | 6.4,2.8,5.6,2.1,2 24 | 5.4,3.9,1.3,0.4,0 25 | 6.1,2.6,5.6,1.4,2 26 | 7.2,3,5.8,1.6,2 27 | 5.2,3.5,1.5,0.2,0 28 | 5.8,2.6,4,1.2,1 29 | 5.9,3,5.1,1.8,2 30 | 5.4,3,4.5,1.5,1 31 | 6.7,3,5,1.7,1 32 | 6.3,2.3,4.4,1.3,1 33 | 5.1,2.5,3,1.1,1 34 | 6.4,3.2,4.5,1.5,1 35 | 6.8,3,5.5,2.1,2 36 | 6.2,2.8,4.8,1.8,2 37 | 6.9,3.2,5.7,2.3,2 38 | 6.5,3.2,5.1,2,2 39 | 5.8,2.8,5.1,2.4,2 40 | 5.1,3.8,1.5,0.3,0 41 | 4.8,3,1.4,0.3,0 42 | 7.9,3.8,6.4,2,2 43 | 5.8,2.7,5.1,1.9,2 44 | 6.7,3,5.2,2.3,2 45 | 5.1,3.8,1.9,0.4,0 46 | 4.7,3.2,1.6,0.2,0 47 | 6,2.2,5,1.5,2 48 | 4.8,3.4,1.6,0.2,0 49 | 7.7,2.6,6.9,2.3,2 50 | 4.6,3.6,1,0.2,0 51 | 7.2,3.2,6,1.8,2 52 | 5,3.3,1.4,0.2,0 53 | 6.6,3,4.4,1.4,1 54 | 6.1,2.8,4,1.3,1 55 | 5,3.2,1.2,0.2,0 56 | 7,3.2,4.7,1.4,1 57 | 6,3,4.8,1.8,2 58 | 7.4,2.8,6.1,1.9,2 59 | 5.8,2.7,5.1,1.9,2 60 | 6.2,3.4,5.4,2.3,2 61 | 5,2,3.5,1,1 62 | 5.6,2.5,3.9,1.1,1 63 | 6.7,3.1,5.6,2.4,2 64 | 6.3,2.5,5,1.9,2 65 | 6.4,3.1,5.5,1.8,2 66 | 6.2,2.2,4.5,1.5,1 67 | 7.3,2.9,6.3,1.8,2 68 | 4.4,3,1.3,0.2,0 69 | 7.2,3.6,6.1,2.5,2 70 | 6.5,3,5.5,1.8,2 71 | 5,3.4,1.5,0.2,0 72 | 4.7,3.2,1.3,0.2,0 73 | 6.6,2.9,4.6,1.3,1 74 | 5.5,3.5,1.3,0.2,0 75 | 7.7,3,6.1,2.3,2 76 | 6.1,3,4.9,1.8,2 77 | 4.9,3.1,1.5,0.1,0 78 | 
5.5,2.4,3.8,1.1,1 79 | 5.7,2.9,4.2,1.3,1 80 | 6,2.9,4.5,1.5,1 81 | 6.4,2.7,5.3,1.9,2 82 | 5.4,3.7,1.5,0.2,0 83 | 6.1,2.9,4.7,1.4,1 84 | 6.5,2.8,4.6,1.5,1 85 | 5.6,2.7,4.2,1.3,1 86 | 6.3,3.4,5.6,2.4,2 87 | 4.9,3.1,1.5,0.1,0 88 | 6.8,2.8,4.8,1.4,1 89 | 5.7,2.8,4.5,1.3,1 90 | 6,2.7,5.1,1.6,1 91 | 5,3.5,1.3,0.3,0 92 | 6.5,3,5.2,2,2 93 | 6.1,2.8,4.7,1.2,1 94 | 5.1,3.5,1.4,0.3,0 95 | 4.6,3.1,1.5,0.2,0 96 | 6.5,3,5.8,2.2,2 97 | 4.6,3.4,1.4,0.3,0 98 | 4.6,3.2,1.4,0.2,0 99 | 7.7,2.8,6.7,2,2 100 | 5.9,3.2,4.8,1.8,1 101 | 5.1,3.8,1.6,0.2,0 102 | 4.9,3,1.4,0.2,0 103 | 4.9,2.4,3.3,1,1 104 | 4.5,2.3,1.3,0.3,0 105 | 5.8,2.7,4.1,1,1 106 | 5,3.4,1.6,0.4,0 107 | 5.2,3.4,1.4,0.2,0 108 | 5.3,3.7,1.5,0.2,0 109 | 5,3.6,1.4,0.2,0 110 | 5.6,2.9,3.6,1.3,1 111 | 4.8,3.1,1.6,0.2,0 112 | 6.3,2.7,4.9,1.8,2 113 | 5.7,2.8,4.1,1.3,1 114 | 5,3,1.6,0.2,0 115 | 6.3,3.3,6,2.5,2 116 | 5,3.5,1.6,0.6,0 117 | 5.5,2.6,4.4,1.2,1 118 | 5.7,3,4.2,1.2,1 119 | 4.4,2.9,1.4,0.2,0 120 | 4.8,3,1.4,0.1,0 121 | 5.5,2.4,3.7,1,1 122 | 5.9,3,4.2,1.5,1 123 | 6.9,3.1,5.4,2.1,2 124 | 5.1,3.3,1.7,0.5,0 125 | 6,3.4,4.5,1.6,1 126 | 5.5,2.5,4,1.3,1 127 | 6.2,2.9,4.3,1.3,1 128 | 5.5,4.2,1.4,0.2,0 129 | 6.3,2.8,5.1,1.5,2 130 | 5.6,3,4.1,1.3,1 131 | 6.7,2.5,5.8,1.8,2 132 | 7.1,3,5.9,2.1,2 133 | 4.3,3,1.1,0.1,0 134 | 5.6,2.8,4.9,2,2 135 | 5.5,2.3,4,1.3,1 136 | 6,2.2,4,1,1 137 | 5.1,3.5,1.4,0.2,0 138 | 5.7,2.6,3.5,1,1 139 | 4.8,3.4,1.9,0.2,0 140 | 5.1,3.4,1.5,0.2,0 141 | 5.7,2.5,5,2,2 142 | 5.4,3.4,1.7,0.2,0 143 | 5.6,3,4.5,1.5,1 144 | 6.3,2.9,5.6,1.8,2 145 | 6.3,2.5,4.9,1.5,1 146 | 5.8,2.7,3.9,1.2,1 147 | 6.1,3,4.6,1.4,1 148 | 5.2,4.1,1.5,0.1,0 149 | 6.7,3.1,4.7,1.5,1 150 | 6.7,3.3,5.7,2.5,2 151 | 6.4,2.9,4.3,1.3,1 152 | -------------------------------------------------------------------------------- /原始数据集/iris.csv: -------------------------------------------------------------------------------- 1 | 150,4,setosa,versicolor,virginica 2 | 6.4,2.8,5.6,2.2,2 3 | 5,2.3,3.3,1,1 4 | 4.9,2.5,4.5,1.7,2 5 | 4.9,3.1,1.5,0.1,0 6 | 
5.7,3.8,1.7,0.3,0 7 | 4.4,3.2,1.3,0.2,0 8 | 5.4,3.4,1.5,0.4,0 9 | 6.9,3.1,5.1,2.3,2 10 | 6.7,3.1,4.4,1.4,1 11 | 5.1,3.7,1.5,0.4,0 12 | 5.2,2.7,3.9,1.4,1 13 | 6.9,3.1,4.9,1.5,1 14 | 5.8,4,1.2,0.2,0 15 | 5.4,3.9,1.7,0.4,0 16 | 7.7,3.8,6.7,2.2,2 17 | 6.3,3.3,4.7,1.6,1 18 | 6.8,3.2,5.9,2.3,2 19 | 7.6,3,6.6,2.1,2 20 | 6.4,3.2,5.3,2.3,2 21 | 5.7,4.4,1.5,0.4,0 22 | 6.7,3.3,5.7,2.1,2 23 | 6.4,2.8,5.6,2.1,2 24 | 5.4,3.9,1.3,0.4,0 25 | 6.1,2.6,5.6,1.4,2 26 | 7.2,3,5.8,1.6,2 27 | 5.2,3.5,1.5,0.2,0 28 | 5.8,2.6,4,1.2,1 29 | 5.9,3,5.1,1.8,2 30 | 5.4,3,4.5,1.5,1 31 | 6.7,3,5,1.7,1 32 | 6.3,2.3,4.4,1.3,1 33 | 5.1,2.5,3,1.1,1 34 | 6.4,3.2,4.5,1.5,1 35 | 6.8,3,5.5,2.1,2 36 | 6.2,2.8,4.8,1.8,2 37 | 6.9,3.2,5.7,2.3,2 38 | 6.5,3.2,5.1,2,2 39 | 5.8,2.8,5.1,2.4,2 40 | 5.1,3.8,1.5,0.3,0 41 | 4.8,3,1.4,0.3,0 42 | 7.9,3.8,6.4,2,2 43 | 5.8,2.7,5.1,1.9,2 44 | 6.7,3,5.2,2.3,2 45 | 5.1,3.8,1.9,0.4,0 46 | 4.7,3.2,1.6,0.2,0 47 | 6,2.2,5,1.5,2 48 | 4.8,3.4,1.6,0.2,0 49 | 7.7,2.6,6.9,2.3,2 50 | 4.6,3.6,1,0.2,0 51 | 7.2,3.2,6,1.8,2 52 | 5,3.3,1.4,0.2,0 53 | 6.6,3,4.4,1.4,1 54 | 6.1,2.8,4,1.3,1 55 | 5,3.2,1.2,0.2,0 56 | 7,3.2,4.7,1.4,1 57 | 6,3,4.8,1.8,2 58 | 7.4,2.8,6.1,1.9,2 59 | 5.8,2.7,5.1,1.9,2 60 | 6.2,3.4,5.4,2.3,2 61 | 5,2,3.5,1,1 62 | 5.6,2.5,3.9,1.1,1 63 | 6.7,3.1,5.6,2.4,2 64 | 6.3,2.5,5,1.9,2 65 | 6.4,3.1,5.5,1.8,2 66 | 6.2,2.2,4.5,1.5,1 67 | 7.3,2.9,6.3,1.8,2 68 | 4.4,3,1.3,0.2,0 69 | 7.2,3.6,6.1,2.5,2 70 | 6.5,3,5.5,1.8,2 71 | 5,3.4,1.5,0.2,0 72 | 4.7,3.2,1.3,0.2,0 73 | 6.6,2.9,4.6,1.3,1 74 | 5.5,3.5,1.3,0.2,0 75 | 7.7,3,6.1,2.3,2 76 | 6.1,3,4.9,1.8,2 77 | 4.9,3.1,1.5,0.1,0 78 | 5.5,2.4,3.8,1.1,1 79 | 5.7,2.9,4.2,1.3,1 80 | 6,2.9,4.5,1.5,1 81 | 6.4,2.7,5.3,1.9,2 82 | 5.4,3.7,1.5,0.2,0 83 | 6.1,2.9,4.7,1.4,1 84 | 6.5,2.8,4.6,1.5,1 85 | 5.6,2.7,4.2,1.3,1 86 | 6.3,3.4,5.6,2.4,2 87 | 4.9,3.1,1.5,0.1,0 88 | 6.8,2.8,4.8,1.4,1 89 | 5.7,2.8,4.5,1.3,1 90 | 6,2.7,5.1,1.6,1 91 | 5,3.5,1.3,0.3,0 92 | 6.5,3,5.2,2,2 93 | 6.1,2.8,4.7,1.2,1 94 | 5.1,3.5,1.4,0.3,0 95 | 4.6,3.1,1.5,0.2,0 96 | 
6.5,3,5.8,2.2,2 97 | 4.6,3.4,1.4,0.3,0 98 | 4.6,3.2,1.4,0.2,0 99 | 7.7,2.8,6.7,2,2 100 | 5.9,3.2,4.8,1.8,1 101 | 5.1,3.8,1.6,0.2,0 102 | 4.9,3,1.4,0.2,0 103 | 4.9,2.4,3.3,1,1 104 | 4.5,2.3,1.3,0.3,0 105 | 5.8,2.7,4.1,1,1 106 | 5,3.4,1.6,0.4,0 107 | 5.2,3.4,1.4,0.2,0 108 | 5.3,3.7,1.5,0.2,0 109 | 5,3.6,1.4,0.2,0 110 | 5.6,2.9,3.6,1.3,1 111 | 4.8,3.1,1.6,0.2,0 112 | 6.3,2.7,4.9,1.8,2 113 | 5.7,2.8,4.1,1.3,1 114 | 5,3,1.6,0.2,0 115 | 6.3,3.3,6,2.5,2 116 | 5,3.5,1.6,0.6,0 117 | 5.5,2.6,4.4,1.2,1 118 | 5.7,3,4.2,1.2,1 119 | 4.4,2.9,1.4,0.2,0 120 | 4.8,3,1.4,0.1,0 121 | 5.5,2.4,3.7,1,1 122 | 5.9,3,4.2,1.5,1 123 | 6.9,3.1,5.4,2.1,2 124 | 5.1,3.3,1.7,0.5,0 125 | 6,3.4,4.5,1.6,1 126 | 5.5,2.5,4,1.3,1 127 | 6.2,2.9,4.3,1.3,1 128 | 5.5,4.2,1.4,0.2,0 129 | 6.3,2.8,5.1,1.5,2 130 | 5.6,3,4.1,1.3,1 131 | 6.7,2.5,5.8,1.8,2 132 | 7.1,3,5.9,2.1,2 133 | 4.3,3,1.1,0.1,0 134 | 5.6,2.8,4.9,2,2 135 | 5.5,2.3,4,1.3,1 136 | 6,2.2,4,1,1 137 | 5.1,3.5,1.4,0.2,0 138 | 5.7,2.6,3.5,1,1 139 | 4.8,3.4,1.9,0.2,0 140 | 5.1,3.4,1.5,0.2,0 141 | 5.7,2.5,5,2,2 142 | 5.4,3.4,1.7,0.2,0 143 | 5.6,3,4.5,1.5,1 144 | 6.3,2.9,5.6,1.8,2 145 | 6.3,2.5,4.9,1.5,1 146 | 5.8,2.7,3.9,1.2,1 147 | 6.1,3,4.6,1.4,1 148 | 5.2,4.1,1.5,0.1,0 149 | 6.7,3.1,4.7,1.5,1 150 | 6.7,3.3,5.7,2.5,2 151 | 6.4,2.9,4.3,1.3,1 152 | -------------------------------------------------------------------------------- /格式处理数据集/iris.csv: -------------------------------------------------------------------------------- 1 | Sepal Length,Sepal Width,Petal Length,Petal Width,Species 2 | 6.4,2.8,5.6,2.2,virginica 3 | 5,2.3,3.3,1,versicolor 4 | 4.9,2.5,4.5,1.7,virginica 5 | 4.9,3.1,1.5,0.1,setosa 6 | 5.7,3.8,1.7,0.3,setosa 7 | 4.4,3.2,1.3,0.2,setosa 8 | 5.4,3.4,1.5,0.4,setosa 9 | 6.9,3.1,5.1,2.3,virginica 10 | 6.7,3.1,4.4,1.4,versicolor 11 | 5.1,3.7,1.5,0.4,setosa 12 | 5.2,2.7,3.9,1.4,versicolor 13 | 6.9,3.1,4.9,1.5,versicolor 14 | 5.8,4,1.2,0.2,setosa 15 | 5.4,3.9,1.7,0.4,setosa 16 | 7.7,3.8,6.7,2.2,virginica 17 | 6.3,3.3,4.7,1.6,versicolor 18 | 
6.8,3.2,5.9,2.3,virginica 19 | 7.6,3,6.6,2.1,virginica 20 | 6.4,3.2,5.3,2.3,virginica 21 | 5.7,4.4,1.5,0.4,setosa 22 | 6.7,3.3,5.7,2.1,virginica 23 | 6.4,2.8,5.6,2.1,virginica 24 | 5.4,3.9,1.3,0.4,setosa 25 | 6.1,2.6,5.6,1.4,virginica 26 | 7.2,3,5.8,1.6,virginica 27 | 5.2,3.5,1.5,0.2,setosa 28 | 5.8,2.6,4,1.2,versicolor 29 | 5.9,3,5.1,1.8,virginica 30 | 5.4,3,4.5,1.5,versicolor 31 | 6.7,3,5,1.7,versicolor 32 | 6.3,2.3,4.4,1.3,versicolor 33 | 5.1,2.5,3,1.1,versicolor 34 | 6.4,3.2,4.5,1.5,versicolor 35 | 6.8,3,5.5,2.1,virginica 36 | 6.2,2.8,4.8,1.8,virginica 37 | 6.9,3.2,5.7,2.3,virginica 38 | 6.5,3.2,5.1,2,virginica 39 | 5.8,2.8,5.1,2.4,virginica 40 | 5.1,3.8,1.5,0.3,setosa 41 | 4.8,3,1.4,0.3,setosa 42 | 7.9,3.8,6.4,2,virginica 43 | 5.8,2.7,5.1,1.9,virginica 44 | 6.7,3,5.2,2.3,virginica 45 | 5.1,3.8,1.9,0.4,setosa 46 | 4.7,3.2,1.6,0.2,setosa 47 | 6,2.2,5,1.5,virginica 48 | 4.8,3.4,1.6,0.2,setosa 49 | 7.7,2.6,6.9,2.3,virginica 50 | 4.6,3.6,1,0.2,setosa 51 | 7.2,3.2,6,1.8,virginica 52 | 5,3.3,1.4,0.2,setosa 53 | 6.6,3,4.4,1.4,versicolor 54 | 6.1,2.8,4,1.3,versicolor 55 | 5,3.2,1.2,0.2,setosa 56 | 7,3.2,4.7,1.4,versicolor 57 | 6,3,4.8,1.8,virginica 58 | 7.4,2.8,6.1,1.9,virginica 59 | 5.8,2.7,5.1,1.9,virginica 60 | 6.2,3.4,5.4,2.3,virginica 61 | 5,2,3.5,1,versicolor 62 | 5.6,2.5,3.9,1.1,versicolor 63 | 6.7,3.1,5.6,2.4,virginica 64 | 6.3,2.5,5,1.9,virginica 65 | 6.4,3.1,5.5,1.8,virginica 66 | 6.2,2.2,4.5,1.5,versicolor 67 | 7.3,2.9,6.3,1.8,virginica 68 | 4.4,3,1.3,0.2,setosa 69 | 7.2,3.6,6.1,2.5,virginica 70 | 6.5,3,5.5,1.8,virginica 71 | 5,3.4,1.5,0.2,setosa 72 | 4.7,3.2,1.3,0.2,setosa 73 | 6.6,2.9,4.6,1.3,versicolor 74 | 5.5,3.5,1.3,0.2,setosa 75 | 7.7,3,6.1,2.3,virginica 76 | 6.1,3,4.9,1.8,virginica 77 | 4.9,3.1,1.5,0.1,setosa 78 | 5.5,2.4,3.8,1.1,versicolor 79 | 5.7,2.9,4.2,1.3,versicolor 80 | 6,2.9,4.5,1.5,versicolor 81 | 6.4,2.7,5.3,1.9,virginica 82 | 5.4,3.7,1.5,0.2,setosa 83 | 6.1,2.9,4.7,1.4,versicolor 84 | 6.5,2.8,4.6,1.5,versicolor 85 | 
5.6,2.7,4.2,1.3,versicolor 86 | 6.3,3.4,5.6,2.4,virginica 87 | 4.9,3.1,1.5,0.1,setosa 88 | 6.8,2.8,4.8,1.4,versicolor 89 | 5.7,2.8,4.5,1.3,versicolor 90 | 6,2.7,5.1,1.6,versicolor 91 | 5,3.5,1.3,0.3,setosa 92 | 6.5,3,5.2,2,virginica 93 | 6.1,2.8,4.7,1.2,versicolor 94 | 5.1,3.5,1.4,0.3,setosa 95 | 4.6,3.1,1.5,0.2,setosa 96 | 6.5,3,5.8,2.2,virginica 97 | 4.6,3.4,1.4,0.3,setosa 98 | 4.6,3.2,1.4,0.2,setosa 99 | 7.7,2.8,6.7,2,virginica 100 | 5.9,3.2,4.8,1.8,versicolor 101 | 5.1,3.8,1.6,0.2,setosa 102 | 4.9,3,1.4,0.2,setosa 103 | 4.9,2.4,3.3,1,versicolor 104 | 4.5,2.3,1.3,0.3,setosa 105 | 5.8,2.7,4.1,1,versicolor 106 | 5,3.4,1.6,0.4,setosa 107 | 5.2,3.4,1.4,0.2,setosa 108 | 5.3,3.7,1.5,0.2,setosa 109 | 5,3.6,1.4,0.2,setosa 110 | 5.6,2.9,3.6,1.3,versicolor 111 | 4.8,3.1,1.6,0.2,setosa 112 | 6.3,2.7,4.9,1.8,virginica 113 | 5.7,2.8,4.1,1.3,versicolor 114 | 5,3,1.6,0.2,setosa 115 | 6.3,3.3,6,2.5,virginica 116 | 5,3.5,1.6,0.6,setosa 117 | 5.5,2.6,4.4,1.2,versicolor 118 | 5.7,3,4.2,1.2,versicolor 119 | 4.4,2.9,1.4,0.2,setosa 120 | 4.8,3,1.4,0.1,setosa 121 | 5.5,2.4,3.7,1,versicolor 122 | 5.9,3,4.2,1.5,versicolor 123 | 6.9,3.1,5.4,2.1,virginica 124 | 5.1,3.3,1.7,0.5,setosa 125 | 6,3.4,4.5,1.6,versicolor 126 | 5.5,2.5,4,1.3,versicolor 127 | 6.2,2.9,4.3,1.3,versicolor 128 | 5.5,4.2,1.4,0.2,setosa 129 | 6.3,2.8,5.1,1.5,virginica 130 | 5.6,3,4.1,1.3,versicolor 131 | 6.7,2.5,5.8,1.8,virginica 132 | 7.1,3,5.9,2.1,virginica 133 | 4.3,3,1.1,0.1,setosa 134 | 5.6,2.8,4.9,2,virginica 135 | 5.5,2.3,4,1.3,versicolor 136 | 6,2.2,4,1,versicolor 137 | 5.1,3.5,1.4,0.2,setosa 138 | 5.7,2.6,3.5,1,versicolor 139 | 4.8,3.4,1.9,0.2,setosa 140 | 5.1,3.4,1.5,0.2,setosa 141 | 5.7,2.5,5,2,virginica 142 | 5.4,3.4,1.7,0.2,setosa 143 | 5.6,3,4.5,1.5,versicolor 144 | 6.3,2.9,5.6,1.8,virginica 145 | 6.3,2.5,4.9,1.5,versicolor 146 | 5.8,2.7,3.9,1.2,versicolor 147 | 6.1,3,4.6,1.4,versicolor 148 | 5.2,4.1,1.5,0.1,setosa 149 | 6.7,3.1,4.7,1.5,versicolor 150 | 6.7,3.3,5.7,2.5,virginica 151 | 
6.4,2.9,4.3,1.3,versicolor 152 | --------------------------------------------------------------------------------