├── KNN数据集
    └── iris.csv
├── README.md
├── bpnn_V1数据集
    ├── iris_test.csv
    └── iris_training.csv
├── bpnn_V2数据集
    ├── iris_test.csv
    └── iris_training.csv
├── iris_data_classification_bpnn_V1.py
├── iris_data_classification_bpnn_V2.py
├── iris_data_classification_knn.py
├── iris_data_cluster_sklearn.py
├── iris_data_decision_tree_sklearn.py
├── iris_data_knn_sklearn.py
├── sklearn数据集
    └── iris.csv
├── 原始数据集
    └── iris.csv
└── 格式处理数据集
    └── iris.csv


/KNN数据集/iris.csv:
--------------------------------------------------------------------------------
  1 | 150,4,setosa,versicolor,virginica
  2 | 6.4,2.8,5.6,2.2,2
  3 | 5,2.3,3.3,1,1
  4 | 4.9,2.5,4.5,1.7,2
  5 | 4.9,3.1,1.5,0.1,0
  6 | 5.7,3.8,1.7,0.3,0
  7 | 4.4,3.2,1.3,0.2,0
  8 | 5.4,3.4,1.5,0.4,0
  9 | 6.9,3.1,5.1,2.3,2
 10 | 6.7,3.1,4.4,1.4,1
 11 | 5.1,3.7,1.5,0.4,0
 12 | 5.2,2.7,3.9,1.4,1
 13 | 6.9,3.1,4.9,1.5,1
 14 | 5.8,4,1.2,0.2,0
 15 | 5.4,3.9,1.7,0.4,0
 16 | 7.7,3.8,6.7,2.2,2
 17 | 6.3,3.3,4.7,1.6,1
 18 | 6.8,3.2,5.9,2.3,2
 19 | 7.6,3,6.6,2.1,2
 20 | 6.4,3.2,5.3,2.3,2
 21 | 5.7,4.4,1.5,0.4,0
 22 | 6.7,3.3,5.7,2.1,2
 23 | 6.4,2.8,5.6,2.1,2
 24 | 5.4,3.9,1.3,0.4,0
 25 | 6.1,2.6,5.6,1.4,2
 26 | 7.2,3,5.8,1.6,2
 27 | 5.2,3.5,1.5,0.2,0
 28 | 5.8,2.6,4,1.2,1
 29 | 5.9,3,5.1,1.8,2
 30 | 5.4,3,4.5,1.5,1
 31 | 6.7,3,5,1.7,1
 32 | 6.3,2.3,4.4,1.3,1
 33 | 5.1,2.5,3,1.1,1
 34 | 6.4,3.2,4.5,1.5,1
 35 | 6.8,3,5.5,2.1,2
 36 | 6.2,2.8,4.8,1.8,2
 37 | 6.9,3.2,5.7,2.3,2
 38 | 6.5,3.2,5.1,2,2
 39 | 5.8,2.8,5.1,2.4,2
 40 | 5.1,3.8,1.5,0.3,0
 41 | 4.8,3,1.4,0.3,0
 42 | 7.9,3.8,6.4,2,2
 43 | 5.8,2.7,5.1,1.9,2
 44 | 6.7,3,5.2,2.3,2
 45 | 5.1,3.8,1.9,0.4,0
 46 | 4.7,3.2,1.6,0.2,0
 47 | 6,2.2,5,1.5,2
 48 | 4.8,3.4,1.6,0.2,0
 49 | 7.7,2.6,6.9,2.3,2
 50 | 4.6,3.6,1,0.2,0
 51 | 7.2,3.2,6,1.8,2
 52 | 5,3.3,1.4,0.2,0
 53 | 6.6,3,4.4,1.4,1
 54 | 6.1,2.8,4,1.3,1
 55 | 5,3.2,1.2,0.2,0
 56 | 7,3.2,4.7,1.4,1
 57 | 6,3,4.8,1.8,2
 58 | 7.4,2.8,6.1,1.9,2
 59 | 5.8,2.7,5.1,1.9,2
 60 | 6.2,3.4,5.4,2.3,2
 61 | 5,2,3.5,1,1
 62 | 5.6,2.5,3.9,1.1,1
 63 | 6.7,3.1,5.6,2.4,2
 64 | 6.3,2.5,5,1.9,2
 65 | 6.4,3.1,5.5,1.8,2
 66 | 6.2,2.2,4.5,1.5,1
 67 | 7.3,2.9,6.3,1.8,2
 68 | 4.4,3,1.3,0.2,0
 69 | 7.2,3.6,6.1,2.5,2
 70 | 6.5,3,5.5,1.8,2
 71 | 5,3.4,1.5,0.2,0
 72 | 4.7,3.2,1.3,0.2,0
 73 | 6.6,2.9,4.6,1.3,1
 74 | 5.5,3.5,1.3,0.2,0
 75 | 7.7,3,6.1,2.3,2
 76 | 6.1,3,4.9,1.8,2
 77 | 4.9,3.1,1.5,0.1,0
 78 | 5.5,2.4,3.8,1.1,1
 79 | 5.7,2.9,4.2,1.3,1
 80 | 6,2.9,4.5,1.5,1
 81 | 6.4,2.7,5.3,1.9,2
 82 | 5.4,3.7,1.5,0.2,0
 83 | 6.1,2.9,4.7,1.4,1
 84 | 6.5,2.8,4.6,1.5,1
 85 | 5.6,2.7,4.2,1.3,1
 86 | 6.3,3.4,5.6,2.4,2
 87 | 4.9,3.1,1.5,0.1,0
 88 | 6.8,2.8,4.8,1.4,1
 89 | 5.7,2.8,4.5,1.3,1
 90 | 6,2.7,5.1,1.6,1
 91 | 5,3.5,1.3,0.3,0
 92 | 6.5,3,5.2,2,2
 93 | 6.1,2.8,4.7,1.2,1
 94 | 5.1,3.5,1.4,0.3,0
 95 | 4.6,3.1,1.5,0.2,0
 96 | 6.5,3,5.8,2.2,2
 97 | 4.6,3.4,1.4,0.3,0
 98 | 4.6,3.2,1.4,0.2,0
 99 | 7.7,2.8,6.7,2,2
100 | 5.9,3.2,4.8,1.8,1
101 | 5.1,3.8,1.6,0.2,0
102 | 4.9,3,1.4,0.2,0
103 | 4.9,2.4,3.3,1,1
104 | 4.5,2.3,1.3,0.3,0
105 | 5.8,2.7,4.1,1,1
106 | 5,3.4,1.6,0.4,0
107 | 5.2,3.4,1.4,0.2,0
108 | 5.3,3.7,1.5,0.2,0
109 | 5,3.6,1.4,0.2,0
110 | 5.6,2.9,3.6,1.3,1
111 | 4.8,3.1,1.6,0.2,0
112 | 6.3,2.7,4.9,1.8,2
113 | 5.7,2.8,4.1,1.3,1
114 | 5,3,1.6,0.2,0
115 | 6.3,3.3,6,2.5,2
116 | 5,3.5,1.6,0.6,0
117 | 5.5,2.6,4.4,1.2,1
118 | 5.7,3,4.2,1.2,1
119 | 4.4,2.9,1.4,0.2,0
120 | 4.8,3,1.4,0.1,0
121 | 5.5,2.4,3.7,1,1
122 | 5.9,3,4.2,1.5,1
123 | 6.9,3.1,5.4,2.1,2
124 | 5.1,3.3,1.7,0.5,0
125 | 6,3.4,4.5,1.6,1
126 | 5.5,2.5,4,1.3,1
127 | 6.2,2.9,4.3,1.3,1
128 | 5.5,4.2,1.4,0.2,0
129 | 6.3,2.8,5.1,1.5,2
130 | 5.6,3,4.1,1.3,1
131 | 6.7,2.5,5.8,1.8,2
132 | 7.1,3,5.9,2.1,2
133 | 4.3,3,1.1,0.1,0
134 | 5.6,2.8,4.9,2,2
135 | 5.5,2.3,4,1.3,1
136 | 6,2.2,4,1,1
137 | 5.1,3.5,1.4,0.2,0
138 | 5.7,2.6,3.5,1,1
139 | 4.8,3.4,1.9,0.2,0
140 | 5.1,3.4,1.5,0.2,0
141 | 5.7,2.5,5,2,2
142 | 5.4,3.4,1.7,0.2,0
143 | 5.6,3,4.5,1.5,1
144 | 6.3,2.9,5.6,1.8,2
145 | 6.3,2.5,4.9,1.5,1
146 | 5.8,2.7,3.9,1.2,1
147 | 6.1,3,4.6,1.4,1
148 | 5.2,4.1,1.5,0.1,0
149 | 6.7,3.1,4.7,1.5,1
150 | 6.7,3.3,5.7,2.5,2
151 | 6.4,2.9,4.3,1.3,1
152 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # iris_classification_BPNeuralNetwork
  2 | 
  3 | > 本文用Python实现了BP神经网络分类算法，根据鸢尾花的4个特征，实现3种鸢尾花的分类。
  4 | > 算法参考文章：[纯Python实现鸢尾属植物数据集神经网络模型](https://yq.aliyun.com/articles/614411?utm_content=m_1000007130#)
  5 | 
  6 | iris_data_classification_bpnn_V1.py 需使用 **bpnn_V1数据集** 文件夹中的数据
  7 | 
  8 | iris_data_classification_bpnn_V2.py 需使用 **bpnn_V2数据集** 文件夹中的数据
  9 | 
 10 | iris_data_classification_knn.py     需使用 **原始数据集**    文件夹中的数据
 11 | 
 12 | iris_data_cluster_sklearn.py        需使用 **sklearn数据集** 文件夹中的数据
 13 | 
 14 | 不同数据集里数据都是一样的，只是为了程序使用方便而做了一些格式的变动。
 15 | 
 16 | ---
 17 | 
 18 | **2020.07.21更新：** 增加了分类结果可视化result_visualization。
 19 | 
 20 | **2020.07.09更新：** 完善代码中取数据部分的操作。
 21 | 
 22 | ---
 23 | 
 24 | ## 1.数据准备
 25 | 鸢尾花数据集包含4种特征，萼片长度（Sepal Length）、萼片宽度（Sepal Width）、花瓣长度（Petal Length）和花瓣宽度（Petal Width），以及3种鸢尾花Versicolor、Virginica和Setosa。
 26 | 
 27 | 数据集共151行，5列：
 28 |  - 第1行是数据说明，“150”表示共150条数据；“4”表示特征数；“setosa、versicolor、virginica”是三类花的名字
 29 |  - 第2行至第151行是150条数据
 30 |  - 第1至4列是Sepal Length、Sepal Width、Petal Length、Petal
 31 |    Width 4个特征
 32 |  - 第5列是花的类别，用0、1、2表示
 33 | ![iris数据集](https://img-blog.csdnimg.cn/20191227134604250.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L21pY2hhZWxfZjIwMDg=,size_16,color_FFFFFF,t_70)为方便起见，需要对数据集稍作处理：
 34 | 
 35 |  1. 将150条数据分隔为两个文件，前120条另存为`iris_training.csv`，即训练集；后30条另存为`iris_test.csv`，即测试集；
 36 |  2. 训练集和测试集都删去第1行；
 37 |  3. 训练集和测试集都删去原来的最后1列，并新增加3列，目的是用3列来表示鸢尾花的分类：如果原来最后一列是0，则新增加的3列为(1,0,0);如果原来最后一列是1，则新增加的3列为(0,1,0);如果原来最后一列是2，则新增加的3列为(0,0,1)。
 38 | ![iris训练集](https://img-blog.csdnimg.cn/20191227141640748.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L21pY2hhZWxfZjIwMDg=,size_16,color_FFFFFF,t_70)
 39 | ## 2.算法实现
 40 | [纯Python实现鸢尾属植物数据集神经网络模型](https://yq.aliyun.com/articles/614411?utm_content=m_1000007130#) 这篇文章中讲解得更为详细。本人对代码做了略微的修改，并增加了评估模型准确率的`predict()`函数。
 41 | 
 42 | **以下代码对应的是iris_data_classification_bpnn_V2.py文件**
 43 | 
 44 | ```python
 45 | import pandas as pd
 46 | import numpy as np
 47 | import datetime
 48 | import matplotlib.pyplot as plt
 49 | from pandas.plotting import radviz
 50 | '''
 51 |     构建一个具有1个隐藏层的神经网络，隐层的大小为10
 52 |     输入层为4个特征，输出层为3个分类
 53 |     (1,0,0)为第一类，(0,1,0)为第二类，(0,0,1)为第三类
 54 | '''
 55 | 
 56 | 
 57 | # 1.初始化参数
 58 | def initialize_parameters(n_x, n_h, n_y):
 59 |     np.random.seed(2)
 60 | 
 61 |     # 权重和偏置矩阵
 62 |     w1 = np.random.randn(n_h, n_x) * 0.01
 63 |     b1 = np.zeros(shape=(n_h, 1))
 64 |     w2 = np.random.randn(n_y, n_h) * 0.01
 65 |     b2 = np.zeros(shape=(n_y, 1))
 66 | 
 67 |     # 通过字典存储参数
 68 |     parameters = {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2}
 69 | 
 70 |     return parameters
 71 | 
 72 | 
 73 | # 2.前向传播
 74 | def forward_propagation(X, parameters):
 75 |     w1 = parameters['w1']
 76 |     b1 = parameters['b1']
 77 |     w2 = parameters['w2']
 78 |     b2 = parameters['b2']
 79 | 
 80 |     # 通过前向传播来计算a2
 81 |     z1 = np.dot(w1, X) + b1     # 这个地方需注意矩阵加法：虽然(w1*X)和b1的维度不同，但可以相加
 82 |     a1 = np.tanh(z1)            # 使用tanh作为第一层的激活函数
 83 |     z2 = np.dot(w2, a1) + b2
 84 |     a2 = 1 / (1 + np.exp(-z2))  # 使用sigmoid作为第二层的激活函数
 85 | 
 86 |     # 通过字典存储参数
 87 |     cache = {'z1': z1, 'a1': a1, 'z2': z2, 'a2': a2}
 88 | 
 89 |     return a2, cache
 90 | 
 91 | 
 92 | # 3.计算代价函数
 93 | def compute_cost(a2, Y, parameters):
 94 |     m = Y.shape[1]      # Y的列数即为总的样本数
 95 | 
 96 |     # 采用交叉熵（cross-entropy）作为代价函数
 97 |     logprobs = np.multiply(np.log(a2), Y) + np.multiply((1 - Y), np.log(1 - a2))
 98 |     cost = - np.sum(logprobs) / m
 99 | 
100 |     return cost
101 | 
102 | 
103 | # 4.反向传播（计算代价函数的导数）
def backward_propagation(parameters, cache, X, Y):
    """Compute the gradients of the cost with respect to all parameters.

    Args:
        parameters: dict; only 'w2' is read.
        cache: dict from the forward pass; 'a1' and 'a2' are read.
        X: input matrix, one sample per column.
        Y: target matrix; its column count is the number of samples.

    Returns:
        dict with 'dw1', 'db1', 'dw2' and 'db2'.
    """
    n_samples = Y.shape[1]
    inv_m = 1 / n_samples

    w2 = parameters['w2']
    hidden_act, out_act = cache['a1'], cache['a2']

    # Output-layer error for a sigmoid output with cross-entropy cost.
    delta_out = out_act - Y
    # Hidden-layer error: back-project through w2 and scale by tanh'(z1),
    # which equals 1 - a1^2.
    delta_hidden = np.multiply(np.dot(w2.T, delta_out), 1 - np.power(hidden_act, 2))

    return {
        'dw1': inv_m * np.dot(delta_hidden, X.T),
        'db1': inv_m * np.sum(delta_hidden, axis=1, keepdims=True),
        'dw2': inv_m * np.dot(delta_out, hidden_act.T),
        'db2': inv_m * np.sum(delta_out, axis=1, keepdims=True),
    }
123 | 
124 | 
125 | # 5.更新参数
def update_parameters(parameters, grads, learning_rate=0.4):
    """Apply one gradient-descent step and return the new parameters.

    Args:
        parameters: dict with 'w1', 'b1', 'w2', 'b2'.
        grads: dict with the matching gradients 'dw1', 'db1', 'dw2', 'db2'.
        learning_rate: step size multiplied with every gradient.

    Returns:
        A new dict with the updated 'w1', 'b1', 'w2', 'b2'; the input dicts
        are not modified.
    """
    # Each gradient is stored under its parameter name prefixed with 'd'.
    return {
        name: parameters[name] - grads['d' + name] * learning_rate
        for name in ('w1', 'b1', 'w2', 'b2')
    }
146 | 
147 | 
148 | # 6.模型评估
def predict(parameters, x_test, y_test):
    """Evaluate the trained network on a test set and print its accuracy.

    Each output unit is thresholded independently at 0.5, so a predicted
    column may be all zeros or contain several ones. A sample counts as
    correct only when its whole predicted column equals the target column.
    Works for any number of output units.

    Args:
        parameters: dict with 'w1', 'b1', 'w2', 'b2'.
        x_test: feature matrix, one sample per column.
        y_test: one-hot target matrix, one sample per column.

    Returns:
        Integer 0/1 matrix of thresholded predictions, same shape as y_test.

    Raises:
        ValueError: if the network output and y_test differ in shape.
    """
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']

    # Forward pass: tanh hidden layer, sigmoid output layer.
    a1 = np.tanh(np.dot(w1, x_test) + b1)
    a2 = 1 / (1 + np.exp(-(np.dot(w2, a1) + b2)))

    targets = np.asarray(y_test)
    if a2.shape != targets.shape:
        raise ValueError('network output shape %s does not match y_test shape %s'
                         % (a2.shape, targets.shape))
    n_cols = targets.shape[1]

    # Threshold every output unit at 0.5.
    output = (a2 > 0.5).astype(int)

    print('预测结果：')
    print(output)
    print('真实结果：')
    print(y_test)

    # Compare whole columns instead of hard-coding three output rows.
    correct = np.all(output == targets, axis=0)
    for k in np.flatnonzero(~correct):
        print(k)    # index of each misclassified sample

    # Guard against an empty test set instead of dividing by zero.
    acc = np.count_nonzero(correct) / n_cols * 100 if n_cols else 0.0
    print('准确率：%.2f%%' % acc)

    return output
190 | 
191 | 
192 | # 建立神经网络
def nn_model(X, Y, n_h, n_input, n_output, num_iterations=10000, print_cost=False):
    """Train the two-layer network with full-batch gradient descent.

    Args:
        X: training features, one sample per column.
        Y: training targets, one sample per column.
        n_h: number of hidden units.
        n_input: number of input units.
        n_output: number of output units.
        num_iterations: number of gradient-descent steps.
        print_cost: when true, print the cost every 1000 iterations.

    Returns:
        dict of trained parameters 'w1', 'b1', 'w2', 'b2'.
    """
    np.random.seed(3)

    # Step 1: initial parameters (input size, hidden size, output size).
    parameters = initialize_parameters(n_input, n_h, n_output)

    for step in range(num_iterations):
        # Steps 2-5: forward pass, cost, backward pass, parameter update.
        a2, cache = forward_propagation(X, parameters)
        cost = compute_cost(a2, Y, parameters)
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads)

        if print_cost and step % 1000 == 0:
            print('迭代第%i次，代价函数为：%f' % (step, cost))

    return parameters
218 | 
219 | 
220 | # 结果可视化
221 | # 特征有4个维度，类别有1个维度，一共5个维度，故采用了RadViz图
def _decode_one_hot(matrix):
    """Map each column of a one-hot matrix (first three rows) to a species name.

    (1,0,0) -> 'setosa', (0,1,0) -> 'versicolor', (0,0,1) -> 'virginica';
    any other pattern becomes 'unknown'.
    """
    species_by_code = {
        (1, 0, 0): 'setosa',
        (0, 1, 0): 'versicolor',
        (0, 0, 1): 'virginica',
    }
    columns = np.asarray(matrix).T
    return [species_by_code.get(tuple(col[:3].tolist()), 'unknown') for col in columns]


def result_visualization(x_test, y_test, result):
    """Show RadViz plots of the true and the predicted classes.

    Two figures are opened, '真实分类' for y_test and '预测分类' for result, and
    plt.show() is called at the end.

    Args:
        x_test: feature matrix with four rows, one sample per column.
        y_test: one-hot true labels, one sample per column.
        result: one-hot predicted labels, one sample per column.
    """
    feature_cols = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']
    all_cols = feature_cols + ['Species']

    # Decode labels with the same class order as the data set: class 0 is
    # setosa = (1,0,0) and class 2 is virginica = (0,0,1). Invalid columns
    # become 'unknown' so the label list always has one entry per sample.
    y = _decode_one_hot(y_test)
    pre = _decode_one_hot(result)

    # Join the features (one sample per row) with the species column.
    real = np.column_stack((x_test.T, y))
    prediction = np.column_stack((x_test.T, pre))

    df_real = pd.DataFrame(real, index=None, columns=all_cols)
    df_prediction = pd.DataFrame(prediction, index=None, columns=all_cols)

    # column_stack turned everything into strings; radviz needs float features.
    df_real[feature_cols] = df_real[feature_cols].astype(float)
    df_prediction[feature_cols] = df_prediction[feature_cols].astype(float)

    # Four colours: three species plus a possible 'unknown' class.
    plt.figure('真实分类')
    radviz(df_real, 'Species', color=['blue', 'green', 'red', 'yellow'])
    plt.figure('预测分类')
    radviz(df_prediction, 'Species', color=['blue', 'green', 'red', 'yellow'])
    plt.show()
264 | 
265 | 
if __name__ == "__main__":
    import os

    # Locate the data next to this script instead of a fixed drive path;
    # this script uses the files in the bpnn_V2数据集 folder.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'bpnn_V2数据集')

    # The CSV files have no header row: four feature columns, then three
    # one-hot label columns.
    data_set = pd.read_csv(os.path.join(data_dir, 'iris_training.csv'), header=None)

    # Transpose so that each column is one sample.
    # Equivalent selections: data_set.loc[:, 0:3] / data_set.loc[:, 4:6], or
    # data_set[data_set.columns[0:4]] / data_set[data_set.columns[4:7]].
    # (The old `.ix` indexer was removed in pandas 1.0.)
    X = data_set.iloc[:, 0:4].values.T
    Y = data_set.iloc[:, 4:].values.T
    Y = Y.astype('uint8')

    # Train: 4 inputs, 10 hidden units, 3 outputs, 10000 iterations.
    start_time = datetime.datetime.now()
    parameters = nn_model(X, Y, n_h=10, n_input=4, n_output=3, num_iterations=10000, print_cost=True)
    end_time = datetime.datetime.now()
    print("用时：" + str((end_time - start_time).seconds) + 's' + str(round((end_time - start_time).microseconds / 1000)) + 'ms')

    # Evaluate on the test set.
    data_test = pd.read_csv(os.path.join(data_dir, 'iris_test.csv'), header=None)
    x_test = data_test.iloc[:, 0:4].values.T
    y_test = data_test.iloc[:, 4:].values.T
    y_test = y_test.astype('uint8')

    result = predict(parameters, x_test, y_test)

    # Plot true versus predicted classes.
    result_visualization(x_test, y_test, result)
304 | 
305 | ```
306 | 最终结果：
307 | ![结果](https://img-blog.csdnimg.cn/20191227152325990.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L21pY2hhZWxfZjIwMDg=,size_16,color_FFFFFF,t_70)
308 | 
309 | 分类的可视化效果，左侧为测试集的真实分类，右侧为模型的预测分类结果，采用的是**RadViz图**：
310 | ![分类可视化](https://img-blog.csdnimg.cn/20200721132114814.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L21pY2hhZWxfZjIwMDg=,size_16,color_FFFFFF,t_70#pic_center)
311 | 
312 | 由于代码中固定了随机种子（`np.random.seed`），在数据和参数相同的情况下每次运行的结果是一致的；可以通过调整**学习率、隐节点数、迭代次数**等参数来改善模型的效果。
313 | 
314 | ## 3.总结
315 | 算法的实现总共分为6步：
316 | 
317 |  1. 初始化参数
318 |  2. 前向传播
319 |  3. 计算代价函数
320 |  4. 反向传播
321 |  5. 更新参数
322 |  6. 模型评估
323 | 
324 | 
325 | 


--------------------------------------------------------------------------------
/bpnn_V1数据集/iris_test.csv:
--------------------------------------------------------------------------------
 1 | SepalLength,SepalWidth,PetalLength,PetalWidth,species
 2 | 5.9,3,4.2,1.5,1
 3 | 6.9,3.1,5.4,2.1,2
 4 | 5.1,3.3,1.7,0.5,0
 5 | 6,3.4,4.5,1.6,1
 6 | 5.5,2.5,4,1.3,1
 7 | 6.2,2.9,4.3,1.3,1
 8 | 5.5,4.2,1.4,0.2,0
 9 | 6.3,2.8,5.1,1.5,2
10 | 5.6,3,4.1,1.3,1
11 | 6.7,2.5,5.8,1.8,2
12 | 7.1,3,5.9,2.1,2
13 | 4.3,3,1.1,0.1,0
14 | 5.6,2.8,4.9,2,2
15 | 5.5,2.3,4,1.3,1
16 | 6,2.2,4,1,1
17 | 5.1,3.5,1.4,0.2,0
18 | 5.7,2.6,3.5,1,1
19 | 4.8,3.4,1.9,0.2,0
20 | 5.1,3.4,1.5,0.2,0
21 | 5.7,2.5,5,2,2
22 | 5.4,3.4,1.7,0.2,0
23 | 5.6,3,4.5,1.5,1
24 | 6.3,2.9,5.6,1.8,2
25 | 6.3,2.5,4.9,1.5,1
26 | 5.8,2.7,3.9,1.2,1
27 | 6.1,3,4.6,1.4,1
28 | 5.2,4.1,1.5,0.1,0
29 | 6.7,3.1,4.7,1.5,1
30 | 6.7,3.3,5.7,2.5,2
31 | 6.4,2.9,4.3,1.3,1
32 | 


--------------------------------------------------------------------------------
/bpnn_V1数据集/iris_training.csv:
--------------------------------------------------------------------------------
  1 | SepalLength,SepalWidth,PetalLength,PetalWidth,species
  2 | 6.4,2.8,5.6,2.2,2
  3 | 5,2.3,3.3,1,1
  4 | 4.9,2.5,4.5,1.7,2
  5 | 4.9,3.1,1.5,0.1,0
  6 | 5.7,3.8,1.7,0.3,0
  7 | 4.4,3.2,1.3,0.2,0
  8 | 5.4,3.4,1.5,0.4,0
  9 | 6.9,3.1,5.1,2.3,2
 10 | 6.7,3.1,4.4,1.4,1
 11 | 5.1,3.7,1.5,0.4,0
 12 | 5.2,2.7,3.9,1.4,1
 13 | 6.9,3.1,4.9,1.5,1
 14 | 5.8,4,1.2,0.2,0
 15 | 5.4,3.9,1.7,0.4,0
 16 | 7.7,3.8,6.7,2.2,2
 17 | 6.3,3.3,4.7,1.6,1
 18 | 6.8,3.2,5.9,2.3,2
 19 | 7.6,3,6.6,2.1,2
 20 | 6.4,3.2,5.3,2.3,2
 21 | 5.7,4.4,1.5,0.4,0
 22 | 6.7,3.3,5.7,2.1,2
 23 | 6.4,2.8,5.6,2.1,2
 24 | 5.4,3.9,1.3,0.4,0
 25 | 6.1,2.6,5.6,1.4,2
 26 | 7.2,3,5.8,1.6,2
 27 | 5.2,3.5,1.5,0.2,0
 28 | 5.8,2.6,4,1.2,1
 29 | 5.9,3,5.1,1.8,2
 30 | 5.4,3,4.5,1.5,1
 31 | 6.7,3,5,1.7,1
 32 | 6.3,2.3,4.4,1.3,1
 33 | 5.1,2.5,3,1.1,1
 34 | 6.4,3.2,4.5,1.5,1
 35 | 6.8,3,5.5,2.1,2
 36 | 6.2,2.8,4.8,1.8,2
 37 | 6.9,3.2,5.7,2.3,2
 38 | 6.5,3.2,5.1,2,2
 39 | 5.8,2.8,5.1,2.4,2
 40 | 5.1,3.8,1.5,0.3,0
 41 | 4.8,3,1.4,0.3,0
 42 | 7.9,3.8,6.4,2,2
 43 | 5.8,2.7,5.1,1.9,2
 44 | 6.7,3,5.2,2.3,2
 45 | 5.1,3.8,1.9,0.4,0
 46 | 4.7,3.2,1.6,0.2,0
 47 | 6,2.2,5,1.5,2
 48 | 4.8,3.4,1.6,0.2,0
 49 | 7.7,2.6,6.9,2.3,2
 50 | 4.6,3.6,1,0.2,0
 51 | 7.2,3.2,6,1.8,2
 52 | 5,3.3,1.4,0.2,0
 53 | 6.6,3,4.4,1.4,1
 54 | 6.1,2.8,4,1.3,1
 55 | 5,3.2,1.2,0.2,0
 56 | 7,3.2,4.7,1.4,1
 57 | 6,3,4.8,1.8,2
 58 | 7.4,2.8,6.1,1.9,2
 59 | 5.8,2.7,5.1,1.9,2
 60 | 6.2,3.4,5.4,2.3,2
 61 | 5,2,3.5,1,1
 62 | 5.6,2.5,3.9,1.1,1
 63 | 6.7,3.1,5.6,2.4,2
 64 | 6.3,2.5,5,1.9,2
 65 | 6.4,3.1,5.5,1.8,2
 66 | 6.2,2.2,4.5,1.5,1
 67 | 7.3,2.9,6.3,1.8,2
 68 | 4.4,3,1.3,0.2,0
 69 | 7.2,3.6,6.1,2.5,2
 70 | 6.5,3,5.5,1.8,2
 71 | 5,3.4,1.5,0.2,0
 72 | 4.7,3.2,1.3,0.2,0
 73 | 6.6,2.9,4.6,1.3,1
 74 | 5.5,3.5,1.3,0.2,0
 75 | 7.7,3,6.1,2.3,2
 76 | 6.1,3,4.9,1.8,2
 77 | 4.9,3.1,1.5,0.1,0
 78 | 5.5,2.4,3.8,1.1,1
 79 | 5.7,2.9,4.2,1.3,1
 80 | 6,2.9,4.5,1.5,1
 81 | 6.4,2.7,5.3,1.9,2
 82 | 5.4,3.7,1.5,0.2,0
 83 | 6.1,2.9,4.7,1.4,1
 84 | 6.5,2.8,4.6,1.5,1
 85 | 5.6,2.7,4.2,1.3,1
 86 | 6.3,3.4,5.6,2.4,2
 87 | 4.9,3.1,1.5,0.1,0
 88 | 6.8,2.8,4.8,1.4,1
 89 | 5.7,2.8,4.5,1.3,1
 90 | 6,2.7,5.1,1.6,1
 91 | 5,3.5,1.3,0.3,0
 92 | 6.5,3,5.2,2,2
 93 | 6.1,2.8,4.7,1.2,1
 94 | 5.1,3.5,1.4,0.3,0
 95 | 4.6,3.1,1.5,0.2,0
 96 | 6.5,3,5.8,2.2,2
 97 | 4.6,3.4,1.4,0.3,0
 98 | 4.6,3.2,1.4,0.2,0
 99 | 7.7,2.8,6.7,2,2
100 | 5.9,3.2,4.8,1.8,1
101 | 5.1,3.8,1.6,0.2,0
102 | 4.9,3,1.4,0.2,0
103 | 4.9,2.4,3.3,1,1
104 | 4.5,2.3,1.3,0.3,0
105 | 5.8,2.7,4.1,1,1
106 | 5,3.4,1.6,0.4,0
107 | 5.2,3.4,1.4,0.2,0
108 | 5.3,3.7,1.5,0.2,0
109 | 5,3.6,1.4,0.2,0
110 | 5.6,2.9,3.6,1.3,1
111 | 4.8,3.1,1.6,0.2,0
112 | 6.3,2.7,4.9,1.8,2
113 | 5.7,2.8,4.1,1.3,1
114 | 5,3,1.6,0.2,0
115 | 6.3,3.3,6,2.5,2
116 | 5,3.5,1.6,0.6,0
117 | 5.5,2.6,4.4,1.2,1
118 | 5.7,3,4.2,1.2,1
119 | 4.4,2.9,1.4,0.2,0
120 | 4.8,3,1.4,0.1,0
121 | 5.5,2.4,3.7,1,1
122 | 


--------------------------------------------------------------------------------
/bpnn_V2数据集/iris_test.csv:
--------------------------------------------------------------------------------
 1 | 5.9,3,4.2,1.5,0,1,0
 2 | 6.9,3.1,5.4,2.1,0,0,1
 3 | 5.1,3.3,1.7,0.5,1,0,0
 4 | 6,3.4,4.5,1.6,0,1,0
 5 | 5.5,2.5,4,1.3,0,1,0
 6 | 6.2,2.9,4.3,1.3,0,1,0
 7 | 5.5,4.2,1.4,0.2,1,0,0
 8 | 6.3,2.8,5.1,1.5,0,0,1
 9 | 5.6,3,4.1,1.3,0,1,0
10 | 6.7,2.5,5.8,1.8,0,0,1
11 | 7.1,3,5.9,2.1,0,0,1
12 | 4.3,3,1.1,0.1,1,0,0
13 | 5.6,2.8,4.9,2,0,0,1
14 | 5.5,2.3,4,1.3,0,1,0
15 | 6,2.2,4,1,0,1,0
16 | 5.1,3.5,1.4,0.2,1,0,0
17 | 5.7,2.6,3.5,1,0,1,0
18 | 4.8,3.4,1.9,0.2,1,0,0
19 | 5.1,3.4,1.5,0.2,1,0,0
20 | 5.7,2.5,5,2,0,0,1
21 | 5.4,3.4,1.7,0.2,1,0,0
22 | 5.6,3,4.5,1.5,0,1,0
23 | 6.3,2.9,5.6,1.8,0,0,1
24 | 6.3,2.5,4.9,1.5,0,1,0
25 | 5.8,2.7,3.9,1.2,0,1,0
26 | 6.1,3,4.6,1.4,0,1,0
27 | 5.2,4.1,1.5,0.1,1,0,0
28 | 6.7,3.1,4.7,1.5,0,1,0
29 | 6.7,3.3,5.7,2.5,0,0,1
30 | 6.4,2.9,4.3,1.3,0,1,0
31 | 


--------------------------------------------------------------------------------
/bpnn_V2数据集/iris_training.csv:
--------------------------------------------------------------------------------
  1 | 6.4,2.8,5.6,2.2,0,0,1
  2 | 5,2.3,3.3,1,0,1,0
  3 | 4.9,2.5,4.5,1.7,0,0,1
  4 | 4.9,3.1,1.5,0.1,1,0,0
  5 | 5.7,3.8,1.7,0.3,1,0,0
  6 | 4.4,3.2,1.3,0.2,1,0,0
  7 | 5.4,3.4,1.5,0.4,1,0,0
  8 | 6.9,3.1,5.1,2.3,0,0,1
  9 | 6.7,3.1,4.4,1.4,0,1,0
 10 | 5.1,3.7,1.5,0.4,1,0,0
 11 | 5.2,2.7,3.9,1.4,0,1,0
 12 | 6.9,3.1,4.9,1.5,0,1,0
 13 | 5.8,4,1.2,0.2,1,0,0
 14 | 5.4,3.9,1.7,0.4,1,0,0
 15 | 7.7,3.8,6.7,2.2,0,0,1
 16 | 6.3,3.3,4.7,1.6,0,1,0
 17 | 6.8,3.2,5.9,2.3,0,0,1
 18 | 7.6,3,6.6,2.1,0,0,1
 19 | 6.4,3.2,5.3,2.3,0,0,1
 20 | 5.7,4.4,1.5,0.4,1,0,0
 21 | 6.7,3.3,5.7,2.1,0,0,1
 22 | 6.4,2.8,5.6,2.1,0,0,1
 23 | 5.4,3.9,1.3,0.4,1,0,0
 24 | 6.1,2.6,5.6,1.4,0,0,1
 25 | 7.2,3,5.8,1.6,0,0,1
 26 | 5.2,3.5,1.5,0.2,1,0,0
 27 | 5.8,2.6,4,1.2,0,1,0
 28 | 5.9,3,5.1,1.8,0,0,1
 29 | 5.4,3,4.5,1.5,0,1,0
 30 | 6.7,3,5,1.7,0,1,0
 31 | 6.3,2.3,4.4,1.3,0,1,0
 32 | 5.1,2.5,3,1.1,0,1,0
 33 | 6.4,3.2,4.5,1.5,0,1,0
 34 | 6.8,3,5.5,2.1,0,0,1
 35 | 6.2,2.8,4.8,1.8,0,0,1
 36 | 6.9,3.2,5.7,2.3,0,0,1
 37 | 6.5,3.2,5.1,2,0,0,1
 38 | 5.8,2.8,5.1,2.4,0,0,1
 39 | 5.1,3.8,1.5,0.3,1,0,0
 40 | 4.8,3,1.4,0.3,1,0,0
 41 | 7.9,3.8,6.4,2,0,0,1
 42 | 5.8,2.7,5.1,1.9,0,0,1
 43 | 6.7,3,5.2,2.3,0,0,1
 44 | 5.1,3.8,1.9,0.4,1,0,0
 45 | 4.7,3.2,1.6,0.2,1,0,0
 46 | 6,2.2,5,1.5,0,0,1
 47 | 4.8,3.4,1.6,0.2,1,0,0
 48 | 7.7,2.6,6.9,2.3,0,0,1
 49 | 4.6,3.6,1,0.2,1,0,0
 50 | 7.2,3.2,6,1.8,0,0,1
 51 | 5,3.3,1.4,0.2,1,0,0
 52 | 6.6,3,4.4,1.4,0,1,0
 53 | 6.1,2.8,4,1.3,0,1,0
 54 | 5,3.2,1.2,0.2,1,0,0
 55 | 7,3.2,4.7,1.4,0,1,0
 56 | 6,3,4.8,1.8,0,0,1
 57 | 7.4,2.8,6.1,1.9,0,0,1
 58 | 5.8,2.7,5.1,1.9,0,0,1
 59 | 6.2,3.4,5.4,2.3,0,0,1
 60 | 5,2,3.5,1,0,1,0
 61 | 5.6,2.5,3.9,1.1,0,1,0
 62 | 6.7,3.1,5.6,2.4,0,0,1
 63 | 6.3,2.5,5,1.9,0,0,1
 64 | 6.4,3.1,5.5,1.8,0,0,1
 65 | 6.2,2.2,4.5,1.5,0,1,0
 66 | 7.3,2.9,6.3,1.8,0,0,1
 67 | 4.4,3,1.3,0.2,1,0,0
 68 | 7.2,3.6,6.1,2.5,0,0,1
 69 | 6.5,3,5.5,1.8,0,0,1
 70 | 5,3.4,1.5,0.2,1,0,0
 71 | 4.7,3.2,1.3,0.2,1,0,0
 72 | 6.6,2.9,4.6,1.3,0,1,0
 73 | 5.5,3.5,1.3,0.2,1,0,0
 74 | 7.7,3,6.1,2.3,0,0,1
 75 | 6.1,3,4.9,1.8,0,0,1
 76 | 4.9,3.1,1.5,0.1,1,0,0
 77 | 5.5,2.4,3.8,1.1,0,1,0
 78 | 5.7,2.9,4.2,1.3,0,1,0
 79 | 6,2.9,4.5,1.5,0,1,0
 80 | 6.4,2.7,5.3,1.9,0,0,1
 81 | 5.4,3.7,1.5,0.2,1,0,0
 82 | 6.1,2.9,4.7,1.4,0,1,0
 83 | 6.5,2.8,4.6,1.5,0,1,0
 84 | 5.6,2.7,4.2,1.3,0,1,0
 85 | 6.3,3.4,5.6,2.4,0,0,1
 86 | 4.9,3.1,1.5,0.1,1,0,0
 87 | 6.8,2.8,4.8,1.4,0,1,0
 88 | 5.7,2.8,4.5,1.3,0,1,0
 89 | 6,2.7,5.1,1.6,0,1,0
 90 | 5,3.5,1.3,0.3,1,0,0
 91 | 6.5,3,5.2,2,0,0,1
 92 | 6.1,2.8,4.7,1.2,0,1,0
 93 | 5.1,3.5,1.4,0.3,1,0,0
 94 | 4.6,3.1,1.5,0.2,1,0,0
 95 | 6.5,3,5.8,2.2,0,0,1
 96 | 4.6,3.4,1.4,0.3,1,0,0
 97 | 4.6,3.2,1.4,0.2,1,0,0
 98 | 7.7,2.8,6.7,2,0,0,1
 99 | 5.9,3.2,4.8,1.8,0,1,0
100 | 5.1,3.8,1.6,0.2,1,0,0
101 | 4.9,3,1.4,0.2,1,0,0
102 | 4.9,2.4,3.3,1,0,1,0
103 | 4.5,2.3,1.3,0.3,1,0,0
104 | 5.8,2.7,4.1,1,0,1,0
105 | 5,3.4,1.6,0.4,1,0,0
106 | 5.2,3.4,1.4,0.2,1,0,0
107 | 5.3,3.7,1.5,0.2,1,0,0
108 | 5,3.6,1.4,0.2,1,0,0
109 | 5.6,2.9,3.6,1.3,0,1,0
110 | 4.8,3.1,1.6,0.2,1,0,0
111 | 6.3,2.7,4.9,1.8,0,0,1
112 | 5.7,2.8,4.1,1.3,0,1,0
113 | 5,3,1.6,0.2,1,0,0
114 | 6.3,3.3,6,2.5,0,0,1
115 | 5,3.5,1.6,0.6,1,0,0
116 | 5.5,2.6,4.4,1.2,0,1,0
117 | 5.7,3,4.2,1.2,0,1,0
118 | 4.4,2.9,1.4,0.2,1,0,0
119 | 4.8,3,1.4,0.1,1,0,0
120 | 5.5,2.4,3.7,1,0,1,0
121 | 


--------------------------------------------------------------------------------
/iris_data_classification_bpnn_V1.py:
--------------------------------------------------------------------------------
  1 | import matplotlib.pyplot as plt
  2 | import pandas as pd
  3 | import numpy as np
  4 | import datetime
  5 | from sklearn.preprocessing import OneHotEncoder
  6 | from pandas.plotting import radviz
  7 | '''
  8 |     构建一个具有1个隐藏层的神经网络，隐层的大小为10
  9 |     输入层为2（或4）个特征；输出层1个节点，结果为0或1
 10 |     当特征为2个时，表头为：'SepalLength', 'SepalWidth', 'species'，迭代1000次，正确率为100%
 11 |     当特征为4个时，表头为：'SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'species'，迭代1000次，正确率为63.64%
 12 | '''
 13 | 
 14 | 
 15 | # 画图看原始数据
 16 | def draw_plot(X, Y):
 17 |     # 用来正常显示中文标签
 18 |     plt.rcParams['font.sans-serif'] = ['SimHei']
 19 | 
 20 |     plt.scatter(X[0, :], X[1, :], c=Y[0, :], s=50, cmap=plt.cm.Spectral)
 21 |     plt.title('蓝色-Versicolor， 红色-Virginica')
 22 |     plt.xlabel('花瓣长度')
 23 |     plt.ylabel('花瓣宽度')
 24 |     plt.show()
 25 | 
 26 | 
 27 | # 1.初始化参数
 28 | def initialize_parameters(n_x, n_h, n_y):
 29 |     np.random.seed(2)
 30 | 
 31 |     # 权重和偏置矩阵
 32 |     w1 = np.random.randn(n_h, n_x) * 0.01
 33 |     b1 = np.zeros(shape=(n_h, 1))
 34 |     w2 = np.random.randn(n_y, n_h) * 0.01
 35 |     b2 = np.zeros(shape=(n_y, 1))
 36 | 
 37 |     # 通过字典存储参数
 38 |     parameters = {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2}
 39 | 
 40 |     return parameters
 41 | 
 42 | 
 43 | # 2.前向传播
 44 | def forward_propagation(X, parameters):
 45 |     w1 = parameters['w1']
 46 |     b1 = parameters['b1']
 47 |     w2 = parameters['w2']
 48 |     b2 = parameters['b2']
 49 | 
 50 |     # 通过前向传播来计算a2
 51 |     z1 = np.dot(w1, X) + b1     # 这个地方需注意矩阵加法：虽然(w1*X)和b1的维度不同，但可以相加
 52 |     a1 = np.tanh(z1)            # 使用tanh作为第一层的激活函数
 53 |     z2 = np.dot(w2, a1) + b2
 54 |     a2 = 1 / (1 + np.exp(-z2))  # 使用sigmoid作为第二层的激活函数
 55 | 
 56 |     # 通过字典存储参数
 57 |     cache = {'z1': z1, 'a1': a1, 'z2': z2, 'a2': a2}
 58 | 
 59 |     return a2, cache
 60 | 
 61 | 
 62 | # 3.计算代价函数
 63 | def compute_cost(a2, Y):
 64 |     m = Y.shape[1]      # Y的列数即为总的样本数
 65 | 
 66 |     # 采用交叉熵（cross-entropy）作为代价函数
 67 |     logprobs = np.multiply(np.log(a2), Y) + np.multiply((1 - Y), np.log(1 - a2))
 68 |     cost = - np.sum(logprobs) / m
 69 | 
 70 |     return cost
 71 | 
 72 | 
 73 | # 4.反向传播（计算代价函数的导数）
 74 | def backward_propagation(parameters, cache, X, Y):
 75 |     m = Y.shape[1]
 76 | 
 77 |     w2 = parameters['w2']
 78 | 
 79 |     a1 = cache['a1']
 80 |     a2 = cache['a2']
 81 | 
 82 |     # 反向传播，计算dw1、db1、dw2、db2
 83 |     dz2 = a2 - Y
 84 |     dw2 = (1 / m) * np.dot(dz2, a1.T)
 85 |     db2 = (1 / m) * np.sum(dz2, axis=1, keepdims=True)
 86 |     dz1 = np.multiply(np.dot(w2.T, dz2), 1 - np.power(a1, 2))
 87 |     dw1 = (1 / m) * np.dot(dz1, X.T)
 88 |     db1 = (1 / m) * np.sum(dz1, axis=1, keepdims=True)
 89 | 
 90 |     grads = {'dw1': dw1, 'db1': db1, 'dw2': dw2, 'db2': db2}
 91 | 
 92 |     return grads
 93 | 
 94 | 
 95 | # 5.更新参数
 96 | def update_parameters(parameters, grads, learning_rate=0.4):
 97 |     w1 = parameters['w1']
 98 |     b1 = parameters['b1']
 99 |     w2 = parameters['w2']
100 |     b2 = parameters['b2']
101 | 
102 |     dw1 = grads['dw1']
103 |     db1 = grads['db1']
104 |     dw2 = grads['dw2']
105 |     db2 = grads['db2']
106 | 
107 |     # 更新参数
108 |     w1 = w1 - dw1 * learning_rate
109 |     b1 = b1 - db1 * learning_rate
110 |     w2 = w2 - dw2 * learning_rate
111 |     b2 = b2 - db2 * learning_rate
112 | 
113 |     parameters = {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2}
114 | 
115 |     return parameters
116 | 
117 | 
118 | # 建立神经网络
def nn_model(X, Y, n_h, n_input, n_output, num_iterations=10000, print_cost=False):
    """Train the network with full-batch gradient descent.

    Args:
        X: training features, one sample per column.
        Y: training targets, one sample per column.
        n_h: hidden-layer size.
        n_input: input-layer size.
        n_output: output-layer size.
        num_iterations: number of gradient-descent steps.
        print_cost: when true, print the cost every 1000 iterations.

    Returns:
        dict with the trained 'w1', 'b1', 'w2', 'b2'.
    """
    np.random.seed(3)

    # Step 1: starting parameters.
    parameters = initialize_parameters(n_input, n_h, n_output)

    iteration = 0
    while iteration < num_iterations:
        # Steps 2-5: forward pass, cost, gradients, update.
        a2, cache = forward_propagation(X, parameters)
        cost = compute_cost(a2, Y)
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads)

        if print_cost and iteration % 1000 == 0:
            print('迭代第%i次，代价函数为：%f' % (iteration, cost))
        iteration += 1

    return parameters
144 | 
145 | 
146 | # 6.模型评估
def predict(parameters, x_test, y_test):
    """Classify the test samples, print the results and the accuracy.

    The predicted class of a sample is the output unit with the highest
    activation. Thresholding each unit at 0.5 could give no winner or several
    winners for a sample, which a one-hot decoder cannot invert, so argmax is
    used instead.

    If a fitted module-level `encoder` exists (the main script creates one),
    class indices are mapped back to the original labels through its
    `categories_`; otherwise the class indices themselves are returned.

    Args:
        parameters: dict with 'w1', 'b1', 'w2', 'b2'.
        x_test: feature matrix, one sample per column.
        y_test: 1-D sequence of true labels, one per sample.

    Returns:
        1-D array with the predicted label of every sample.
    """
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']

    # Forward pass: tanh hidden layer, sigmoid output layer.
    a1 = np.tanh(np.dot(w1, x_test) + b1)
    a2 = 1 / (1 + np.exp(-(np.dot(w2, a1) + b2)))

    n_cols = a2.shape[1]

    # Exactly one class per sample: the most activated output unit.
    winners = np.argmax(a2, axis=0)

    # Translate class indices into labels when the encoder is available.
    fitted_encoder = globals().get('encoder')
    if fitted_encoder is not None:
        output = np.asarray(fitted_encoder.categories_[0])[winners]
    else:
        output = winners

    print('预测结果：', output)
    print('真实结果：', y_test)

    truth = np.asarray(y_test)[:n_cols]
    wrong = np.flatnonzero(output != truth)
    for k in wrong:
        # Reported sample numbers are 1-based.
        print('错误分类样本的序号：', k + 1)

    # Guard against an empty test set instead of dividing by zero.
    acc = (n_cols - len(wrong)) / n_cols * 100 if n_cols else 0.0
    print('准确率：%.2f%%' % acc)

    return output
191 | 
192 | 
193 | # 7.结果可视化
194 | # 特征有4个维度，类别有1个维度，一共5个维度，故采用了RadViz图
def result_visualization(x_test, y_test, result):
    """Show RadViz plots of the true and the predicted classes.

    Two figures are opened, '真实分类' for y_test and '预测分类' for result, and
    plt.show() is called at the end.

    Args:
        x_test: feature matrix with four rows, one sample per column.
        y_test: 1-D array of true class indices (0, 1 or 2).
        result: 1-D sequence of predicted class indices.
    """
    feature_cols = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']
    all_cols = feature_cols + ['Species']
    labels = ['setosa', 'versicolor', 'virginica']

    n_samples = y_test.shape[0]

    # Translate the class indices 0/1/2 into species names.
    y = [labels[y_test[i]] for i in range(n_samples)]
    pre = [labels[result[i]] for i in range(n_samples)]

    def build_frame(species):
        # Join features (one sample per row) with the species column, then
        # restore float features, because column_stack yields strings and
        # radviz needs numbers.
        table = np.column_stack((x_test.T, species))
        frame = pd.DataFrame(table, index=None, columns=all_cols)
        frame[feature_cols] = frame[feature_cols].astype(float)
        return frame

    df_real = build_frame(y)
    df_prediction = build_frame(pre)

    plt.figure('真实分类')
    radviz(df_real, 'Species', color=['blue', 'green', 'red', 'yellow'])
    plt.figure('预测分类')
    radviz(df_prediction, 'Species', color=['blue', 'green', 'red', 'yellow'])
    plt.show()
224 | 
225 | 
if __name__ == "__main__":
    import os

    # Locate the data next to this script instead of a machine-specific
    # absolute path.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'bpnn_V1数据集')
    feature_columns = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']

    # Load the training data; transpose so that each column is one sample.
    iris = pd.read_csv(os.path.join(data_dir, 'iris_training.csv'))
    X = iris[feature_columns].values.T
    Y = iris['species'].values

    # One-hot encode the labels. `encoder` stays at module level so that
    # predict() can map predictions back to labels.
    encoder = OneHotEncoder()
    Y = encoder.fit_transform(Y.reshape(Y.shape[0], 1))
    Y = Y.toarray().T
    Y = Y.astype('uint8')

    # Train: 4 inputs, 10 hidden units, 3 outputs, 10000 iterations.
    start_time = datetime.datetime.now()
    parameters = nn_model(X, Y, n_h=10, n_input=4, n_output=3, num_iterations=10000, print_cost=True)
    end_time = datetime.datetime.now()
    # total_seconds() covers the whole duration; `.microseconds` alone is only
    # the sub-second part and drops every full second.
    elapsed_ms = round((end_time - start_time).total_seconds() * 1000)
    print("用时：" + str(elapsed_ms) + 'ms')

    # Evaluate on the test set.
    data_test = pd.read_csv(os.path.join(data_dir, 'iris_test.csv'))
    x_test = data_test[feature_columns].values.T
    y_test = data_test['species'].values

    result = predict(parameters, x_test, y_test)

    # Plot true versus predicted classes.
    result_visualization(x_test, y_test, result)
254 | 


--------------------------------------------------------------------------------
/iris_data_classification_bpnn_V2.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import numpy as np
  3 | import datetime
  4 | import matplotlib.pyplot as plt
  5 | from pandas.plotting import radviz
  6 | '''
  7 |     构建一个具有1个隐藏层的神经网络，隐层的大小为10
  8 |     输入层为4个特征，输出层为3个分类
  9 |     (1,0,0)为第一类，(0,1,0)为第二类，(0,0,1)为第三类
 10 | '''
 11 | 
 12 | 
 13 | # 1.初始化参数
 14 | def initialize_parameters(n_x, n_h, n_y):
 15 |     np.random.seed(2)
 16 | 
 17 |     # 权重和偏置矩阵
 18 |     w1 = np.random.randn(n_h, n_x) * 0.01
 19 |     b1 = np.zeros(shape=(n_h, 1))
 20 |     w2 = np.random.randn(n_y, n_h) * 0.01
 21 |     b2 = np.zeros(shape=(n_y, 1))
 22 | 
 23 |     # 通过字典存储参数
 24 |     parameters = {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2}
 25 | 
 26 |     return parameters
 27 | 
 28 | 
 29 | # 2.前向传播
 30 | def forward_propagation(X, parameters):
 31 |     w1 = parameters['w1']
 32 |     b1 = parameters['b1']
 33 |     w2 = parameters['w2']
 34 |     b2 = parameters['b2']
 35 | 
 36 |     # 通过前向传播来计算a2
 37 |     z1 = np.dot(w1, X) + b1     # 这个地方需注意矩阵加法：虽然(w1*X)和b1的维度不同，但可以相加
 38 |     a1 = np.tanh(z1)            # 使用tanh作为第一层的激活函数
 39 |     z2 = np.dot(w2, a1) + b2
 40 |     a2 = 1 / (1 + np.exp(-z2))  # 使用sigmoid作为第二层的激活函数
 41 | 
 42 |     # 通过字典存储参数
 43 |     cache = {'z1': z1, 'a1': a1, 'z2': z2, 'a2': a2}
 44 | 
 45 |     return a2, cache
 46 | 
 47 | 
 48 | # 3.计算代价函数
 49 | def compute_cost(a2, Y):
 50 |     m = Y.shape[1]      # Y的列数即为总的样本数
 51 | 
 52 |     # 采用交叉熵（cross-entropy）作为代价函数
 53 |     logprobs = np.multiply(np.log(a2), Y) + np.multiply((1 - Y), np.log(1 - a2))
 54 |     cost = - np.sum(logprobs) / m
 55 | 
 56 |     return cost
 57 | 
 58 | 
 59 | # 4.反向传播（计算代价函数的导数）
 60 | def backward_propagation(parameters, cache, X, Y):
 61 |     m = Y.shape[1]
 62 | 
 63 |     w2 = parameters['w2']
 64 | 
 65 |     a1 = cache['a1']
 66 |     a2 = cache['a2']
 67 | 
 68 |     # 反向传播，计算dw1、db1、dw2、db2
 69 |     dz2 = a2 - Y
 70 |     dw2 = (1 / m) * np.dot(dz2, a1.T)
 71 |     db2 = (1 / m) * np.sum(dz2, axis=1, keepdims=True)
 72 |     dz1 = np.multiply(np.dot(w2.T, dz2), 1 - np.power(a1, 2))
 73 |     dw1 = (1 / m) * np.dot(dz1, X.T)
 74 |     db1 = (1 / m) * np.sum(dz1, axis=1, keepdims=True)
 75 | 
 76 |     grads = {'dw1': dw1, 'db1': db1, 'dw2': dw2, 'db2': db2}
 77 | 
 78 |     return grads
 79 | 
 80 | 
 81 | # 5.更新参数
 82 | def update_parameters(parameters, grads, learning_rate=0.4):
 83 |     w1 = parameters['w1']
 84 |     b1 = parameters['b1']
 85 |     w2 = parameters['w2']
 86 |     b2 = parameters['b2']
 87 | 
 88 |     dw1 = grads['dw1']
 89 |     db1 = grads['db1']
 90 |     dw2 = grads['dw2']
 91 |     db2 = grads['db2']
 92 | 
 93 |     # 更新参数
 94 |     w1 = w1 - dw1 * learning_rate
 95 |     b1 = b1 - db1 * learning_rate
 96 |     w2 = w2 - dw2 * learning_rate
 97 |     b2 = b2 - db2 * learning_rate
 98 | 
 99 |     parameters = {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2}
100 | 
101 |     return parameters
102 | 
103 | 
104 | # 建立神经网络
def nn_model(X, Y, n_h, n_input, n_output, num_iterations=10000, print_cost=False, learning_rate=0.4):
    """Train the two-layer network with batch gradient descent.

    :param X: training inputs, passed unchanged to ``forward_propagation``
    :param Y: training targets, passed unchanged to the cost and gradient steps
    :param n_h: number of hidden nodes
    :param n_input: number of input nodes
    :param n_output: number of output nodes
    :param num_iterations: number of gradient-descent iterations
    :param print_cost: when true, print the cost every 1000 iterations
    :param learning_rate: step size forwarded to ``update_parameters``;
        defaults to 0.4, the default of ``update_parameters`` itself
    :return: the trained parameter dictionary
    """
    # Kept for its effect on the global RNG state; initialize_parameters
    # re-seeds the generator itself before drawing the weights.
    np.random.seed(3)

    n_x = n_input           # input-layer size
    n_y = n_output          # output-layer size

    # 1. initialise the parameters
    parameters = initialize_parameters(n_x, n_h, n_y)

    # gradient-descent loop
    for i in range(0, num_iterations):
        # 2. forward pass
        a2, cache = forward_propagation(X, parameters)

        # 3. the cost is only reported, never used for the update, so it is
        #    evaluated only on the iterations that print it
        if print_cost and i % 1000 == 0:
            cost = compute_cost(a2, Y)
            print('迭代第%i次，代价函数为：%f' % (i, cost))

        # 4. backward pass
        grads = backward_propagation(parameters, cache, X, Y)
        # 5. parameter update
        parameters = update_parameters(parameters, grads, learning_rate=learning_rate)

    return parameters
130 | 
131 | 
132 | # 6.模型评估
def predict(parameters, x_test, y_test):
    """Classify the test samples, print a report and return the predictions.

    Each output activation is thresholded at 0.5. A sample counts as correct
    only when every output row matches the corresponding row of ``y_test``.

    :param parameters: dict holding 'w1', 'b1', 'w2', 'b2'
    :param x_test: test inputs, one column per sample
    :param y_test: 2-D array of 0/1 targets (rows are output nodes, columns
        are samples); any number of rows is accepted
    :return: integer array of 0/1 predictions with the shape of ``y_test``
    """
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']

    # Forward pass (tanh hidden layer, sigmoid output layer)
    z1 = np.dot(w1, x_test) + b1
    a1 = np.tanh(z1)
    z2 = np.dot(w2, a1) + b2
    a2 = 1 / (1 + np.exp(-z2))

    # Dimensions of the result
    n_rows = y_test.shape[0]
    n_cols = y_test.shape[1]

    # Threshold all activations at once instead of looping cell by cell.
    output = (a2[:n_rows, :n_cols] > 0.5).astype(int)

    print('预测结果：', output)
    print('真实结果：', y_test)

    # Compare whole columns so the check works for any number of classes,
    # not just three output rows.
    matches = np.all(output == y_test, axis=0)
    for k in np.flatnonzero(~matches):
        print('错误分类样本的序号：', int(k) + 1)
    count = int(matches.sum())

    # An empty test set has no meaningful accuracy; report 0 rather than
    # dividing by zero.
    acc = count / n_cols * 100 if n_cols else 0.0
    print('准确率：%.2f%%' % acc)

    return output
172 | 
173 | 
174 | # 7.结果可视化
175 | # 特征有4个维度，类别有1个维度，一共5个维度，故采用了RadViz图
def result_visualization(x_test, y_test, result):
    """Draw RadViz plots of the true and the predicted classes side by side.

    Both label matrices are one-hot encoded with three rows; every column is
    decoded to a species name. A column that is not one of the three known
    one-hot patterns is labelled 'unknown'.

    :param x_test: feature matrix; its transpose must have four columns
    :param y_test: true one-hot labels, three rows, one column per sample
    :param result: predicted 0/1 matrix with the same layout as ``y_test``
    :return: None
    """
    feature_columns = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']
    # NOTE(review): this mapping assumes the V2 CSV encodes setosa as (0,0,1)
    # and virginica as (1,0,0) - confirm against the data set.
    patterns = (
        ((0, 0, 1), 'setosa'),
        ((0, 1, 0), 'versicolor'),
        ((1, 0, 0), 'virginica'),
    )
    cols = y_test.shape[1]

    def decode(one_hot):
        # Turn each one-hot column back into a species name. Using the same
        # 'unknown' fallback for true and predicted labels keeps both lists
        # at `cols` entries, so column_stack cannot fail on a malformed column.
        names = []
        for j in range(cols):
            column = (one_hot[0][j], one_hot[1][j], one_hot[2][j])
            for pattern, name in patterns:
                if all(value == expected for value, expected in zip(column, pattern)):
                    names.append(name)
                    break
            else:
                names.append('unknown')
        return names

    def as_frame(names):
        # Stacking floats with strings yields a string matrix, so the feature
        # columns are cast back to float afterwards (radviz needs numbers).
        stacked = np.column_stack((x_test.T, names))
        frame = pd.DataFrame(stacked, index=None, columns=feature_columns + ['Species'])
        frame[feature_columns] = frame[feature_columns].astype(float)
        return frame

    df_real = as_frame(decode(y_test))
    df_prediction = as_frame(decode(result))

    # Plot
    plt.figure('真实分类')
    radviz(df_real, 'Species', color=['blue', 'green', 'red', 'yellow'])
    plt.figure('预测分类')
    radviz(df_prediction, 'Species', color=['blue', 'green', 'red', 'yellow'])
    plt.show()
218 | 
219 | 
if __name__ == "__main__":
    # Load the training set (the CSV has no header row)
    data_set = pd.read_csv('E:\\GitHub\\iris_classification_BPNeuralNetwork\\bpnn_V2数据集\\iris_training.csv', header=None)

    # Option 1 (in use): positional slicing with iloc.
    # The first four columns are the features, the remaining ones the labels;
    # both are transposed so that every sample becomes a column.
    X = data_set.iloc[:, 0:4].values.T
    Y = data_set.iloc[:, 4:].values.T.astype('uint8')

    # Option 2 (.ix was removed from pandas in version 1.0):
    # X = data_set.ix[:, 0:3].values.T
    # Y = data_set.ix[:, 4:6].values.T

    # Option 3:
    # X = data_set.loc[:, 0:3].values.T
    # Y = data_set.loc[:, 4:6].values.T

    # Option 4:
    # X = data_set[data_set.columns[0:4]].values.T
    # Y = data_set[data_set.columns[4:7]].values.T

    # Train: 4 input nodes, 10 hidden nodes, 3 output nodes, 10000 iterations
    start_time = datetime.datetime.now()
    parameters = nn_model(X, Y, n_h=10, n_input=4, n_output=3, num_iterations=10000, print_cost=True)
    end_time = datetime.datetime.now()
    elapsed = end_time - start_time
    print("用时：" + str(elapsed.seconds) + 's' + str(round(elapsed.microseconds / 1000)) + 'ms')

    # Evaluate the model on the test set
    data_test = pd.read_csv('E:\\GitHub\\iris_classification_BPNeuralNetwork\\bpnn_V2数据集\\iris_test.csv', header=None)
    x_test = data_test.iloc[:, 0:4].values.T
    y_test = data_test.iloc[:, 4:].values.T.astype('uint8')

    result = predict(parameters, x_test, y_test)

    # Visualise the classification result
    result_visualization(x_test, y_test, result)
258 | 


--------------------------------------------------------------------------------
/iris_data_classification_knn.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | import random
  3 | import numpy as np
  4 | import operator
  5 | import datetime
  6 | 
  7 | 
  8 | def open_file(file_name):
  9 |     """
 10 |     打开数据集，进行数据处理
 11 |     :param file_name: 数据集的路径
 12 |     :return: 返回数据集的 特征、标签、标签名
 13 |     """
 14 |     with open(file_name) as csv_file:
 15 |         data_file = csv.reader(csv_file)
 16 | 
 17 |         # temp读取的是csv文件的第一行，相当于表头
 18 |         temp = next(data_file)
 19 | 
 20 |         # 数据集中数据的总数量
 21 |         n_samples = int(temp[0])
 22 | 
 23 |         # 数据集中特征值的种类个数
 24 |         n_features = int(temp[1])
 25 | 
 26 |         # 标签名
 27 |         labels_names = np.array(temp[2:])
 28 | 
 29 |         # 特征集，行数为数据集数量，列数为特征值的种类个数
 30 |         features = np.empty((n_samples, n_features))
 31 | 
 32 |         # 标签集，行数为数据集数量，1列，数据格式为int
 33 |         labels = np.empty((n_samples,), dtype=np.int)
 34 | 
 35 |         for i, j in enumerate(data_file):
 36 |             # 将数据集中的将数据转化为矩阵，数据格式为float
 37 |             # 将数据中从第一列到倒数第二列中的数据保存在data中
 38 |             features[i] = np.asarray(j[:-1], dtype=np.float64)
 39 | 
 40 |             # 将数据集中的将数据转化为矩阵，数据格式为int
 41 |             # 将数据集中倒数第一列中的数据保存在target中
 42 |             labels[i] = np.asarray(j[-1], dtype=np.int)
 43 | 
 44 |     # 返回 数据，标签 和标签名
 45 |     return features, labels, labels_names
 46 | 
 47 | 
 48 | def random_number(data_size):
 49 |     """
 50 |     该函数使用shuffle()打乱一个包含从0到数据集大小的整数列表。因此每次运行程序划分不同，导致结果不同
 51 |     :param data_size: 数据集大小
 52 |     :return: 返回一个列表
 53 |     """
 54 |     number_set = []
 55 |     for i in range(data_size):
 56 |         number_set.append(i)
 57 | 
 58 |     random.shuffle(number_set)
 59 | 
 60 |     return number_set
 61 | 
 62 | 
 63 | def split_data_set(features_set, labels_set, rate=0.20):
 64 |     """
 65 |     分割数据集，默认数据集的25%是测试集
 66 |     :param features_set: 数据集
 67 |     :param labels_set: 标签数据
 68 |     :param rate: 测试集所占的比率
 69 |     :return: 返回训练集数据、训练集标签、测试集数据、测试集标签
 70 |     """
 71 |     # 计算训练集的数据个数
 72 |     train_size = int((1-rate)*len(features_set))
 73 | 
 74 |     # 调用random_number获得随机数据索引
 75 |     data_index = random_number(len(features_set))
 76 | 
 77 |     # 分隔数据集
 78 |     # x是自变量，即输入（分类特征）；y是因变量，即输出（分类结果）
 79 |     x_train = features_set[data_index[:train_size]]
 80 |     x_test = features_set[data_index[train_size:]]
 81 | 
 82 |     y_train = labels_set[data_index[:train_size]]
 83 |     y_test = labels_set[data_index[train_size:]]
 84 | 
 85 |     return x_train, x_test, y_train, y_test
 86 | 
 87 | 
 88 | def data_distance(x_test, x_train):
 89 |     """
 90 |     :param x_test: 测试集
 91 |     :param x_train: 训练集
 92 |     :return: 返回计算的距离
 93 |     """
 94 |     distances = np.sqrt(sum((x_test - x_train) ** 2))
 95 |     return distances
 96 | 
 97 | 
 98 | def knn(x_train, y_train, x_test, k):
 99 |     """
100 |     :param x_train: 训练集特征数据
101 |     :param y_train: 训练集标签数据
102 |     :param x_test: 测试集特征数据
103 |     :param k: 邻居数
104 |     :return: 返回一个列表包含预测结果
105 |     """
106 |     # 预测结果列表，用于存储测试集预测出来的结果
107 |     predict_result_set = []
108 | 
109 |     # 训练集的长度
110 |     train_set_size = len(x_train)
111 | 
112 |     # 创建一个全零的矩阵，长度为训练集的长度
113 |     distances = np.array(np.zeros(train_set_size))
114 | 
115 |     # 计算每一个测试集与每一个训练集的距离
116 |     for x in x_test:
117 |         for index in range(train_set_size):
118 |             # 计算数据之间的距离
119 |             distances[index] = data_distance(x, x_train[index])
120 | 
121 |         # 排序后的距离的下标（从小到大）
122 |         sorted_dist = np.argsort(distances)
123 | 
124 |         class_count = {}
125 | 
126 |         # 取出k个最短距离
127 |         for j in range(k):
128 |             # 获得下标所对应的标签值
129 |             sort_label = y_train[sorted_dist[j]]
130 | 
131 |             # 将标签存入字典之中并存入个数
132 |             class_count[sort_label] = class_count.get(sort_label, 0) + 1
133 | 
134 |         # 对标签进行排序
135 |         sorted_class_count = sorted(class_count.items(), key=operator.itemgetter(1), reverse=True)
136 | 
137 |         # 将出现频次最高的放入预测结果列表
138 |         predict_result_set.append(sorted_class_count[0][0])
139 | 
140 |     # 返回预测结果列表
141 |     return predict_result_set
142 | 
143 | 
def predict_score(predict_result_set, y_test):
    """
    Fraction of predictions that equal the true label.

    :param predict_result_set: list of predicted labels
    :param y_test: true labels, indexable by the same positions
    :return: accuracy as a value between 0 and 1
    """
    total = len(predict_result_set)
    hits = sum(1 for idx in range(total) if predict_result_set[idx] == y_test[idx])
    return hits / total
157 | 
158 | 
if __name__ == "__main__":
    # 1. Load the data
    iris_data_set = open_file("D:\\iris.csv")

    # 2. Split into training and test set
    x_train, x_test, y_train, y_test = split_data_set(iris_data_set[0], iris_data_set[1])

    # 3. Run the KNN classifier
    start_time = datetime.datetime.now()
    result = knn(x_train, y_train, x_test, 8)
    end_time = datetime.datetime.now()

    # 4. Accuracy
    acc = predict_score(result, y_test)

    print("正确标签：", y_test)
    print("预测结果：", np.array(result))
    print("准确率：" + str(acc * 100) + "%")
    # print("测试集的精度：%.2f" % acc)
    # timedelta.microseconds is only the sub-second component, so it under-reports
    # any run longer than one second; divide the whole delta by 1 ms instead.
    elapsed_ms = (end_time - start_time) / datetime.timedelta(milliseconds=1)
    print("用时：" + str(elapsed_ms) + 'ms')
179 | 


--------------------------------------------------------------------------------
/iris_data_cluster_sklearn.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from sklearn.cluster import KMeans
 3 | from sklearn.cluster import DBSCAN
 4 | 
 5 | data = pd.read_csv('D:\\iris.csv', sep=',', encoding='utf-8')
 6 | x = data[['A', 'B', 'C', 'D']].as_matrix()
 7 | real = data['E'].as_matrix()
 8 | 
 9 | # 第1种实现：KMeans算法
10 | # kms = KMeans(n_clusters=3)  # 传入要分类的数目
11 | # y = kms.fit_predict(x)
12 | 
13 | # 第2种实现：DBSCAN算法
14 | dbscan = DBSCAN(eps=0.5, min_samples=13)
15 | dbscan.fit(x)
16 | y = dbscan.labels_
17 | 
18 | count = 0
19 | for i in range(len(real)):
20 |     if abs(int(y[i])) == abs(int(real[i])):
21 |         count = count + 1
22 | print('正确：' + str(count))
23 | acc = round(count / len(real), 4) * 100
24 | print('正确率：' + str(acc) + '%')
25 | 


--------------------------------------------------------------------------------
/iris_data_decision_tree_sklearn.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from sklearn.tree import DecisionTreeClassifier
 3 | from sklearn.model_selection import train_test_split
 4 | from sklearn.metrics import confusion_matrix
 5 | from sklearn.metrics import classification_report
 6 | from sklearn.model_selection import GridSearchCV
 7 | import matplotlib.pyplot as plt
 8 | 
 9 | iris_data_set = pd.read_csv('E:\\PyCharm-Workspace\\DataAnalysis\\data\\03 Iris\\DecisionTreeClassifier\\iris.csv')
10 | 
11 | # x是4列特征
12 | x = iris_data_set.iloc[:, 0:4].values
13 | # y是1列标签
14 | y = iris_data_set.iloc[:, -1].values
15 | 
16 | # 划分训练集和测试集
17 | x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)
18 | 
19 | # 利用GridSearchCV选择最优参数
20 | model = DecisionTreeClassifier()
21 | param = {'criterion': ['gini', 'entropy'], 'max_depth': [30, 50, 60, 100], 'min_samples_leaf': [2, 3, 5, 10], 'min_impurity_decrease': [0.1, 0.2, 0.5]}
22 | grid = GridSearchCV(model, param_grid=param, cv=5)
23 | grid.fit(x_train, y_train)
24 | print('最优分类器:', grid.best_estimator_)
25 | print('最优超参数：', grid.best_params_)
26 | print('最优分数:', grid.best_score_)
27 | 
28 | # 利用决策树分类器构建分类模型
29 | model = grid.best_estimator_
30 | y_pre = model.predict(x_test)
31 | 
32 | print('正确标签：', y_test)
33 | print('预测结果：', y_pre)
34 | 
35 | print('训练集分数：', model.score(x_train, y_train))
36 | print('测试集分数：', model.score(x_test, y_test))
37 | 
38 | # 混淆矩阵
39 | conf_mat = confusion_matrix(y_test, y_pre)
40 | print('混淆矩阵：')
41 | print(conf_mat)
42 | 
43 | # 分类指标文本报告（精确率、召回率、F1值等）
44 | print('分类指标报告：')
45 | print(classification_report(y_test, y_pre))
46 | 
47 | # 特征重要性
48 | print(model.feature_importances_)
49 | 
50 | # 画图展示训练结果
51 | fig = plt.figure()
52 | ax = fig.add_subplot(111)
53 | f1 = ax.scatter(list(range(len(x_test))), y_test, marker='*')
54 | f2 = ax.scatter(list(range(len(x_test))), y_pre, marker='o')
55 | plt.legend(handles=[f1, f2], labels=['True', 'Prediction'])
56 | plt.show()
57 | 


--------------------------------------------------------------------------------
/iris_data_knn_sklearn.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from sklearn.model_selection import train_test_split
 3 | from sklearn.neighbors import KNeighborsClassifier
 4 | from sklearn.metrics import confusion_matrix
 5 | from sklearn.metrics import classification_report
 6 | from sklearn.model_selection import GridSearchCV
 7 | import matplotlib.pyplot as plt
 8 | 
 9 | 
# Load the data set
iris_data_set = pd.read_csv('E:\\PyCharm-Workspace\\DataAnalysis\\data\\03 Iris\\DecisionTreeClassifier\\iris.csv')
# x: the first four columns (features); y: the last column (label)
x = iris_data_set.iloc[:, 0:4].values
y = iris_data_set.iloc[:, -1].values

# Hold out 20% of the samples as the test set
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)

# Make sure the label arrays are one-dimensional
y_train = y_train.flatten()
y_test = y_test.flatten()

# Search the hyper-parameter grid with 5-fold cross-validation
knn_model = KNeighborsClassifier()
param_grid = [
    {
        'weights': ['uniform'],
        'n_neighbors': list(range(1, 20)),
    },
    {
        'weights': ['distance'],
        'n_neighbors': list(range(1, 20)),
        'p': list(range(1, 6)),
    },
]
grid = GridSearchCV(knn_model, param_grid=param_grid, cv=5)
grid.fit(x_train, y_train)
print('最优分类器:', grid.best_estimator_)
print('最优超参数：', grid.best_params_)
print('最优分数:', grid.best_score_)

# Predict with the best estimator found by the search
knn_model = grid.best_estimator_
y_pre = knn_model.predict(x_test)

print('正确标签：', y_test)
print('预测结果：', y_pre)

print('训练集分数：', knn_model.score(x_train, y_train))
print('测试集分数：', knn_model.score(x_test, y_test))

# Confusion matrix
conf_mat = confusion_matrix(y_test, y_pre)
print('混淆矩阵：')
print(conf_mat)

# Text report with precision, recall, F1 score, ...
print('分类指标报告：')
print(classification_report(y_test, y_pre))

# Scatter plot of true versus predicted labels over the test-sample positions
sample_positions = list(range(len(x_test)))
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
f1 = ax.scatter(sample_positions, y_test, marker='*')
f2 = ax.scatter(list(sample_positions), y_pre, marker='o')
plt.legend(handles=[f1, f2], labels=['True', 'Prediction'])
plt.show()
69 | 


--------------------------------------------------------------------------------
/sklearn数据集/iris.csv:
--------------------------------------------------------------------------------
  1 | SepalLength,SepalWidth,PetalLength,PetalWidth,species
  2 | 6.4,2.8,5.6,2.2,2
  3 | 5,2.3,3.3,1,1
  4 | 4.9,2.5,4.5,1.7,2
  5 | 4.9,3.1,1.5,0.1,0
  6 | 5.7,3.8,1.7,0.3,0
  7 | 4.4,3.2,1.3,0.2,0
  8 | 5.4,3.4,1.5,0.4,0
  9 | 6.9,3.1,5.1,2.3,2
 10 | 6.7,3.1,4.4,1.4,1
 11 | 5.1,3.7,1.5,0.4,0
 12 | 5.2,2.7,3.9,1.4,1
 13 | 6.9,3.1,4.9,1.5,1
 14 | 5.8,4,1.2,0.2,0
 15 | 5.4,3.9,1.7,0.4,0
 16 | 7.7,3.8,6.7,2.2,2
 17 | 6.3,3.3,4.7,1.6,1
 18 | 6.8,3.2,5.9,2.3,2
 19 | 7.6,3,6.6,2.1,2
 20 | 6.4,3.2,5.3,2.3,2
 21 | 5.7,4.4,1.5,0.4,0
 22 | 6.7,3.3,5.7,2.1,2
 23 | 6.4,2.8,5.6,2.1,2
 24 | 5.4,3.9,1.3,0.4,0
 25 | 6.1,2.6,5.6,1.4,2
 26 | 7.2,3,5.8,1.6,2
 27 | 5.2,3.5,1.5,0.2,0
 28 | 5.8,2.6,4,1.2,1
 29 | 5.9,3,5.1,1.8,2
 30 | 5.4,3,4.5,1.5,1
 31 | 6.7,3,5,1.7,1
 32 | 6.3,2.3,4.4,1.3,1
 33 | 5.1,2.5,3,1.1,1
 34 | 6.4,3.2,4.5,1.5,1
 35 | 6.8,3,5.5,2.1,2
 36 | 6.2,2.8,4.8,1.8,2
 37 | 6.9,3.2,5.7,2.3,2
 38 | 6.5,3.2,5.1,2,2
 39 | 5.8,2.8,5.1,2.4,2
 40 | 5.1,3.8,1.5,0.3,0
 41 | 4.8,3,1.4,0.3,0
 42 | 7.9,3.8,6.4,2,2
 43 | 5.8,2.7,5.1,1.9,2
 44 | 6.7,3,5.2,2.3,2
 45 | 5.1,3.8,1.9,0.4,0
 46 | 4.7,3.2,1.6,0.2,0
 47 | 6,2.2,5,1.5,2
 48 | 4.8,3.4,1.6,0.2,0
 49 | 7.7,2.6,6.9,2.3,2
 50 | 4.6,3.6,1,0.2,0
 51 | 7.2,3.2,6,1.8,2
 52 | 5,3.3,1.4,0.2,0
 53 | 6.6,3,4.4,1.4,1
 54 | 6.1,2.8,4,1.3,1
 55 | 5,3.2,1.2,0.2,0
 56 | 7,3.2,4.7,1.4,1
 57 | 6,3,4.8,1.8,2
 58 | 7.4,2.8,6.1,1.9,2
 59 | 5.8,2.7,5.1,1.9,2
 60 | 6.2,3.4,5.4,2.3,2
 61 | 5,2,3.5,1,1
 62 | 5.6,2.5,3.9,1.1,1
 63 | 6.7,3.1,5.6,2.4,2
 64 | 6.3,2.5,5,1.9,2
 65 | 6.4,3.1,5.5,1.8,2
 66 | 6.2,2.2,4.5,1.5,1
 67 | 7.3,2.9,6.3,1.8,2
 68 | 4.4,3,1.3,0.2,0
 69 | 7.2,3.6,6.1,2.5,2
 70 | 6.5,3,5.5,1.8,2
 71 | 5,3.4,1.5,0.2,0
 72 | 4.7,3.2,1.3,0.2,0
 73 | 6.6,2.9,4.6,1.3,1
 74 | 5.5,3.5,1.3,0.2,0
 75 | 7.7,3,6.1,2.3,2
 76 | 6.1,3,4.9,1.8,2
 77 | 4.9,3.1,1.5,0.1,0
 78 | 5.5,2.4,3.8,1.1,1
 79 | 5.7,2.9,4.2,1.3,1
 80 | 6,2.9,4.5,1.5,1
 81 | 6.4,2.7,5.3,1.9,2
 82 | 5.4,3.7,1.5,0.2,0
 83 | 6.1,2.9,4.7,1.4,1
 84 | 6.5,2.8,4.6,1.5,1
 85 | 5.6,2.7,4.2,1.3,1
 86 | 6.3,3.4,5.6,2.4,2
 87 | 4.9,3.1,1.5,0.1,0
 88 | 6.8,2.8,4.8,1.4,1
 89 | 5.7,2.8,4.5,1.3,1
 90 | 6,2.7,5.1,1.6,1
 91 | 5,3.5,1.3,0.3,0
 92 | 6.5,3,5.2,2,2
 93 | 6.1,2.8,4.7,1.2,1
 94 | 5.1,3.5,1.4,0.3,0
 95 | 4.6,3.1,1.5,0.2,0
 96 | 6.5,3,5.8,2.2,2
 97 | 4.6,3.4,1.4,0.3,0
 98 | 4.6,3.2,1.4,0.2,0
 99 | 7.7,2.8,6.7,2,2
100 | 5.9,3.2,4.8,1.8,1
101 | 5.1,3.8,1.6,0.2,0
102 | 4.9,3,1.4,0.2,0
103 | 4.9,2.4,3.3,1,1
104 | 4.5,2.3,1.3,0.3,0
105 | 5.8,2.7,4.1,1,1
106 | 5,3.4,1.6,0.4,0
107 | 5.2,3.4,1.4,0.2,0
108 | 5.3,3.7,1.5,0.2,0
109 | 5,3.6,1.4,0.2,0
110 | 5.6,2.9,3.6,1.3,1
111 | 4.8,3.1,1.6,0.2,0
112 | 6.3,2.7,4.9,1.8,2
113 | 5.7,2.8,4.1,1.3,1
114 | 5,3,1.6,0.2,0
115 | 6.3,3.3,6,2.5,2
116 | 5,3.5,1.6,0.6,0
117 | 5.5,2.6,4.4,1.2,1
118 | 5.7,3,4.2,1.2,1
119 | 4.4,2.9,1.4,0.2,0
120 | 4.8,3,1.4,0.1,0
121 | 5.5,2.4,3.7,1,1
122 | 5.9,3,4.2,1.5,1
123 | 6.9,3.1,5.4,2.1,2
124 | 5.1,3.3,1.7,0.5,0
125 | 6,3.4,4.5,1.6,1
126 | 5.5,2.5,4,1.3,1
127 | 6.2,2.9,4.3,1.3,1
128 | 5.5,4.2,1.4,0.2,0
129 | 6.3,2.8,5.1,1.5,2
130 | 5.6,3,4.1,1.3,1
131 | 6.7,2.5,5.8,1.8,2
132 | 7.1,3,5.9,2.1,2
133 | 4.3,3,1.1,0.1,0
134 | 5.6,2.8,4.9,2,2
135 | 5.5,2.3,4,1.3,1
136 | 6,2.2,4,1,1
137 | 5.1,3.5,1.4,0.2,0
138 | 5.7,2.6,3.5,1,1
139 | 4.8,3.4,1.9,0.2,0
140 | 5.1,3.4,1.5,0.2,0
141 | 5.7,2.5,5,2,2
142 | 5.4,3.4,1.7,0.2,0
143 | 5.6,3,4.5,1.5,1
144 | 6.3,2.9,5.6,1.8,2
145 | 6.3,2.5,4.9,1.5,1
146 | 5.8,2.7,3.9,1.2,1
147 | 6.1,3,4.6,1.4,1
148 | 5.2,4.1,1.5,0.1,0
149 | 6.7,3.1,4.7,1.5,1
150 | 6.7,3.3,5.7,2.5,2
151 | 6.4,2.9,4.3,1.3,1
152 | 


--------------------------------------------------------------------------------
/原始数据集/iris.csv:
--------------------------------------------------------------------------------
  1 | 150,4,setosa,versicolor,virginica
  2 | 6.4,2.8,5.6,2.2,2
  3 | 5,2.3,3.3,1,1
  4 | 4.9,2.5,4.5,1.7,2
  5 | 4.9,3.1,1.5,0.1,0
  6 | 5.7,3.8,1.7,0.3,0
  7 | 4.4,3.2,1.3,0.2,0
  8 | 5.4,3.4,1.5,0.4,0
  9 | 6.9,3.1,5.1,2.3,2
 10 | 6.7,3.1,4.4,1.4,1
 11 | 5.1,3.7,1.5,0.4,0
 12 | 5.2,2.7,3.9,1.4,1
 13 | 6.9,3.1,4.9,1.5,1
 14 | 5.8,4,1.2,0.2,0
 15 | 5.4,3.9,1.7,0.4,0
 16 | 7.7,3.8,6.7,2.2,2
 17 | 6.3,3.3,4.7,1.6,1
 18 | 6.8,3.2,5.9,2.3,2
 19 | 7.6,3,6.6,2.1,2
 20 | 6.4,3.2,5.3,2.3,2
 21 | 5.7,4.4,1.5,0.4,0
 22 | 6.7,3.3,5.7,2.1,2
 23 | 6.4,2.8,5.6,2.1,2
 24 | 5.4,3.9,1.3,0.4,0
 25 | 6.1,2.6,5.6,1.4,2
 26 | 7.2,3,5.8,1.6,2
 27 | 5.2,3.5,1.5,0.2,0
 28 | 5.8,2.6,4,1.2,1
 29 | 5.9,3,5.1,1.8,2
 30 | 5.4,3,4.5,1.5,1
 31 | 6.7,3,5,1.7,1
 32 | 6.3,2.3,4.4,1.3,1
 33 | 5.1,2.5,3,1.1,1
 34 | 6.4,3.2,4.5,1.5,1
 35 | 6.8,3,5.5,2.1,2
 36 | 6.2,2.8,4.8,1.8,2
 37 | 6.9,3.2,5.7,2.3,2
 38 | 6.5,3.2,5.1,2,2
 39 | 5.8,2.8,5.1,2.4,2
 40 | 5.1,3.8,1.5,0.3,0
 41 | 4.8,3,1.4,0.3,0
 42 | 7.9,3.8,6.4,2,2
 43 | 5.8,2.7,5.1,1.9,2
 44 | 6.7,3,5.2,2.3,2
 45 | 5.1,3.8,1.9,0.4,0
 46 | 4.7,3.2,1.6,0.2,0
 47 | 6,2.2,5,1.5,2
 48 | 4.8,3.4,1.6,0.2,0
 49 | 7.7,2.6,6.9,2.3,2
 50 | 4.6,3.6,1,0.2,0
 51 | 7.2,3.2,6,1.8,2
 52 | 5,3.3,1.4,0.2,0
 53 | 6.6,3,4.4,1.4,1
 54 | 6.1,2.8,4,1.3,1
 55 | 5,3.2,1.2,0.2,0
 56 | 7,3.2,4.7,1.4,1
 57 | 6,3,4.8,1.8,2
 58 | 7.4,2.8,6.1,1.9,2
 59 | 5.8,2.7,5.1,1.9,2
 60 | 6.2,3.4,5.4,2.3,2
 61 | 5,2,3.5,1,1
 62 | 5.6,2.5,3.9,1.1,1
 63 | 6.7,3.1,5.6,2.4,2
 64 | 6.3,2.5,5,1.9,2
 65 | 6.4,3.1,5.5,1.8,2
 66 | 6.2,2.2,4.5,1.5,1
 67 | 7.3,2.9,6.3,1.8,2
 68 | 4.4,3,1.3,0.2,0
 69 | 7.2,3.6,6.1,2.5,2
 70 | 6.5,3,5.5,1.8,2
 71 | 5,3.4,1.5,0.2,0
 72 | 4.7,3.2,1.3,0.2,0
 73 | 6.6,2.9,4.6,1.3,1
 74 | 5.5,3.5,1.3,0.2,0
 75 | 7.7,3,6.1,2.3,2
 76 | 6.1,3,4.9,1.8,2
 77 | 4.9,3.1,1.5,0.1,0
 78 | 5.5,2.4,3.8,1.1,1
 79 | 5.7,2.9,4.2,1.3,1
 80 | 6,2.9,4.5,1.5,1
 81 | 6.4,2.7,5.3,1.9,2
 82 | 5.4,3.7,1.5,0.2,0
 83 | 6.1,2.9,4.7,1.4,1
 84 | 6.5,2.8,4.6,1.5,1
 85 | 5.6,2.7,4.2,1.3,1
 86 | 6.3,3.4,5.6,2.4,2
 87 | 4.9,3.1,1.5,0.1,0
 88 | 6.8,2.8,4.8,1.4,1
 89 | 5.7,2.8,4.5,1.3,1
 90 | 6,2.7,5.1,1.6,1
 91 | 5,3.5,1.3,0.3,0
 92 | 6.5,3,5.2,2,2
 93 | 6.1,2.8,4.7,1.2,1
 94 | 5.1,3.5,1.4,0.3,0
 95 | 4.6,3.1,1.5,0.2,0
 96 | 6.5,3,5.8,2.2,2
 97 | 4.6,3.4,1.4,0.3,0
 98 | 4.6,3.2,1.4,0.2,0
 99 | 7.7,2.8,6.7,2,2
100 | 5.9,3.2,4.8,1.8,1
101 | 5.1,3.8,1.6,0.2,0
102 | 4.9,3,1.4,0.2,0
103 | 4.9,2.4,3.3,1,1
104 | 4.5,2.3,1.3,0.3,0
105 | 5.8,2.7,4.1,1,1
106 | 5,3.4,1.6,0.4,0
107 | 5.2,3.4,1.4,0.2,0
108 | 5.3,3.7,1.5,0.2,0
109 | 5,3.6,1.4,0.2,0
110 | 5.6,2.9,3.6,1.3,1
111 | 4.8,3.1,1.6,0.2,0
112 | 6.3,2.7,4.9,1.8,2
113 | 5.7,2.8,4.1,1.3,1
114 | 5,3,1.6,0.2,0
115 | 6.3,3.3,6,2.5,2
116 | 5,3.5,1.6,0.6,0
117 | 5.5,2.6,4.4,1.2,1
118 | 5.7,3,4.2,1.2,1
119 | 4.4,2.9,1.4,0.2,0
120 | 4.8,3,1.4,0.1,0
121 | 5.5,2.4,3.7,1,1
122 | 5.9,3,4.2,1.5,1
123 | 6.9,3.1,5.4,2.1,2
124 | 5.1,3.3,1.7,0.5,0
125 | 6,3.4,4.5,1.6,1
126 | 5.5,2.5,4,1.3,1
127 | 6.2,2.9,4.3,1.3,1
128 | 5.5,4.2,1.4,0.2,0
129 | 6.3,2.8,5.1,1.5,2
130 | 5.6,3,4.1,1.3,1
131 | 6.7,2.5,5.8,1.8,2
132 | 7.1,3,5.9,2.1,2
133 | 4.3,3,1.1,0.1,0
134 | 5.6,2.8,4.9,2,2
135 | 5.5,2.3,4,1.3,1
136 | 6,2.2,4,1,1
137 | 5.1,3.5,1.4,0.2,0
138 | 5.7,2.6,3.5,1,1
139 | 4.8,3.4,1.9,0.2,0
140 | 5.1,3.4,1.5,0.2,0
141 | 5.7,2.5,5,2,2
142 | 5.4,3.4,1.7,0.2,0
143 | 5.6,3,4.5,1.5,1
144 | 6.3,2.9,5.6,1.8,2
145 | 6.3,2.5,4.9,1.5,1
146 | 5.8,2.7,3.9,1.2,1
147 | 6.1,3,4.6,1.4,1
148 | 5.2,4.1,1.5,0.1,0
149 | 6.7,3.1,4.7,1.5,1
150 | 6.7,3.3,5.7,2.5,2
151 | 6.4,2.9,4.3,1.3,1
152 | 


--------------------------------------------------------------------------------
/格式处理数据集/iris.csv:
--------------------------------------------------------------------------------
  1 | Sepal Length,Sepal Width,Petal Length,Petal Width,Species
  2 | 6.4,2.8,5.6,2.2,virginica
  3 | 5,2.3,3.3,1,versicolor
  4 | 4.9,2.5,4.5,1.7,virginica
  5 | 4.9,3.1,1.5,0.1,setosa
  6 | 5.7,3.8,1.7,0.3,setosa
  7 | 4.4,3.2,1.3,0.2,setosa
  8 | 5.4,3.4,1.5,0.4,setosa
  9 | 6.9,3.1,5.1,2.3,virginica
 10 | 6.7,3.1,4.4,1.4,versicolor
 11 | 5.1,3.7,1.5,0.4,setosa
 12 | 5.2,2.7,3.9,1.4,versicolor
 13 | 6.9,3.1,4.9,1.5,versicolor
 14 | 5.8,4,1.2,0.2,setosa
 15 | 5.4,3.9,1.7,0.4,setosa
 16 | 7.7,3.8,6.7,2.2,virginica
 17 | 6.3,3.3,4.7,1.6,versicolor
 18 | 6.8,3.2,5.9,2.3,virginica
 19 | 7.6,3,6.6,2.1,virginica
 20 | 6.4,3.2,5.3,2.3,virginica
 21 | 5.7,4.4,1.5,0.4,setosa
 22 | 6.7,3.3,5.7,2.1,virginica
 23 | 6.4,2.8,5.6,2.1,virginica
 24 | 5.4,3.9,1.3,0.4,setosa
 25 | 6.1,2.6,5.6,1.4,virginica
 26 | 7.2,3,5.8,1.6,virginica
 27 | 5.2,3.5,1.5,0.2,setosa
 28 | 5.8,2.6,4,1.2,versicolor
 29 | 5.9,3,5.1,1.8,virginica
 30 | 5.4,3,4.5,1.5,versicolor
 31 | 6.7,3,5,1.7,versicolor
 32 | 6.3,2.3,4.4,1.3,versicolor
 33 | 5.1,2.5,3,1.1,versicolor
 34 | 6.4,3.2,4.5,1.5,versicolor
 35 | 6.8,3,5.5,2.1,virginica
 36 | 6.2,2.8,4.8,1.8,virginica
 37 | 6.9,3.2,5.7,2.3,virginica
 38 | 6.5,3.2,5.1,2,virginica
 39 | 5.8,2.8,5.1,2.4,virginica
 40 | 5.1,3.8,1.5,0.3,setosa
 41 | 4.8,3,1.4,0.3,setosa
 42 | 7.9,3.8,6.4,2,virginica
 43 | 5.8,2.7,5.1,1.9,virginica
 44 | 6.7,3,5.2,2.3,virginica
 45 | 5.1,3.8,1.9,0.4,setosa
 46 | 4.7,3.2,1.6,0.2,setosa
 47 | 6,2.2,5,1.5,virginica
 48 | 4.8,3.4,1.6,0.2,setosa
 49 | 7.7,2.6,6.9,2.3,virginica
 50 | 4.6,3.6,1,0.2,setosa
 51 | 7.2,3.2,6,1.8,virginica
 52 | 5,3.3,1.4,0.2,setosa
 53 | 6.6,3,4.4,1.4,versicolor
 54 | 6.1,2.8,4,1.3,versicolor
 55 | 5,3.2,1.2,0.2,setosa
 56 | 7,3.2,4.7,1.4,versicolor
 57 | 6,3,4.8,1.8,virginica
 58 | 7.4,2.8,6.1,1.9,virginica
 59 | 5.8,2.7,5.1,1.9,virginica
 60 | 6.2,3.4,5.4,2.3,virginica
 61 | 5,2,3.5,1,versicolor
 62 | 5.6,2.5,3.9,1.1,versicolor
 63 | 6.7,3.1,5.6,2.4,virginica
 64 | 6.3,2.5,5,1.9,virginica
 65 | 6.4,3.1,5.5,1.8,virginica
 66 | 6.2,2.2,4.5,1.5,versicolor
 67 | 7.3,2.9,6.3,1.8,virginica
 68 | 4.4,3,1.3,0.2,setosa
 69 | 7.2,3.6,6.1,2.5,virginica
 70 | 6.5,3,5.5,1.8,virginica
 71 | 5,3.4,1.5,0.2,setosa
 72 | 4.7,3.2,1.3,0.2,setosa
 73 | 6.6,2.9,4.6,1.3,versicolor
 74 | 5.5,3.5,1.3,0.2,setosa
 75 | 7.7,3,6.1,2.3,virginica
 76 | 6.1,3,4.9,1.8,virginica
 77 | 4.9,3.1,1.5,0.1,setosa
 78 | 5.5,2.4,3.8,1.1,versicolor
 79 | 5.7,2.9,4.2,1.3,versicolor
 80 | 6,2.9,4.5,1.5,versicolor
 81 | 6.4,2.7,5.3,1.9,virginica
 82 | 5.4,3.7,1.5,0.2,setosa
 83 | 6.1,2.9,4.7,1.4,versicolor
 84 | 6.5,2.8,4.6,1.5,versicolor
 85 | 5.6,2.7,4.2,1.3,versicolor
 86 | 6.3,3.4,5.6,2.4,virginica
 87 | 4.9,3.1,1.5,0.1,setosa
 88 | 6.8,2.8,4.8,1.4,versicolor
 89 | 5.7,2.8,4.5,1.3,versicolor
 90 | 6,2.7,5.1,1.6,versicolor
 91 | 5,3.5,1.3,0.3,setosa
 92 | 6.5,3,5.2,2,virginica
 93 | 6.1,2.8,4.7,1.2,versicolor
 94 | 5.1,3.5,1.4,0.3,setosa
 95 | 4.6,3.1,1.5,0.2,setosa
 96 | 6.5,3,5.8,2.2,virginica
 97 | 4.6,3.4,1.4,0.3,setosa
 98 | 4.6,3.2,1.4,0.2,setosa
 99 | 7.7,2.8,6.7,2,virginica
100 | 5.9,3.2,4.8,1.8,versicolor
101 | 5.1,3.8,1.6,0.2,setosa
102 | 4.9,3,1.4,0.2,setosa
103 | 4.9,2.4,3.3,1,versicolor
104 | 4.5,2.3,1.3,0.3,setosa
105 | 5.8,2.7,4.1,1,versicolor
106 | 5,3.4,1.6,0.4,setosa
107 | 5.2,3.4,1.4,0.2,setosa
108 | 5.3,3.7,1.5,0.2,setosa
109 | 5,3.6,1.4,0.2,setosa
110 | 5.6,2.9,3.6,1.3,versicolor
111 | 4.8,3.1,1.6,0.2,setosa
112 | 6.3,2.7,4.9,1.8,virginica
113 | 5.7,2.8,4.1,1.3,versicolor
114 | 5,3,1.6,0.2,setosa
115 | 6.3,3.3,6,2.5,virginica
116 | 5,3.5,1.6,0.6,setosa
117 | 5.5,2.6,4.4,1.2,versicolor
118 | 5.7,3,4.2,1.2,versicolor
119 | 4.4,2.9,1.4,0.2,setosa
120 | 4.8,3,1.4,0.1,setosa
121 | 5.5,2.4,3.7,1,versicolor
122 | 5.9,3,4.2,1.5,versicolor
123 | 6.9,3.1,5.4,2.1,virginica
124 | 5.1,3.3,1.7,0.5,setosa
125 | 6,3.4,4.5,1.6,versicolor
126 | 5.5,2.5,4,1.3,versicolor
127 | 6.2,2.9,4.3,1.3,versicolor
128 | 5.5,4.2,1.4,0.2,setosa
129 | 6.3,2.8,5.1,1.5,virginica
130 | 5.6,3,4.1,1.3,versicolor
131 | 6.7,2.5,5.8,1.8,virginica
132 | 7.1,3,5.9,2.1,virginica
133 | 4.3,3,1.1,0.1,setosa
134 | 5.6,2.8,4.9,2,virginica
135 | 5.5,2.3,4,1.3,versicolor
136 | 6,2.2,4,1,versicolor
137 | 5.1,3.5,1.4,0.2,setosa
138 | 5.7,2.6,3.5,1,versicolor
139 | 4.8,3.4,1.9,0.2,setosa
140 | 5.1,3.4,1.5,0.2,setosa
141 | 5.7,2.5,5,2,virginica
142 | 5.4,3.4,1.7,0.2,setosa
143 | 5.6,3,4.5,1.5,versicolor
144 | 6.3,2.9,5.6,1.8,virginica
145 | 6.3,2.5,4.9,1.5,versicolor
146 | 5.8,2.7,3.9,1.2,versicolor
147 | 6.1,3,4.6,1.4,versicolor
148 | 5.2,4.1,1.5,0.1,setosa
149 | 6.7,3.1,4.7,1.5,versicolor
150 | 6.7,3.3,5.7,2.5,virginica
151 | 6.4,2.9,4.3,1.3,versicolor
152 | 


--------------------------------------------------------------------------------