└── new-tensorflow
├── .idea
├── misc.xml
├── modules.xml
├── new-tensorflow.iml
└── workspace.xml
├── CNNTensorflow.py
├── CNNTensorflowValidate.py
├── __pycache__
├── input_data.cpython-35.pyc
└── model.cpython-35.pyc
├── evaluateDisease.py
├── input_data.py
├── model.py
└── training.py
/new-tensorflow/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/new-tensorflow/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/new-tensorflow/.idea/new-tensorflow.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/new-tensorflow/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 | 1541996025007
241 |
242 |
243 | 1541996025007
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 |
372 |
373 |
374 |
375 |
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
--------------------------------------------------------------------------------
/new-tensorflow/CNNTensorflow.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | import tensorflow as tf
4 | import numpy as np
5 | import time
6 | from skimage import io, transform
7 |
# Dataset root: expected to contain one sub-directory of .jpg files per class.
path = 'E:/data/datasets/dieases_photos/'
# Checkpoint prefix the trained model is saved under.
model_path = 'E:/data/model/dieases/model.ckpt'

# Every image is resized to w x h with c channels before it enters the network.
w = 100
h = 100
c = 3


def read_img(path):
    """Load every ``*.jpg`` found in the class sub-directories of ``path``.

    Each immediate sub-directory of ``path`` is treated as one class. The
    sub-directories are visited in sorted order and the position of a
    directory in that order becomes the integer label of its images, so the
    label mapping is reproducible across runs and platforms
    (``os.listdir`` alone returns entries in arbitrary order).

    Args:
        path: Dataset root directory; a trailing separator is optional.

    Returns:
        A tuple ``(images, labels)``: a float32 array holding every image
        resized to ``(w, h, c)``, and an int32 array with one label per image.
    """
    class_dirs = sorted(
        os.path.join(path, entry)
        for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )
    imgs = []
    labels = []
    for idx, folder in enumerate(class_dirs):
        for im in glob.glob(os.path.join(folder, '*.jpg')):
            print('reading the images:%s' % (im))
            img = io.imread(im)
            img = transform.resize(img, (w, h, c))
            imgs.append(img)
            labels.append(idx)
            print('reading the idx:%s' % (idx))
    return np.asarray(imgs, np.float32), np.asarray(labels, np.int32)
33 |
34 |
# Load every sample together with its class label.
data, label = read_img(path)

# Shuffle samples and labels with one shared permutation so they stay aligned.
num_example = data.shape[0]
arr = np.arange(num_example)
np.random.shuffle(arr)
data = data[arr]
label = label[arr]


# Split into a training part (first 80%) and a validation part (the rest).
ratio = 0.8
# Built-in int() is used because the np.int alias was removed in NumPy 1.24.
s = int(num_example * ratio)
x_train = data[:s]
y_train = label[:s]
x_val = data[s:]
y_val = label[s:]

# ----------------- Build the CNN model -----------------
# Placeholders for a batch of images and the matching integer class labels.
x = tf.placeholder(tf.float32, shape=[None, w, h, c], name='x')
y_ = tf.placeholder(tf.int32, shape=[None, ], name='y_')
58 |
59 |
def _conv_relu_layer(scope_name, inputs, kernel_shape):
    """Stride-1 'SAME' convolution + bias + ReLU, with variables under ``scope_name``.

    ``kernel_shape`` is ``[filter_height, filter_width, in_channels,
    out_channels]``; the bias has one entry per output channel.
    """
    with tf.variable_scope(scope_name):
        weights = tf.get_variable(
            "weight", kernel_shape,
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        biases = tf.get_variable(
            "bias", [kernel_shape[-1]],
            initializer=tf.constant_initializer(0.0))
        conv = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='SAME')
        # ReLU is element-wise max(value, 0): negative activations become 0.
        return tf.nn.relu(tf.nn.bias_add(conv, biases))


def _max_pool_2x2(scope_name, inputs):
    """2x2 max pooling with stride 2 and 'VALID' padding inside ``scope_name``."""
    with tf.name_scope(scope_name):
        # ksize/strides are 1 on the batch and channel axes so that pooling
        # only happens over the two spatial axes.
        return tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                              padding='VALID')


def _dense_layer(scope_name, inputs, in_dim, out_dim, regularizer, relu, dropout):
    """Fully connected layer; optionally followed by ReLU and 0.5 keep-prob dropout.

    When ``regularizer`` is given, its penalty on the weight matrix is added
    to the ``'losses'`` graph collection.
    """
    with tf.variable_scope(scope_name):
        weights = tf.get_variable(
            "weight", [in_dim, out_dim],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(weights))
        biases = tf.get_variable(
            "bias", [out_dim], initializer=tf.constant_initializer(0.1))
        output = tf.matmul(inputs, weights) + biases
        if relu:
            output = tf.nn.relu(output)
        if dropout:
            output = tf.nn.dropout(output, 0.5)
        return output


def inference(input_tensor, train, regularizer):
    """Build the forward pass of the CNN and return the class logits.

    The network is four conv+ReLU / 2x2 max-pool stages (32, 64, 128 and 128
    filters) followed by three fully connected layers (1024, 512 and 4 units).

    Args:
        input_tensor: 4-D image batch tensor; the first convolution kernel
            expects 3 input channels.
        train: Python bool. When true, dropout with keep probability 0.5 is
            applied after the first two fully connected layers.
        regularizer: Callable applied to each fully connected weight matrix,
            or ``None``. Its results are added to the ``'losses'`` collection.

    Returns:
        The un-normalised logits tensor with 4 columns (no softmax applied).
    """
    relu1 = _conv_relu_layer('layer1-conv1', input_tensor, [5, 5, 3, 32])
    pool1 = _max_pool_2x2("layer2-pool1", relu1)

    # Each kernel's in_channels equals the previous layer's number of filters.
    relu2 = _conv_relu_layer("layer3-conv2", pool1, [5, 5, 32, 64])
    pool2 = _max_pool_2x2("layer4-pool2", relu2)

    relu3 = _conv_relu_layer("layer5-conv3", pool2, [3, 3, 64, 128])
    pool3 = _max_pool_2x2("layer6-pool3", relu3)

    relu4 = _conv_relu_layer("layer7-conv4", pool3, [3, 3, 128, 128])
    pool4 = _max_pool_2x2("layer8-pool4", relu4)

    # Flatten size is taken from the static shape so other input sizes work;
    # for a 100x100 input this is 6*6*128, which is also the fallback used
    # when the spatial dimensions are not statically known.
    feature_dims = pool4.get_shape().as_list()[1:]
    if None in feature_dims:
        nodes = 6 * 6 * 128
    else:
        nodes = feature_dims[0] * feature_dims[1] * feature_dims[2]
    # -1 lets TensorFlow infer the batch dimension.
    reshaped = tf.reshape(pool4, [-1, nodes])

    fc1 = _dense_layer('layer9-fc1', reshaped, nodes, 1024, regularizer,
                       relu=True, dropout=train)
    fc2 = _dense_layer('layer10-fc2', fc1, 1024, 512, regularizer,
                       relu=True, dropout=train)
    logit = _dense_layer('layer11-fc3', fc2, 512, 4, regularizer,
                         relu=False, dropout=False)

    return logit
170 |
171 |
# L2 weight regulariser; it limits weight magnitude so the model is less able
# to fit random noise in the training data.
regularizer = tf.contrib.layers.l2_regularizer(0.0001)

# x is the input image tensor; logits is the network output for it.
# NOTE(review): train=False means the dropout branches of inference() are
# never built — confirm whether dropout was intended during training.
logits = inference(x, False, regularizer)

# Multiply by 1 only to give the output a stable tensor name ('logits_eval')
# that can be looked up when the saved model is restored.
b = tf.constant(value=1, dtype=tf.float32)
logits_eval = tf.multiply(logits, b, name='logits_eval')

# Mean sparse softmax cross-entropy over the batch, plus the L2 penalties
# that inference() stored in the 'losses' collection. Previously the loss was
# the unreduced per-example vector and the collected penalties were unused.
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_)
loss = tf.reduce_mean(cross_entropy)
regularization_losses = tf.get_collection('losses')
if regularization_losses:  # tf.add_n rejects an empty list
    loss = loss + tf.add_n(regularization_losses)
# Adam optimiser step that minimises the loss.
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
# tf.argmax(logits, 1) gives the index of the largest logit in each row,
# i.e. the predicted class per sample; compare it with the true label.
correct_prediction = tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), y_)
# Accuracy is the mean of the 0/1 correctness indicators.
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
199 |
200 |
def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False):
    """Yield successive ``(inputs, targets)`` mini-batches of ``batch_size`` items.

    Only full batches are produced: a trailing remainder smaller than
    ``batch_size`` is dropped. Because this is a generator, the argument
    checks run when iteration starts, not when the function is called.

    Args:
        inputs: Indexable samples (indexed with a slice or an index array).
        targets: Labels, same length as ``inputs``.
        batch_size: Positive number of samples per batch.
        shuffle: When true, samples are drawn in a random order (via
            ``np.random.shuffle``); otherwise batches are consecutive slices.

    Yields:
        ``(inputs[batch], targets[batch])`` pairs.

    Raises:
        AssertionError: If ``inputs`` and ``targets`` differ in length.
        ValueError: If ``batch_size`` is missing or not positive.
    """
    # assert raises when the condition is FALSE: sample/label counts must match.
    assert len(inputs) == len(targets), "inputs and targets must have the same length"
    if batch_size is None or batch_size <= 0:
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))
    if shuffle:
        # Random permutation of sample positions; batches index through it.
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    # The stop value len - batch_size + 1 makes the last start index the last
    # one that still leaves room for a complete batch.
    for start_idx in range(0, len(inputs) - batch_size + 1, batch_size):
        stop_idx = start_idx + batch_size
        if shuffle:
            excerpt = indices[start_idx:stop_idx]
        else:
            excerpt = slice(start_idx, stop_idx)
        yield inputs[excerpt], targets[excerpt]
223 |
224 |
def _epoch_mean(total, n_batch):
    """Return ``total`` averaged over ``n_batch`` batches, or NaN if none ran."""
    # A split smaller than one batch yields no batches; avoid dividing by zero.
    return np.sum(total) / n_batch if n_batch else float('nan')


# Train and validate; n_epoch can be increased for longer training.
n_epoch = 5
batch_size = 64
saver = tf.train.Saver()
sess = tf.Session()
try:
    sess.run(tf.global_variables_initializer())
    # Each epoch is one full pass over the training samples.
    for epoch in range(n_epoch):
        count = epoch + 1

        # Training pass: runs the optimiser on every shuffled mini-batch.
        train_loss, train_acc, n_batch = 0, 0, 0
        for x_train_a, y_train_a in minibatches(x_train, y_train, batch_size, shuffle=True):
            _, err, ac = sess.run([train_op, loss, acc],
                                  feed_dict={x: x_train_a, y_: y_train_a})
            train_loss += err
            train_acc += ac
            n_batch += 1
        print(" train loss: %f" % _epoch_mean(train_loss, n_batch))
        print(" train acc: %f" % _epoch_mean(train_acc, n_batch))

        # Validation pass: same metrics, but train_op is not run.
        val_loss, val_acc, n_batch = 0, 0, 0
        for x_val_a, y_val_a in minibatches(x_val, y_val, batch_size, shuffle=False):
            err, ac = sess.run([loss, acc], feed_dict={x: x_val_a, y_: y_val_a})
            val_loss += err
            val_acc += ac
            n_batch += 1
        print(" validation loss: %f" % _epoch_mean(val_loss, n_batch))
        print(" validation acc: %f" % _epoch_mean(val_acc, n_batch))

        print("这是第 %d 次batch测试和验证" % count)
    saver.save(sess, model_path)
finally:
    # Release the session even if training or saving fails.
    sess.close()
259 |
--------------------------------------------------------------------------------
/new-tensorflow/CNNTensorflowValidate.py:
--------------------------------------------------------------------------------
1 | from skimage import io, transform
2 | import tensorflow as tf
3 | import numpy as np
4 |
5 |
# Two sample images that the script below classifies.
path1 = "E:/data/datasets/dieases_photos/apple2_ban/apple16.jpg"
path2 = "E:/data/datasets/dieases_photos/apple_lun/apple1.jpg"


# Maps a predicted class index to its display name.
flower_dict = dict(enumerate(['斑点落叶病', '红蜘蛛', '炭除病', '轮纹病']))

# Input size the images are resized to.
w, h, c = 100, 100, 3


def addElementToDict(element):
    """Merge ``element`` into the module-level ``flower_dict`` and return it.

    ``element`` is anything ``dict.update`` accepts; existing keys are
    overwritten. The returned object is ``flower_dict`` itself, not a copy.
    """
    mapping = flower_dict
    mapping.update(element)
    return mapping
20 |
21 |
def read_one_image(path):
    """Read one image file and resize it to the network input shape.

    The target shape includes the channel count ``c`` so the result always
    matches the ``(w, h, c)`` resize used when the training images were
    loaded, instead of keeping whatever channel count the file has.

    Args:
        path: Path of the image file to read.

    Returns:
        The resized image as a NumPy array of shape ``(w, h, c)``.
    """
    img = io.imread(path)
    img = transform.resize(img, (w, h, c))
    return np.asarray(img)
26 |
27 |
with tf.Session() as sess:
    # Batch of the two sample images.
    data = [read_one_image(path1), read_one_image(path2)]

    # Rebuild the saved graph and load the latest checkpoint's weights into it.
    saver = tf.train.import_meta_graph('E:/data/model/dieases/model.ckpt.meta')
    saver.restore(sess, tf.train.latest_checkpoint('E:/data/model/dieases/'))

    # Look up the input placeholder and the output tensor by name.
    graph = tf.get_default_graph()
    x = graph.get_tensor_by_name("x:0")
    feed_dict = {x: data}

    logits = graph.get_tensor_by_name("logits_eval:0")

    classification_result = sess.run(logits, feed_dict)

    # Print the raw prediction matrix (one row of logits per image).
    print(classification_result)
    # Index of the largest value in each row. The result is already a NumPy
    # array, so np.argmax is used rather than adding tf.argmax ops to the graph.
    output = np.argmax(classification_result, 1)
    print(output)
    # Translate each predicted index into its class name.
    for i, class_index in enumerate(output):
        print("第", i+1, "朵花预测:"+flower_dict[class_index])
56 |
--------------------------------------------------------------------------------
/new-tensorflow/__pycache__/input_data.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhoubill/Tensorflow-cnn/fa90494d64393796dcca27988cbdd3d7eefc8ce1/new-tensorflow/__pycache__/input_data.cpython-35.pyc
--------------------------------------------------------------------------------
/new-tensorflow/__pycache__/model.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhoubill/Tensorflow-cnn/fa90494d64393796dcca27988cbdd3d7eefc8ce1/new-tensorflow/__pycache__/model.cpython-35.pyc
--------------------------------------------------------------------------------
/new-tensorflow/evaluateDisease.py:
--------------------------------------------------------------------------------
1 | #coding=utf-8
2 | import tensorflow as tf
3 | from PIL import Image
4 | import matplotlib.pyplot as plt
5 | import input_data
6 | import numpy as np
7 | import model
8 | import os
9 |
def get_one_image(train):
    """Pick a random file from directory ``train``, display it and return it.

    The image is converted to RGB (as ``get_image`` does) so the returned
    array always has 3 channels, which the callers' reshape to
    ``[1, 208, 208, 3]`` requires.

    Args:
        train: Directory containing image files.

    Returns:
        The chosen image resized to 208x208 as a NumPy array.
    """
    files = os.listdir(train)
    # randint's upper bound is exclusive, so ind is a valid index into files.
    ind = np.random.randint(0, len(files))
    img_dir = os.path.join(train, files[ind])
    image = Image.open(img_dir).convert("RGB")
    plt.imshow(image)
    plt.show()
    image = image.resize([208, 208])
    return np.array(image)
22 |
def get_image(imagepath):
    """Open ``imagepath``, convert it to RGB and return it as a 208x208 array."""
    rgb_image = Image.open(imagepath).convert("RGB")
    resized = rgb_image.resize([208, 208])
    return np.array(resized)
28 |
29 |
def evaluate_image(imagepath, nclass):
    """Classify one image file with the latest checkpoint and print the result.

    A fresh graph is built for every call: the image is fed through a
    placeholder, standardised, passed to ``model.inference`` and turned into
    probabilities with softmax.

    Args:
        imagepath: Path of the image to classify.
        nclass: Number of output classes of the trained model.

    Returns:
        The index of the most probable class, or ``None`` when no checkpoint
        could be found (a message is printed in that case).
    """
    image_array = get_image(imagepath)
    with tf.Graph().as_default():
        BATCH_SIZE = 1  # a single image is evaluated per call

        # The placeholder is the real graph input, so the fed image is what
        # gets classified (previously the placeholder was disconnected and
        # the image was baked into the graph as a constant).
        x = tf.placeholder(tf.float32, shape=[208, 208, 3])
        # Per-image standardisation.
        image = tf.image.per_image_standardization(x)
        # Add the batch dimension: [208, 208, 3] -> [1, 208, 208, 3].
        image = tf.reshape(image, [1, 208, 208, 3])
        logit = model.inference(image, BATCH_SIZE, nclass)
        # inference() returns raw logits, so apply softmax here.
        logit = tf.nn.softmax(logit)

        # Directory holding the trained model checkpoints.
        logs_train_dir = 'E:/data/model/dieases/'
        saver = tf.train.Saver()

        with tf.Session() as sess:

            print("从指定的路径中加载tensorflow模型。。。。")
            # Load the checkpointed weights into the session.
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                print('模型加载失败,,,文件没有找到')
                # Without weights the variables are uninitialised; stop here.
                return None
            # Checkpoint files are named '<prefix>-<step>'.
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('模型加载成功, 训练的步数为 %s' % global_step)

            # Run the image through the model.
            prediction = sess.run(logit, feed_dict={x: image_array})
            # Index of the highest probability.
            max_index = int(np.argmax(prediction))
            print(prediction)
            print('图片的原始路径是: %s 图片分类概率最大的标签是 %d' % (imagepath ,max_index))
            return max_index
70 |
71 |
def evaluate_one_image():
    """Classify a random image from the test directory and print the result.

    Builds a fresh graph, restores the latest checkpoint and prints the
    probability of the most likely class. Nothing is classified when no
    checkpoint can be found (a message is printed instead).
    """
    train = 'D:/tensorflow/picture/picture/test/'

    # Pick and display one random image from the directory.
    image_array = get_one_image(train)

    with tf.Graph().as_default():
        BATCH_SIZE = 1  # a single image is evaluated per call
        N_CLASSES = 4   # number of output neurons of the model

        # The placeholder is the real graph input, so the fed image is what
        # gets classified (previously the placeholder was disconnected).
        x = tf.placeholder(tf.float32, shape=[208, 208, 3])
        # Per-image standardisation.
        image = tf.image.per_image_standardization(x)
        # Add the batch dimension: [208, 208, 3] -> [1, 208, 208, 3].
        image = tf.reshape(image, [1, 208, 208, 3])
        logit = model.inference(image, BATCH_SIZE, N_CLASSES)
        # inference() returns raw logits, so apply softmax here.
        logit = tf.nn.softmax(logit)

        # Directory holding the trained model checkpoints.
        logs_train_dir = 'E:/data/model/dieases'
        saver = tf.train.Saver()

        with tf.Session() as sess:

            print("从指定的路径中加载tensorflow模型。。。。")
            # Load the checkpointed weights into the session.
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                print('模型加载失败,,,文件没有找到')
                # Without weights the variables are uninitialised; stop here.
                return
            # Checkpoint files are named '<prefix>-<step>'.
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('模型加载成功, 训练的步数为 %s' % global_step)

            # Run the image through the model.
            prediction = sess.run(logit, feed_dict={x: image_array})
            # Index of the highest probability.
            max_index = np.argmax(prediction)
            print(prediction)
            # Report the probability of the winning class itself. The old
            # code always printed column 2 in the fallback branch, which is
            # the wrong value when class 3 wins.
            # NOTE(review): only three labels exist for four classes, so
            # indices 2 and 3 share the last message — confirm the mapping.
            probability = prediction[0, max_index]
            if max_index == 0:
                print('猫的图片相似度概率 %.6f' % probability)
            elif max_index == 1:
                print('鸭的图片相似度概率 %.6f' % probability)
            else:
                print('狗的图片相似度概率 %.6f' % probability)
# Test driver: classify every entry found in the image directory.
# evaluate_one_image() can be called here instead to test a random image.
if __name__ == "__main__":
    imagedir = 'E:/opencv-image1/apple1/'
    # os.listdir returns both files and sub-directories of imagedir.
    for filename in os.listdir(imagedir):
        path = os.path.join(imagedir, filename)
        evaluate_image(path, 4)
--------------------------------------------------------------------------------
/new-tensorflow/input_data.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import os
3 | import numpy as np
4 | import glob
5 |
def get_files(file_dir):
    """List the labelled image files in ``file_dir`` in random order.

    The label is derived from the part of the file name before the first
    ``'.'``: the first keyword found in it decides the class
    (``cat`` -> 0, ``dag`` -> 1, ``apple`` -> 2, ``dog`` -> 3). Files matching
    no keyword are skipped.

    Args:
        file_dir: Directory to scan; a trailing separator is optional.

    Returns:
        A tuple ``(image_list, label_list)``: file paths and their integer
        labels, shuffled with one shared permutation so they stay aligned.
    """
    # Checked in this order; the first matching keyword wins.
    categories = (('cat', 0), ('dag', 1), ('apple', 2), ('dog', 3))
    image_list = []
    label_list = []
    for file in os.listdir(file_dir):
        stem = file.split(sep='.')[0]
        for keyword, label in categories:
            if keyword in stem:
                # os.path.join works whether or not file_dir ends with a
                # separator; plain concatenation did not.
                image_list.append(os.path.join(file_dir, file))
                label_list.append(label)
                break

    # Shuffle paths and labels together through one index permutation.
    order = np.random.permutation(len(image_list))
    image_list = [image_list[i] for i in order]
    label_list = [label_list[i] for i in order]
    return image_list, label_list
46 |
def read_img(path):
    """Collect the ``*.jpg`` paths under each class sub-directory of ``path``.

    Every immediate sub-directory of ``path`` is one class. Sub-directories
    are visited in sorted order and the position in that order is the label,
    so the label mapping is reproducible (``os.listdir`` order is arbitrary).
    Files lying directly in ``path`` are ignored.

    Args:
        path: Dataset root directory; a trailing separator is optional.

    Returns:
        A tuple ``(image_list, label_list)``: image file paths and their
        integer labels, shuffled with one shared permutation. Both lists are
        empty when no image is found.
    """
    class_dirs = sorted(
        os.path.join(path, entry)
        for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )
    images = []
    imagelable = []
    for idx, folder in enumerate(class_dirs):
        found = glob.glob(os.path.join(folder, '*.jpg'))
        images.extend(found)
        imagelable.extend([idx] * len(found))

    # Shuffle paths and labels together through one index permutation.
    order = np.random.permutation(len(images))
    image_list = [images[i] for i in order]
    label_list = [imagelable[i] for i in order]
    return image_list, label_list
69 |
70 |
71 |
72 | # 测试 get_files
73 | # imgs , label = get_files('/Users/yangyibo/GitWork/pythonLean/AI/猫狗识别/testImg/')
74 | # for i in imgs:
75 | # print("img:",i)
76 |
77 | # for i in label:
78 | # print('label:',i)
79 | # 测试 get_files end
80 |
81 |
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Build a queue-based input pipeline that yields batches of decoded images.

    Args:
        image: List of JPEG file paths.
        label: List of integer labels, one per path.
        image_W: Target image width in pixels.
        image_H: Target image height in pixels.
        batch_size: Number of samples per batch.
        capacity: Maximum number of elements held in the batching queue.

    Returns:
        A tuple ``(image_batch, label_batch)``: a float32 tensor of
        standardised images of shape ``[batch_size, image_H, image_W, 3]``
        and an int32 label tensor of shape ``[batch_size]``.
    """
    # Convert the Python lists into tensors TensorFlow can queue.
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # Queue that emits one (path, label) pair at a time.
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    # Read the raw file contents.
    image_contents = tf.read_file(input_queue[0])
    # Decode as a 3-channel (RGB) image.
    image = tf.image.decode_jpeg(image_contents, channels=3)
    # Resize (not crop/pad) to the target size. resize_images expects
    # [new_height, new_width]; the old [image_W, image_H] order was only
    # right for square targets.
    image = tf.image.resize_images(image, [image_H, image_W], method=0)
    # Standardise each image: subtract its mean, divide by its standard deviation.
    image = tf.image.per_image_standardization(image)

    # Assemble batches; num_threads is the number of enqueueing threads and
    # capacity bounds how many elements the queue holds.
    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=batch_size,
                                              num_threads=64,
                                              capacity=capacity)

    # Give the label batch an explicit 1-D shape.
    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch
109 |
110 |
111 | # test get_batch
112 | # import matplotlib.pyplot as plt
113 | # BATCH_SIZE = 2
114 | # CAPACITY = 256
115 | # IMG_W = 208
116 | # IMG_H = 208
117 |
118 | # train_dir = '/Users/yangyibo/GitWork/pythonLean/AI/猫狗识别/testImg/'
119 |
120 | # image_list, label_list = get_files(train_dir)
121 | # image_batch, label_batch = get_batch(image_list, label_list, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
122 |
123 | # with tf.Session() as sess:
124 | # i = 0
125 | # # Coordinator 和 start_queue_runners 监控 queue 的状态,不停的入队出队
126 | # coord = tf.train.Coordinator()
127 | # threads = tf.train.start_queue_runners(coord=coord)
128 | # # coord.should_stop() 返回 true 时也就是 数据读完了应该调用 coord.request_stop()
129 | # try:
130 | # while not coord.should_stop() and i<1:
131 | # # 测试一个步
132 | # img, label = sess.run([image_batch, label_batch])
133 |
134 | # for j in np.arange(BATCH_SIZE):
135 | # print('label: %d' %label[j])
136 | # # 因为是个4D 的数据所以第一个为 索引 其他的为冒号就行了
137 | # plt.imshow(img[j,:,:,:])
138 | # plt.show()
139 | # i+=1
140 | # # 队列中没有数据
141 | # except tf.errors.OutOfRangeError:
142 | # print('done!')
143 | # finally:
144 | # coord.request_stop()
145 | # coord.join(threads)
146 | # sess.close()
147 |
148 |
149 |
--------------------------------------------------------------------------------
/new-tensorflow/model.py:
--------------------------------------------------------------------------------
1 | #coding=utf-8
2 | import tensorflow as tf
# Network structure
# conv1          convolution layer 1
# pooling1_lrn   pooling layer 1 (max pool, then local response normalisation)
# conv2          convolution layer 2
# pooling2_lrn   pooling layer 2 (local response normalisation, then max pool)
# local3         fully connected layer 1
# local4         fully connected layer 2
# softmax_linear fully connected layer 3 (raw logits)
def _conv_block(name, inputs, in_channels, out_channels, activation_name=None):
    """3x3 stride-1 'SAME' convolution + bias + ReLU under variable scope ``name``.

    The ReLU op is named ``activation_name``, or after the scope when omitted.
    """
    with tf.variable_scope(name) as scope:
        kernel = tf.get_variable(
            'weights',
            shape=[3, 3, in_channels, out_channels],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        offset = tf.get_variable(
            'biases',
            shape=[out_channels],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        convolved = tf.nn.conv2d(inputs, kernel, strides=[1, 1, 1, 1], padding='SAME')
        biased = tf.nn.bias_add(convolved, offset)
        return tf.nn.relu(biased, name=activation_name or scope.name)


def _local_params(weights_name, in_dim, out_dim):
    """Create the weight matrix and bias vector of a fully connected layer."""
    matrix = tf.get_variable(
        weights_name,
        shape=[in_dim, out_dim],
        dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
    offset = tf.get_variable(
        'biases',
        shape=[out_dim],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.1))
    return matrix, offset


def inference(images, batch_size, n_classes):
    """Build the forward pass of the model and return the raw class logits.

    Args:
        images: 4-D image batch tensor with 3 channels.
        batch_size: Number of images in the batch; used to flatten the
            feature maps to ``[batch_size, -1]``.
        n_classes: Number of output classes.

    Returns:
        The ``softmax_linear`` tensor of shape ``[batch_size, n_classes]``;
        no softmax is applied.
    """
    # 3x3 kernels over the 3 input channels, producing 16 feature maps.
    conv1 = _conv_block('conv1', images, 3, 16)

    with tf.variable_scope('pooling1_lrn'):
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0,
                          beta=0.75, name='norm1')

    conv2 = _conv_block('conv2', norm1, 16, 16, activation_name='conv2')

    # Second stage normalises first and pools with stride 1.
    with tf.variable_scope('pooling2_lrn'):
        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001 / 9.0,
                          beta=0.75, name='norm2')
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1],
                               padding='SAME', name='pooling2')

    with tf.variable_scope('local3') as scope:
        # Flatten each sample; the feature count comes from the static shape.
        flat = tf.reshape(pool2, shape=[batch_size, -1])
        dim = flat.get_shape()[1].value
        weights, biases = _local_params('weights', dim, 128)
        local3 = tf.nn.relu(tf.matmul(flat, weights) + biases, name=scope.name)

    with tf.variable_scope('local4'):
        weights, biases = _local_params('weights', 128, 128)
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name='local4')

    # Final linear layer; its weight variable is named 'softmax_linear'.
    with tf.variable_scope('softmax_linear'):
        weights, biases = _local_params('softmax_linear', 128, n_classes)
        softmax_linear = tf.add(tf.matmul(local4, weights), biases, name='softmax_linear')

    return softmax_linear
87 |
88 |
89 |
def losses(logits, labels):
    """Return the mean sparse softmax cross-entropy and log it as a summary.

    Args:
        logits: Raw class scores from ``inference``.
        labels: Integer class labels, one per sample.

    Returns:
        Scalar loss tensor named ``'loss'`` inside the ``'loss'`` scope.
    """
    with tf.variable_scope('loss') as loss_scope:
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits, name='xentropy_per_example')
        mean_loss = tf.reduce_mean(per_example, name='loss')
        summary_tag = loss_scope.name + '/loss'
        tf.summary.scalar(summary_tag, mean_loss)
    return mean_loss
97 |
def trainning(loss, learning_rate):
    """Create the Adam training op that minimises ``loss``.

    A non-trainable ``global_step`` variable is created and passed to
    ``minimize`` as its step counter.

    Args:
        loss: Scalar loss tensor to minimise.
        learning_rate: Learning rate for the Adam optimiser.

    Returns:
        The training op.
    """
    with tf.name_scope('optimizer'):
        adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
        step_counter = tf.Variable(0, name='global_step', trainable=False)
        minimize_op = adam.minimize(loss, global_step=step_counter)
    return minimize_op
104 |
def evaluation(logits, labels):
    """Return the top-1 accuracy of ``logits`` against ``labels``.

    The accuracy is also logged as a scalar summary.

    Args:
        logits: Raw class scores, one row per sample.
        labels: Integer class labels, one per sample.

    Returns:
        Scalar float32 tensor: the fraction of samples whose true label has
        the highest score.
    """
    with tf.variable_scope('accuracy') as scope:
        # True where the true label is the top-scoring class.
        correct = tf.nn.in_top_k(logits, labels, 1)
        # float32 rather than float16: half precision cannot represent every
        # count or ratio exactly once batches get large.
        correct = tf.cast(correct, tf.float32)
        accuracy = tf.reduce_mean(correct)
        tf.summary.scalar(scope.name + '/accuracy', accuracy)
    return accuracy
--------------------------------------------------------------------------------
/new-tensorflow/training.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import tensorflow as tf
4 | import input_data
5 | import model
6 |
7 |
# The number of classes is passed to run_training() instead of being fixed here.
IMG_W = 208  # images are resized to this width; larger images train more slowly
IMG_H = 208  # images are resized to this height
BATCH_SIZE = 32  # samples per batch
CAPACITY = 256  # maximum number of elements in the input queue
MAX_STEP = 150  # number of training steps; should be >= 10000 for real training
learning_rate = 0.0001  # a starting learning rate <= 0.0001 is recommended


def run_training(n_classes):
    """Train the model on the dataset directory and write checkpoints.

    Builds the input pipeline, model, loss, optimiser and accuracy ops, then
    runs ``MAX_STEP`` training steps. Every 50 steps the metrics are printed
    and a summary is written; a checkpoint is saved every 2000 steps and
    after the last step.

    Args:
        n_classes: Number of output classes of the model.
    """
    # Dataset root.
    train_dir = 'E:/data/datasets/dieases_photos/'
    # Where summaries (viewable in TensorBoard) and checkpoints are written.
    logs_train_dir = 'E:/data/model/dieases/'

    # Image paths and labels.
    train, train_label = input_data.read_img(train_dir)
    # Batched input pipeline.
    train_batch, train_label_batch = input_data.get_batch(train,
                                                          train_label,
                                                          IMG_W,
                                                          IMG_H,
                                                          BATCH_SIZE,
                                                          CAPACITY)
    # Model, loss, training op and accuracy.
    train_logits = model.inference(train_batch, BATCH_SIZE, n_classes)
    train_loss = model.losses(train_logits, train_label_batch)
    train_op = model.trainning(train_loss, learning_rate)
    train__acc = model.evaluation(train_logits, train_label_batch)
    # Merge all summaries into one op.
    summary_op = tf.summary.merge_all()
    sess = tf.Session()
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break

            if step % 50 == 0:
                # Fetch the summary in the same run as the training step: a
                # separate sess.run(summary_op) would dequeue another batch
                # and log metrics for data that was not trained on.
                _, tra_loss, tra_acc, summary_str = sess.run(
                    [train_op, train_loss, train__acc, summary_op])
                print('Step %d, train loss = %.2f, train accuracy = %.2f%%' %(step, tra_loss, tra_acc*100.0))
                train_writer.add_summary(summary_str, step)
            else:
                _, tra_loss, tra_acc = sess.run([train_op, train_loss, train__acc])

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                # Save every 2000 steps and after the final step.
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        # Stop the queue threads, flush the summaries and release the session.
        coord.request_stop()
        coord.join(threads)
        train_writer.close()
        sess.close()


# Train with 4 classes when executed as a script.
if __name__ == '__main__':
    run_training(4)
77 |
--------------------------------------------------------------------------------