├── README.md
├── old_versions
    ├── YOLO_tiny.py
    ├── tiny_yolo.py
    ├── very_tiny_yolo.py
    ├── very_tiny_yolov2.py
    └── very_tiny_yolov3.py
├── some_tests
    ├── 108.jpg
    ├── 109.jpg
    ├── 126.jpg
    ├── 152.jpg
    ├── 171.jpg
    └── 173.jpg
└── yolo_v1.py


/README.md:
--------------------------------------------------------------------------------
1 | # YOLOv1_tf
2 | A simple TensorFlow implementation of [You Only Look Once: Unified, Real-Time Object Detection](https://arxiv.org/pdf/1506.02640.pdf)
3 | for face detection and recognition.
4 | # Results
5 | ![](/some_tests/108.jpg)  ![](/some_tests/109.jpg)
6 | ![](/some_tests/126.jpg)  ![](/some_tests/152.jpg)
7 | ![](/some_tests/171.jpg)  ![](/some_tests/173.jpg)
8 | 


--------------------------------------------------------------------------------
/old_versions/YOLO_tiny.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import argparse
  6 | import gzip
  7 | import os
  8 | import sys
  9 | import time
 10 | import cv2
 11 | import numpy
 12 | from PIL import Image
 13 | from six.moves import urllib
 14 | from six.moves import xrange
 15 | import tensorflow as tf
 16 | 
 17 | IMAGE_SIZE = 224
 18 | NUM_CHANNELS = 3
 19 | PIXEL_DEPTH = 255
 20 | SEED = 66478
 21 | BATCH_SIZE = 2
 22 | NUM_EPOCHS = 200
 23 | S = 7
 24 | B = 2
 25 | CLASSES = 2
 26 | COORD_W = 5
 27 | NOOBJ_W = 0.5
 28 | PROB_THRESHOLD = 0.25
 29 | NMS_THRESHOLD = 0.5
 30 | TRAIN_SIZE = 122
 31 | alpha = 0.1
 32 | EVAL_FREQUENCY = 100
 33 | TRAIN_IMG_DIR = '/home/yy/train/'
 34 | TRAIN_LABEL_DIR = '/home/yy/labels/'
 35 | CLASSES_NAME = ["DaLai","NonDaLai"]
 36 | TEST_IMG_DIR = '/home/yy/test1/'
 37 | TEST_LABEL_DIR = 'home/yy/labels/'
 38 | RES_DIR = '/home/yy/subnets2/'
 39 | SAVE_MODEL = '/home/yy/tf_saver_models/model_yolo4.ckpt'
 40 | SAVE_TENSORBOARD = '/home/yy/tensorboard'
 41 | 
 42 | 
 43 | conv1_weights = tf.Variable(tf.truncated_normal([3, 3, NUM_CHANNELS+2, 64], stddev=0.01, seed=SEED, dtype=tf.float32))
 44 | conv1_biases = tf.Variable(tf.constant(0.1, shape=[64], dtype=tf.float32))
 45 | conv2_weights = tf.Variable(tf.truncated_normal([3, 3, 64, 128], stddev=0.01, seed=SEED, dtype=tf.float32))
 46 | conv2_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32))
 47 | conv3_weights = tf.Variable(tf.truncated_normal([3, 3, 128, 256], stddev=0.01, seed=SEED, dtype=tf.float32))
 48 | conv3_biases = tf.Variable(tf.constant(0.1, shape=[256], dtype=tf.float32))
 49 | conv4_weights = tf.Variable(tf.truncated_normal([3, 3, 256, 512], stddev=0.01, seed=SEED, dtype=tf.float32))
 50 | conv4_biases = tf.Variable(tf.constant(0.1, shape=[512], dtype=tf.float32))
 51 | conv5_weights = tf.Variable(tf.truncated_normal([3, 3, 512, 1024], stddev=0.01, seed=SEED, dtype=tf.float32))
 52 | conv5_biases = tf.Variable(tf.constant(0.1, shape=[1024], dtype=tf.float32))
 53 | 
 54 | """separate fc layer to fc1fc2 for coordinate regression and fc3fc4 for classify regression"""
 55 | fc1_weights = tf.Variable(tf.truncated_normal([IMAGE_SIZE * IMAGE_SIZE //1024  * 1024, 128], stddev=0.01, seed=SEED, dtype=tf.float32))
 56 | fc1_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32))
 57 | fc2_weights = tf.Variable(tf.truncated_normal([128, S*S*(B*5)], stddev=0.01, seed=SEED, dtype=tf.float32))
 58 | fc2_biases = tf.Variable(tf.constant(0.1, shape=[S*S*(B*5)], dtype=tf.float32))
 59 | 
 60 | fc3_weights = tf.Variable(tf.truncated_normal([1024, 128], stddev=0.01, seed=SEED, dtype=tf.float32))
 61 | fc3_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32))
 62 | fc4_weights = tf.Variable(tf.truncated_normal([128, S*S*CLASSES], stddev=0.01, seed=SEED, dtype=tf.float32))
 63 | fc4_biases = tf.Variable(tf.constant(0.1, shape=[S*S*CLASSES], dtype=tf.float32))
 64 | 
 65 | def model(data):
 66 |   conv = tf.nn.conv2d(data, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
 67 |   conv_bias = tf.nn.bias_add(conv, conv1_biases)
 68 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 69 | 
 70 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 71 | 
 72 |   conv = tf.nn.conv2d(pool, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
 73 |   conv_bias = tf.nn.bias_add(conv, conv2_biases)
 74 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 75 | 
 76 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 77 | 
 78 |   conv = tf.nn.conv2d(pool, conv3_weights, strides=[1, 1, 1, 1], padding='SAME')
 79 |   conv_bias = tf.nn.bias_add(conv, conv3_biases)
 80 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 81 | 
 82 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 83 | 
 84 |   conv = tf.nn.conv2d(pool, conv4_weights, strides=[1, 1, 1, 1], padding='SAME')
 85 |   conv_bias = tf.nn.bias_add(conv, conv4_biases)
 86 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 87 | 
 88 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 89 | 
 90 |   conv = tf.nn.conv2d(pool, conv5_weights, strides=[1, 1, 1, 1], padding='SAME')
 91 |   conv_bias = tf.nn.bias_add(conv, conv5_biases)
 92 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 93 | 
 94 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 95 | 
 96 |   fc1_shape = pool.get_shape().as_list()
 97 |   reshape = tf.reshape(pool, [fc1_shape[0], fc1_shape[1] * fc1_shape[2] * fc1_shape[3]])
 98 | 
 99 |   fc1_hidden = tf.matmul(reshape, fc1_weights) + fc1_biases
100 |   fc1_out = tf.maximum(alpha*fc1_hidden, fc1_hidden)
101 | 
102 |   coors = tf.sigmoid(tf.matmul(fc1_out, fc2_weights) + fc2_biases)
103 | 
104 |   pool = tf.nn.avg_pool(lrelu, ksize=[1, IMAGE_SIZE*IMAGE_SIZE/1024, IMAGE_SIZE*IMAGE_SIZE/1024, 1], strides=[1, IMAGE_SIZE*IMAGE_SIZE/1024, IMAGE_SIZE*IMAGE_SIZE/1024, 1], padding='SAME')
105 |   
106 |   fc3_shape = pool.get_shape().as_list()
107 |   reshape = tf.reshape(pool, [fc3_shape[0], fc3_shape[1] * fc3_shape[2] * fc3_shape[3]])
108 | 
109 |   fc3_hidden = tf.matmul(reshape, fc3_weights) + fc3_biases
110 |   fc3_out = tf.maximum(alpha*fc3_hidden, fc3_hidden)
111 | 
112 |   probs = tf.sigmoid(tf.matmul(fc3_out, fc4_weights) + fc4_biases)
113 |   output = []
114 | 
115 |   for i in range(BATCH_SIZE):
116 |     for j in range(S*S):
117 |       for k in range(10):
118 |         output.append(coors[i,j*B*5+k])
119 |       for k in range(CLASSES):
120 |         output.append(probs[i,j*CLASSES+k])
121 |   output = tf.reshape(output, [BATCH_SIZE, S*S*(B*5+CLASSES)])
122 | 
123 |   return output
124 | 
125 | 
def nms(dets, thresh):
  """Greedy non-maximum suppression.

  Args:
    dets: 2-D array whose columns are x1, y1, x2, y2, score.
    thresh: a box is dropped when its overlap ratio with an already kept,
      higher-scoring box exceeds this value.

  Returns:
    List of row indices into `dets` that survive, highest score first.
  """
  left, top, right, bottom, scores = (dets[:, col] for col in range(5))
  areas = (right - left + 1) * (bottom - top + 1)

  # Candidate indices, best score first.
  order = scores.argsort()[::-1]
  keep = []
  while order.size > 0:
    best, rest = order[0], order[1:]
    keep.append(best)

    # Intersection of the best box with every remaining candidate.
    inter_w = numpy.maximum(
        0.0, numpy.minimum(right[best], right[rest]) - numpy.maximum(left[best], left[rest]) + 1)
    inter_h = numpy.maximum(
        0.0, numpy.minimum(bottom[best], bottom[rest]) - numpy.maximum(top[best], top[rest]) + 1)
    inter = inter_w * inter_h
    ovr = inter / (areas[best] + areas[rest] - inter)

    # Only weakly overlapping candidates stay in the running.
    order = rest[ovr <= thresh]

  return keep
155 | 
def get_results(output):
  """Turn the raw network output for the first image into final detections.

  Each grid cell stores B boxes as (x, y, w, h, confidence) followed by
  CLASSES class scores. A box is kept when confidence * best class score
  exceeds PROB_THRESHOLD; the kept boxes then go through nms().

  Args:
    output: array indexed as output[0][k]; only row 0 is read.

  Returns:
    (results, classes): results is a list of [x1, y1, x2, y2, score] and
    classes the matching class indices. Both are empty when nothing passes.
  """
  cell_len = B*5 + CLASSES
  row = output[0]
  results = []
  classes = []
  probs = numpy.ndarray(shape=[CLASSES,])
  for p in range(B):
    # j walks the confidence slot of box p in every cell.
    for j in range(4 + p*5, S*S*cell_len, cell_len):
      for i in range(CLASSES):
        probs[i] = row[j] * row[j + 1 + (B-1-p)*5 + i]

      cls_ind = probs.argsort()[::-1][0]
      if probs[cls_ind] > PROB_THRESHOLD:
        x, y, w, h = row[j-4], row[j-3], row[j-2], row[j-1]
        # The vertical extent uses the height (j-1). The previous code halved
        # y itself (j-3), which produced wrong y1/y2.
        results.append([x - w/2, y - h/2, x + w/2, y + h/2, probs[cls_ind]])
        classes.append(cls_ind)

  if not results:
    return [],[]

  keep = nms(numpy.array(results).astype(numpy.float32), NMS_THRESHOLD)
  return [results[i] for i in keep], [classes[i] for i in keep]
182 | 
def show_results(img_path, results, classes):
  """Draw the detections on the image at `img_path` and write it to RES_DIR.

  Args:
    img_path: path of the image to annotate; its last '/'-separated component
      is reused as the output file name.
    results: list of [x1, y1, x2, y2, score] with coordinates given as
      fractions of the image width/height.
    classes: class index for each entry of `results`.
  """
  img = cv2.imread(img_path).copy()
  height, width = img.shape[0], img.shape[1]
  for idx, box in enumerate(results):
    # Scale fractional coordinates to pixels.
    x1 = int(box[0]*width)
    y1 = int(box[1]*height)
    x2 = int(box[2]*width)
    y2 = int(box[3]*height)
    score = box[4]
    cv2.rectangle(img, (x1,y1), (x2,y2), (0,255,0), 2)
    caption = CLASSES_NAME[classes[idx]] + ' : %.2f' % score
    cv2.putText(img, caption, (x1+5,y1-7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 2)

  file_name = img_path.split('/')[-1]
  cv2.imwrite(RES_DIR + file_name, img)
196 | 
def get_next_minibatch(offset, path_list):
  """Return BATCH_SIZE entries of `path_list` starting at `offset`.

  When the window would run past the end of the list, the list is shuffled
  in place and its first BATCH_SIZE entries are returned instead.
  """
  # The file never imports `random`, so the shuffle branch used to raise
  # NameError. The import is kept local to leave the file header untouched.
  import random

  if offset+BATCH_SIZE > len(path_list):
    random.shuffle(path_list)
    return path_list[:BATCH_SIZE]
  return path_list[offset:offset+BATCH_SIZE]
203 | 
def extract_data_yolo(path_list, train=True):
  """Load images into a float32 batch with two extra position planes.

  Args:
    path_list: when `train` is true, a sequence of image stems resolved as
      TRAIN_IMG_DIR + stem + '.jpg'; otherwise a single image path.
    train: selects between the two input conventions above.

  Returns:
    Array of shape [n, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS+2]. The first
    NUM_CHANNELS planes are the resized pixels; plane -2 holds the row index
    and plane -1 the column index of each pixel.
  """
  def load_pixels(path):
    # Force three colour planes so greyscale/CMYK/RGBA files no longer fail
    # on assignment (assumes NUM_CHANNELS == 3, as set in this file).
    with Image.open(path) as img:
      resized = img.convert('RGB').resize((IMAGE_SIZE,IMAGE_SIZE))
    return numpy.array(resized).astype(numpy.float32)

  if train:
    paths = [TRAIN_IMG_DIR + stem + '.jpg' for stem in path_list]
  else:
    paths = [path_list]

  data = numpy.ndarray(shape=(len(paths),IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS + 2),dtype=numpy.float32)

  # Position planes, identical for every image: row index, then column index.
  positions = numpy.arange(IMAGE_SIZE, dtype=numpy.float32)
  data[:, :, :, -2] = positions[numpy.newaxis, :, numpy.newaxis]
  data[:, :, :, -1] = positions[numpy.newaxis, numpy.newaxis, :]

  for i, path in enumerate(paths):
    data[i,:,:,:-2] = load_pixels(path)

  return data
236 | 
def iou(box1,box2):
  """Intersection-over-union of two boxes given as (centre0, centre1, size0, size1).

  Index 0 pairs with index 2 and index 1 with index 3 (centre and full extent
  along the same axis). Raises ZeroDivisionError when both boxes have zero area.
  """
  def overlap(axis):
    # Length of the shared interval along one axis; negative when disjoint.
    centre_a, half_a = box1[axis], 0.5*box1[axis+2]
    centre_b, half_b = box2[axis], 0.5*box2[axis+2]
    return min(centre_a+half_a, centre_b+half_b) - max(centre_a-half_a, centre_b-half_b)

  span0 = overlap(0)
  span1 = overlap(1)
  intersection = 0 if (span0 < 0 or span1 < 0) else span0*span1
  union = box1[2]*box1[3] + box2[2]*box2[3] - intersection
  return intersection / union
243 | 
244 | 
def extract_labels_yolo(path_list, train=True):
  """Read label files and encode them in the network's output layout.

  Each label file is `root + stem + '.txt'`, where root is TRAIN_LABEL_DIR or
  TEST_LABEL_DIR. Every non-blank line is read as
  `class x y w h` (whitespace separated).

  Args:
    path_list: sequence of label-file stems.
    train: picks the label directory.

  Returns:
    float32 array of shape [len(path_list), S*S*(B*5+CLASSES)]. In cells
    without an object, the first slot of every box holds the marker 1.00001
    and everything else is 0. In cells with an object, each of the B boxes
    holds (x, y, w, h, 1) and the class slot of the labelled class is 1.
  """
  root = TRAIN_LABEL_DIR if train else TEST_LABEL_DIR
  cell_len = B*5 + CLASSES

  labels = numpy.zeros(shape=(len(path_list), S*S*cell_len), dtype=numpy.float32)
  # Empty-cell marker; loss_func_yolo detects it through tf.floor(...).
  for k in range(B):
    labels[:, 5*k::cell_len] = 1.00001

  for i, stem in enumerate(path_list):
    with open(root + stem + '.txt',"r") as f:
      for line in f:
        data = line.split()
        if not data:
          # Tolerate blank lines, e.g. a trailing newline at end of file.
          continue
        # 1-based grid column/row of the box centre. Clamped so a centre of
        # exactly 1.0 falls in the last cell, not one past the grid.
        col_no = min(max(int(float(data[1])*IMAGE_SIZE/(IMAGE_SIZE/S)+1), 1), S)
        row_no = min(max(int(float(data[2])*IMAGE_SIZE/(IMAGE_SIZE/S)+1), 1), S)
        cell_start = cell_len * ((row_no-1)*S + col_no - 1)

        labels[i, cell_start + B*5 + int(data[0])] = 1
        for k in range(B):
          box_start = cell_start + 5*k
          labels[i, box_start] = float(data[1])
          labels[i, box_start + 1] = float(data[2])
          labels[i, box_start + 2] = float(data[3])
          labels[i, box_start + 3] = float(data[4])
          labels[i, box_start + 4] = 1

  return labels
275 | 
def loss_func_yolo(output, label):
  """Sum the detection loss over BATCH_SIZE images and divide by BATCH_SIZE.

  Both arguments are indexed as [image][slot] using the per-cell layout
  (x, y, w, h, conf) * 2 followed by two class slots; the slot offsets below
  are written for that layout.

  Per cell, only the box with the strictly higher predicted confidence
  contributes its coordinate and confidence terms. The class term is added
  whenever the label's second-box width is positive.
  """
  total = 0

  for i in range(BATCH_SIZE):
    pred = output[i]
    truth = label[i]
    for j in range(0, S*S*(B*5+CLASSES), B*5+CLASSES):
      conf_gap = pred[j+4] - pred[j+9]
      # 1 when the first (resp. second) box has the higher confidence, else 0.
      first_wins = tf.sign(tf.maximum(conf_gap,0))
      second_wins = tf.sign(tf.maximum(-conf_gap,0))

      for first, gate in ((j, first_wins), (j+5, second_wins)):
        has_object = tf.sign(truth[first+2])
        # Empty cells carry a marker slightly above 1, so floor() keeps only those.
        is_empty = tf.sign(tf.floor(truth[first]))

        box_err = (tf.square(pred[first] - truth[first]) +
                   tf.square(pred[first+1]-truth[first+1]) +
                   tf.square(pred[first+2]/(truth[first+2]+1e-7) - 1) +
                   tf.square(pred[first+3]/(truth[first+3]+1e-7) - 1))
        conf_err = tf.square(pred[first+4] - truth[first+4])

        total += COORD_W * gate * has_object * box_err
        total += gate * has_object * conf_err
        total += NOOBJ_W * gate * is_empty * conf_err

      total += tf.sign(truth[j+7]) * (tf.square(pred[j+10] - truth[j+10]) + tf.square(pred[j+11] - truth[j+11]))

  return total/BATCH_SIZE
308 | 
309 | # def loss_func_yolo(output, label):
310 | #   res = 0
311 | 
312 | #   for i in range(BATCH_SIZE):
313 | #     for j in range(0, S*S*(B*5+CLASSES), B*5+CLASSES):
314 | #       res += COORD_W * tf.sign(label[i][j+2]) * (tf.square(output[i][j] - label[i][j]) + tf.square(output[i][j+1]-label[i][j+1]) + 
315 | #                                                tf.square(output[i][j+2]/(label[i][j+2]+1e-7) - 1) + 
316 | #                                                tf.square(output[i][j+3]/(label[i][j+3]+1e-7) - 1))
317 | 
318 | #       res += tf.sign(label[i][j+2]) * (tf.square(output[i][j+4] - label[i][j+4]))
319 | 
320 | #       res += NOOBJ_W * tf.sign(tf.floor(label[i][j])) * (tf.square(output[i][j+4] - label[i][j+4]))
321 | 
322 | #       res += COORD_W * tf.sign(label[i][j+7]) * (tf.square(output[i][j+5] - label[i][j+5]) + tf.square(output[i][j+6]-label[i][j+6]) + 
323 | #                                                tf.square(output[i][j+7]/(label[i][j+7]+1e-7) - 1) + 
324 | #                                                tf.square(output[i][j+8]/(label[i][j+8]+1e-7) - 1))
325 | 
326 | #       res += tf.sign(label[i][j+7]) * (tf.square(output[i][j+9] - label[i][j+9]))
327 | 
328 | #       res += NOOBJ_W * tf.sign(tf.floor(label[i][j+5])) * (tf.square(output[i][j+9] - label[i][j+9]))
329 | 
330 | #       res += tf.sign(label[i][j+7]) * (tf.square(output[i][j+10] - label[i][j+10]) + tf.square(output[i][j+11] - label[i][j+11]))
331 | 
332 | #   return res
333 | 
def test_from_img(img, test_model, display_loss=False):
  """Run a restored checkpoint on one image and save the annotated result.

  Args:
    img: path of the image file.
    test_model: checkpoint path passed to tf.train.Saver.restore.
    display_loss: when true, also print the loss against the label file
      whose stem matches the image file name.
  """
  with tf.Session() as sess:
    tf.global_variables_initializer().run()
    saver = tf.train.Saver()
    saver.restore(sess, test_model)
    data = extract_data_yolo(img, train=False)
    out = sess.run(model(data))
    if display_loss:
      # extract_labels_yolo expects a stem (it adds the directory and '.txt'),
      # not the full image path that was passed before.
      stem = os.path.splitext(os.path.basename(img))[0]
      label = extract_labels_yolo([stem], train=False)
      # loss_func_yolo returns a graph tensor, so evaluate it before formatting.
      # NOTE(review): loss_func_yolo iterates over BATCH_SIZE rows while only
      # one image is supplied here; confirm BATCH_SIZE == 1 for evaluation.
      print('loss: %.6f' % sess.run(loss_func_yolo(out, label)))
    results,classes = get_results(out)
    show_results(img, results, classes)
346 | 
def test_from_dir(imgdir, test_model, display_loss=False):
  """Run a restored checkpoint on every file under a directory.

  Args:
    imgdir: directory path; its final character is dropped before walking
      (callers pass it with a trailing '/').
    test_model: checkpoint path passed to tf.train.Saver.restore.
    display_loss: when true, accumulate the per-image loss and print the sum.
  """
  # Build the inference graph once. Calling model() for every image, as the
  # code did before, keeps adding nodes to the default graph.
  data_node = tf.placeholder(tf.float32, shape=(1, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS+2))
  output_node = model(data_node)
  fetches = [output_node]
  if display_loss:
    # NOTE(review): loss_func_yolo iterates over BATCH_SIZE rows while this
    # graph carries one image; confirm BATCH_SIZE == 1 for evaluation.
    label_node = tf.placeholder(tf.float32, shape=(1, S*S*(B*5+CLASSES)))
    fetches.append(loss_func_yolo(output_node, label_node))

  with tf.Session() as sess:
    tf.global_variables_initializer().run()
    saver = tf.train.Saver()
    saver.restore(sess, test_model)

    loss = 0
    for root, dirs, files in os.walk(imgdir[:-1]):
      for file in files:
        img = os.path.join(root, file)
        feed_dict = {data_node: extract_data_yolo(img, train=False)}
        if display_loss:
          # Labels are looked up by file stem, not by the full image path.
          stem = os.path.splitext(os.path.basename(img))[0]
          feed_dict[label_node] = extract_labels_yolo([stem], train=False)
        fetched = sess.run(fetches, feed_dict=feed_dict)
        out = fetched[0]
        if display_loss:
          # The loss is evaluated to a number so the final print can format it.
          loss += fetched[1]
        results,classes = get_results(out)
        show_results(img, results, classes)

    if display_loss:
      print('loss: %.6f' % loss)
372 | 
def preprocessing(imgs):
  """Apply tf.image.per_image_standardization to the first BATCH_SIZE images and stack them."""
  standardized = [tf.image.per_image_standardization(imgs[idx]) for idx in range(BATCH_SIZE)]
  return tf.stack(standardized)
378 | 
def main(argv=None):
  """Train the network on the files under TRAIN_IMG_DIR and save a checkpoint.

  Builds the loss (detection loss plus L2 on the fully connected layers),
  trains with momentum SGD under an exponentially decayed learning rate,
  logs loss and learning rate every EVAL_FREQUENCY steps, and finally writes
  the variables to SAVE_MODEL.

  Args:
    argv: unused; present because tf.app.run() passes the argument list.
  """
  num_epochs = NUM_EPOCHS
  # File names minus their last four characters are the image/label stems.
  train_img_list = []
  for rt,dirs,filenames in os.walk(TRAIN_IMG_DIR):
    for filename in filenames:
      train_img_list.append(filename[:-4])

  numpy.random.shuffle(train_img_list)
  raw_data_node = tf.placeholder(
      tf.float32,
      shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS+2))
  train_labels_node = tf.placeholder(tf.float32, shape=(BATCH_SIZE, S*S*(B*5+CLASSES)))

  # NOTE(review): the feed below targets this tensor, exactly as the original
  # code did by rebinding the placeholder name. The fed batch therefore
  # replaces the standardized values and the model sees raw inputs. The test
  # helpers also feed raw inputs, so this is left unchanged; confirm intent.
  train_data_node = preprocessing(raw_data_node)
  logits = model(train_data_node)
  loss = loss_func_yolo(logits, train_labels_node)

  regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                  tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases) +
                  tf.nn.l2_loss(fc3_weights) + tf.nn.l2_loss(fc3_biases) +
                  tf.nn.l2_loss(fc4_weights) + tf.nn.l2_loss(fc4_biases))

  loss += 5e-4 * regularizers

  # Step counter incremented by the optimizer; drives the learning-rate decay.
  batch = tf.Variable(0, dtype=tf.float32)

  learning_rate = tf.train.exponential_decay(
      0.001,
      batch * BATCH_SIZE,
      10000,
      0.95,
      staircase=True)

  optimizer = tf.train.MomentumOptimizer(learning_rate,0.9).minimize(loss, global_step=batch)

  tf.summary.scalar("loss", loss)
  tf.summary.scalar("lr", learning_rate)
  merged_summary = tf.summary.merge_all()
  with tf.Session() as sess:

    tf.global_variables_initializer().run()
    saver = tf.train.Saver()
    print('Initialized!')
    writer = tf.summary.FileWriter(SAVE_TENSORBOARD, sess.graph)

    for step in xrange(int(num_epochs * TRAIN_SIZE) // BATCH_SIZE):
      offset = (step * BATCH_SIZE) % (TRAIN_SIZE - BATCH_SIZE)
      # Fetch the minibatch once. get_next_minibatch may reshuffle the list,
      # so two separate calls could pair images with other images' labels.
      batch_paths = get_next_minibatch(offset, train_img_list)
      batch_data = extract_data_yolo(batch_paths)
      batch_labels = extract_labels_yolo(batch_paths)

      feed_dict = {train_data_node: batch_data,
                   train_labels_node: batch_labels}

      _,los,summary = sess.run([optimizer, loss, merged_summary], feed_dict=feed_dict)

      if step % EVAL_FREQUENCY == 0:
        print('loss: %.6f' % los)
        writer.add_summary(summary, step)

    saver.save(sess, SAVE_MODEL)
    # Flush pending summaries and release the event file.
    writer.close()
439 | 
def parse_args(argv=None):
  """Parse the command-line options of the YOLO demo.

  Args:
    argv: optional list of argument strings; when None, argparse reads
      sys.argv[1:] as before.

  Returns:
    argparse.Namespace with train, test, test_img_path, display_loss and
    test_model.
  """
  parser = argparse.ArgumentParser(description='YOLO demo')
  parser.add_argument('--train', help='train the model', action='store_true')
  parser.add_argument('--test', help='test the model', action='store_true')
  parser.add_argument('--test_img_path', help='img path to test', type=str)
  parser.add_argument('--display_loss', default=False, help='whether display the loss', action='store_true')
  parser.add_argument('--test_model', help='model to test', type=str)
  args = parser.parse_args(argv)

  # This line was indented deeper than the rest of the body, which made the
  # whole file fail to parse.
  return args
if __name__ == '__main__':
  args = parse_args()
  if args.train and args.test:
    print('Error: cannot train and test at the same time')
  elif args.train:
    tf.app.run()
  elif not args.test_img_path:
    # Without this guard, indexing the missing option raised TypeError.
    print('Error: --test_img_path is required when not training')
  elif args.test_img_path.endswith('/'):
    # A trailing slash means "evaluate every file in this directory".
    test_from_dir(args.test_img_path, args.test_model, args.display_loss)
  else:
    test_from_img(args.test_img_path, args.test_model, args.display_loss)
460 | 


--------------------------------------------------------------------------------
/old_versions/tiny_yolo.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import argparse
  6 | import gzip
  7 | import os
  8 | import sys
  9 | import time
 10 | import cv2
 11 | import numpy
 12 | from PIL import Image
 13 | from six.moves import urllib
 14 | from six.moves import xrange
 15 | import tensorflow as tf
 16 | 
 17 | IMAGE_SIZE = 224
 18 | NUM_CHANNELS = 3
 19 | PIXEL_DEPTH = 255
 20 | SEED = 66478
 21 | BATCH_SIZE = 1
 22 | NUM_EPOCHS = 100
 23 | S = 7
 24 | B = 2
 25 | CLASSES = 2
 26 | COORD_W = 5
 27 | NOOBJ_W = 0.5
 28 | PROB_THRESHOLD = 0.25
 29 | NMS_THRESHOLD = 0.5
 30 | TRAIN_SIZE = 122
 31 | alpha = 0.1
 32 | EVAL_FREQUENCY = 100
 33 | TRAIN_IMG_DIR = '/home/yy/train/'
 34 | TRAIN_LABEL_DIR = '/home/yy/labels/'
 35 | CLASSES_NAME = ["DaLai","NonDaLai"]
 36 | TEST_IMG_PATH = '/home/yy/109.jpg'
 37 | RES_DIR = '/home/yy/pred_decay1W/'
 38 | SAVE_MODEL = '/home/yy/tf_saver_models/model_conv5_epoch100_decay10000.ckpt'
 39 | SAVE_TENSORBOARD = '/home/yy/tensorboard'
 40 | TEST_MODEL = '/home/yy/tf_saver_models/model_conv5_epoch100_decay10000.ckpt'
 41 | 
 42 | 
 43 | conv1_weights = tf.Variable(tf.truncated_normal([3, 3, NUM_CHANNELS, 16], stddev=0.1, seed=SEED, dtype=tf.float32))
 44 | conv1_biases = tf.Variable(tf.zeros([16], dtype=tf.float32))
 45 | conv2_weights = tf.Variable(tf.truncated_normal([3, 3, 16, 32], stddev=0.1, seed=SEED, dtype=tf.float32))
 46 | conv2_biases = tf.Variable(tf.constant(0.1, shape=[32], dtype=tf.float32))
 47 | conv3_weights = tf.Variable(tf.truncated_normal([3, 3, 32, 64], stddev=0.1,seed=SEED, dtype=tf.float32))
 48 | conv3_biases = tf.Variable(tf.zeros([64], dtype=tf.float32))
 49 | conv4_weights = tf.Variable(tf.truncated_normal([3, 3, 64, 128], stddev=0.1, seed=SEED, dtype=tf.float32))
 50 | conv4_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32))
 51 | conv5_weights = tf.Variable(tf.truncated_normal([3, 3, 128, 256], stddev=0.1, seed=SEED, dtype=tf.float32))
 52 | conv5_biases = tf.Variable(tf.zeros([256], dtype=tf.float32))
 53 | conv6_weights = tf.Variable(tf.truncated_normal([3, 3, 256, 512], stddev=0.1, seed=SEED, dtype=tf.float32))
 54 | conv6_biases = tf.Variable(tf.constant(0.1, shape=[512], dtype=tf.float32))
 55 | conv7_weights = tf.Variable(tf.truncated_normal([3, 3, 512, 1024], stddev=0.1, seed=SEED, dtype=tf.float32))
 56 | conv7_biases = tf.Variable(tf.zeros([1024], dtype=tf.float32))
 57 | conv8_weights = tf.Variable(tf.truncated_normal([3, 3, 1024, 1024], stddev=0.1, seed=SEED, dtype=tf.float32))
 58 | conv8_biases = tf.Variable(tf.constant(0.1, shape=[1024], dtype=tf.float32))
 59 | conv9_weights = tf.Variable(tf.truncated_normal([3, 3, 1024, 1024], stddev=0.1, seed=SEED, dtype=tf.float32))
 60 | conv9_biases = tf.Variable(tf.constant(0.1, shape=[1024], dtype=tf.float32))
 61 | conv10_weights = tf.Variable(tf.truncated_normal([3, 3, 1024, 1024], stddev=0.1, seed=SEED, dtype=tf.float32))
 62 | conv10_biases = tf.Variable(tf.constant(0.1, shape=[1024], dtype=tf.float32))
 63 | 
 64 | fc1_weights = tf.Variable(tf.truncated_normal([IMAGE_SIZE * IMAGE_SIZE //4096  * 1024, 4096], stddev=0.1, seed=SEED, dtype=tf.float32))
 65 | fc1_biases = tf.Variable(tf.constant(0.1, shape=[4096], dtype=tf.float32))
 66 | fc2_weights = tf.Variable(tf.truncated_normal([4096, 4096], stddev=0.1, seed=SEED, dtype=tf.float32))
 67 | fc2_biases = tf.Variable(tf.constant(0.1, shape=[4096], dtype=tf.float32))
 68 | fc3_weights = tf.Variable(tf.truncated_normal([4096, S*S*(B*5+CLASSES)], stddev=0.1, seed=SEED, dtype=tf.float32))
 69 | fc3_biases = tf.Variable(tf.constant(0.1, shape=[S*S*(B*5+CLASSES)], dtype=tf.float32))
 70 | 
 71 | def model(data, train=False):
 72 |   conv = tf.nn.conv2d(data, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
 73 |   conv_bias = tf.nn.bias_add(conv, conv1_biases)
 74 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 75 | 
 76 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 77 | 
 78 |   conv = tf.nn.conv2d(pool, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
 79 |   conv_bias = tf.nn.bias_add(conv, conv2_biases)
 80 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 81 | 
 82 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 83 | 
 84 |   conv = tf.nn.conv2d(pool, conv3_weights, strides=[1, 1, 1, 1], padding='SAME')
 85 |   conv_bias = tf.nn.bias_add(conv, conv3_biases)
 86 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 87 | 
 88 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 89 | 
 90 |   conv = tf.nn.conv2d(pool, conv4_weights, strides=[1, 1, 1, 1], padding='SAME')
 91 |   conv_bias = tf.nn.bias_add(conv, conv4_biases)
 92 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 93 | 
 94 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 95 | 
 96 |   conv = tf.nn.conv2d(pool, conv5_weights, strides=[1, 1, 1, 1], padding='SAME')
 97 |   conv_bias = tf.nn.bias_add(conv, conv5_biases)
 98 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 99 | 
100 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
101 | 
102 |   conv = tf.nn.conv2d(pool, conv6_weights, strides=[1, 1, 1, 1], padding='SAME')
103 |   conv_bias = tf.nn.bias_add(conv, conv6_biases)
104 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
105 | 
106 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
107 | 
108 |   conv = tf.nn.conv2d(pool, conv7_weights, strides=[1, 1, 1, 1], padding='SAME')
109 |   conv_bias = tf.nn.bias_add(conv, conv7_biases)
110 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
111 | 
112 | 
113 |   conv = tf.nn.conv2d(lrelu, conv8_weights, strides=[1, 1, 1, 1], padding='SAME')
114 |   conv_bias = tf.nn.bias_add(conv, conv8_biases)
115 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
116 | 
117 |   conv = tf.nn.conv2d(lrelu, conv9_weights, strides=[1, 1, 1, 1], padding='SAME')
118 |   conv_bias = tf.nn.bias_add(conv, conv9_biases)
119 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
120 | 
121 |   conv = tf.nn.conv2d(lrelu, conv10_weights, strides=[1, 1, 1, 1], padding='SAME')
122 |   conv_bias = tf.nn.bias_add(conv, conv10_biases)
123 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
124 | 
125 |   fc1_shape = lrelu.get_shape().as_list()
126 |   reshape = tf.reshape(lrelu, [fc1_shape[0], fc1_shape[1] * fc1_shape[2] * fc1_shape[3]])
127 | 
128 |   fc1_hidden = tf.matmul(reshape, fc1_weights) + fc1_biases
129 |   fc1_out = tf.maximum(alpha*fc1_hidden, fc1_hidden)
130 | 
131 |   fc2_hidden = tf.matmul(fc1_out, fc2_weights) + fc2_biases
132 |   fc2_out = tf.maximum(alpha*fc2_hidden, fc2_hidden)
133 | 
134 |   return tf.matmul(fc2_out, fc3_weights) + fc3_biases
135 | 
def nms(dets, thresh):
  """Greedy non-maximum suppression.

  Args:
    dets: 2-D array whose columns are x1, y1, x2, y2, score.
    thresh: a box is dropped when its overlap ratio with an already kept,
      higher-scoring box exceeds this value.

  Returns:
    List of row indices into `dets` that survive, highest score first.
  """
  left, top, right, bottom, scores = (dets[:, col] for col in range(5))
  areas = (right - left + 1) * (bottom - top + 1)

  # Candidate indices, best score first.
  order = scores.argsort()[::-1]
  keep = []
  while order.size > 0:
    best, rest = order[0], order[1:]
    keep.append(best)

    # Intersection of the best box with every remaining candidate.
    inter_w = numpy.maximum(
        0.0, numpy.minimum(right[best], right[rest]) - numpy.maximum(left[best], left[rest]) + 1)
    inter_h = numpy.maximum(
        0.0, numpy.minimum(bottom[best], bottom[rest]) - numpy.maximum(top[best], top[rest]) + 1)
    inter = inter_w * inter_h
    ovr = inter / (areas[best] + areas[rest] - inter)

    # Only weakly overlapping candidates stay in the running.
    order = rest[ovr <= thresh]

  return keep
165 | 
def get_results(output):
  """Turn the raw network output for the first image into final detections.

  Each grid cell stores B boxes as (x, y, w, h, confidence) followed by
  CLASSES class scores. A box is kept when confidence * best class score
  exceeds PROB_THRESHOLD; the kept boxes then go through nms().

  Args:
    output: array indexed as output[0][k]; only row 0 is read.

  Returns:
    (results, classes): results is a list of [x1, y1, x2, y2, score] and
    classes the matching class indices. Both are empty when nothing passes.
  """
  cell_len = B*5 + CLASSES
  row = output[0]
  results = []
  classes = []
  probs = numpy.ndarray(shape=[CLASSES,])
  for p in range(B):
    # j walks the confidence slot of box p in every cell.
    for j in range(4 + p*5, S*S*cell_len, cell_len):
      for i in range(CLASSES):
        probs[i] = row[j] * row[j + 1 + (B-1-p)*5 + i]

      cls_ind = probs.argsort()[::-1][0]
      if probs[cls_ind] > PROB_THRESHOLD:
        x, y, w, h = row[j-4], row[j-3], row[j-2], row[j-1]
        # The vertical extent uses the height (j-1). The previous code halved
        # y itself (j-3), which produced wrong y1/y2.
        results.append([x - w/2, y - h/2, x + w/2, y + h/2, probs[cls_ind]])
        classes.append(cls_ind)

  if not results:
    return [],[]

  keep = nms(numpy.array(results).astype(numpy.float32), NMS_THRESHOLD)
  return [results[i] for i in keep], [classes[i] for i in keep]
192 | 
def show_results(img_path, results, classes):
  """Draw the detections on the image at `img_path` and write it to RES_DIR.

  Args:
    img_path: path of the image to annotate; its last '/'-separated component
      is reused as the output file name.
    results: list of [x1, y1, x2, y2, score] with coordinates given as
      fractions of the image width/height.
    classes: class index for each entry of `results`.
  """
  img = cv2.imread(img_path).copy()
  height, width = img.shape[0], img.shape[1]
  for idx, box in enumerate(results):
    # Scale fractional coordinates to pixels.
    x1 = int(box[0]*width)
    y1 = int(box[1]*height)
    x2 = int(box[2]*width)
    y2 = int(box[3]*height)
    score = box[4]
    cv2.rectangle(img, (x1,y1), (x2,y2), (0,255,0), 2)
    caption = CLASSES_NAME[classes[idx]] + ' : %.2f' % score
    cv2.putText(img, caption, (x1+5,y1-7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 2)

  file_name = img_path.split('/')[-1]
  cv2.imwrite(RES_DIR + file_name, img)
206 | 
def get_next_minibatch(offset, path_list):
  """Return BATCH_SIZE entries of `path_list` starting at `offset`.

  When that window would run past the end of the list, the first BATCH_SIZE
  entries are returned instead (no reshuffling in this version).
  """
  start = 0 if offset+BATCH_SIZE > len(path_list) else offset
  return path_list[start:start+BATCH_SIZE]
213 | 
def _load_normalized_image(path):
  """Load one image as a float32 (IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS) array scaled to [-0.5, 0.5]."""
  img = Image.open(path)
  if NUM_CHANNELS == 3 and img.mode != 'RGB':
    # Grayscale / RGBA / CMYK files would otherwise break the reshape below.
    img = img.convert('RGB')
  pixels = numpy.array(img.resize((IMAGE_SIZE,IMAGE_SIZE))).astype(numpy.float32)
  pixels = pixels.reshape(IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS)
  return (pixels - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH


def extract_data_yolo(path_list, train=True):
  """Build the network input tensor: image channels plus two coordinate planes.

  Args:
    path_list: when `train` is true, a list of image base names that are
      resolved as TRAIN_IMG_DIR + name + '.jpg'; otherwise a single image
      path.
    train: selects between the two interpretations of `path_list`.

  Returns:
    float32 array of shape (N, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS + 2),
    with N = len(path_list) in training mode and N = 1 otherwise. The
    last two channels hold row_index/IMAGE_SIZE and
    column_index/IMAGE_SIZE respectively.
  """
  if train:
    paths = [TRAIN_IMG_DIR+name+'.jpg' for name in path_list]
  else:
    paths = [path_list]

  data = numpy.ndarray(shape=(len(paths),IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS + 2),dtype=numpy.float32)

  # Coordinate planes are identical for every image, so fill them by
  # broadcasting instead of looping over rows and columns per image.
  ticks = numpy.arange(IMAGE_SIZE, dtype=numpy.float64) / IMAGE_SIZE
  data[..., -2] = ticks[:, numpy.newaxis]  # varies along the row axis
  data[..., -1] = ticks                    # varies along the column axis

  for i, path in enumerate(paths):
    data[i,:,:,:-2] = _load_normalized_image(path)

  return data
247 | 
def iou(box1,box2):
  """Return the intersection-over-union of two centre-format boxes.

  Each box is indexed as (centre0, centre1, extent0, extent1): index 0 is
  the centre along the first axis and index 2 the full extent along that
  axis; indices 1 and 3 play the same role for the second axis.

  Returns 0.0 when the union area is not positive (e.g. two zero-area
  boxes) instead of dividing by zero.
  """
  overlap0 = (min(box1[0]+0.5*box1[2], box2[0]+0.5*box2[2]) -
              max(box1[0]-0.5*box1[2], box2[0]-0.5*box2[2]))
  overlap1 = (min(box1[1]+0.5*box1[3], box2[1]+0.5*box2[3]) -
              max(box1[1]-0.5*box1[3], box2[1]-0.5*box2[3]))
  if overlap0 < 0 or overlap1 < 0:
    intersection = 0
  else:
    intersection = overlap0*overlap1

  union = box1[2]*box1[3] + box2[2]*box2[3] - intersection
  if union <= 0:
    return 0.0
  return intersection / union
254 | 
255 | 
def extract_labels_yolo(path_list):
  """Read the label files of a minibatch into flat YOLO target vectors.

  Each label file is TRAIN_LABEL_DIR + name + '.txt'. Every non-blank line
  is split on whitespace into five fields: field 0 is an integer class
  index, fields 1 and 2 select the grid cell and are stored at offsets
  0/1 of each box, fields 3 and 4 are stored at offsets 2/3 (presumably
  x, y, w, h as fractions of the image size - confirm against the
  labelling tool).

  Args:
    path_list: list of label base names (without extension).

  Returns:
    float32 array of shape (len(path_list), S*S*(B*5+CLASSES)). Offsets 0
    and 5 of every cell start at 1.00001 (a "no object" marker that the
    loss detects with floor()); cells that contain an object have all B
    boxes filled with the label values, confidence 1, and a one-hot class.
  """
  cell_len = B*5 + CLASSES
  labels = numpy.zeros(shape=(len(path_list), S*S*cell_len), dtype=numpy.float32)
  # Marker value at the first slot of both boxes of every cell.
  labels[:, 0::cell_len] = 1.00001
  labels[:, 5::cell_len] = 1.00001

  for i, name in enumerate(path_list):
    with open(TRAIN_LABEL_DIR + name + '.txt',"r") as f:
      for line in f:
        data = line.split()
        if not data:
          continue  # tolerate blank / trailing empty lines
        # 1-based grid position; clamp so a coordinate of exactly 1.0 (or
        # a slightly out-of-range value) stays inside the S x S grid
        # instead of spilling into the next row or past the array end.
        col_no = min(max(int(float(data[1])*IMAGE_SIZE/(IMAGE_SIZE/S)+1), 1), S)
        row_no = min(max(int(float(data[2])*IMAGE_SIZE/(IMAGE_SIZE/S)+1), 1), S)
        grid_no = (row_no-1)*S+col_no
        cell_start = cell_len*(grid_no-1)

        labels[i, cell_start + B*5 + int(data[0])] = 1
        for k in range(B):
          box_start = cell_start + 5*k
          labels[i, box_start] = float(data[1])
          labels[i, box_start + 1] = float(data[2])
          labels[i, box_start + 2] = float(data[3])
          labels[i, box_start + 3] = float(data[4])
          labels[i, box_start + 4] = 1

  return labels
282 | 
def loss_func_yolo(output, label):
  """Build the scalar detection loss for one minibatch.

  Both arguments are indexed as [image][flat_index], where the flat axis
  is made of S*S cells of B*5+CLASSES values. The offsets used below
  (second box at +5, class scores at +10/+11) are written out for two
  boxes and two classes per cell.

  Args:
    output: network predictions.
    label: targets with the same layout.

  Returns:
    The sum of all per-cell terms over the first BATCH_SIZE images.
  """
  res = 0

  for i in range(BATCH_SIZE):
    # j is the flat index of the first value of each grid cell.
    for j in range(0, S*S*(B*5+CLASSES), B*5+CLASSES):
      # Box 1 coordinates. tf.sign(label[j+2]) is 1 only where the target at
      # offset 2 is positive, which masks out cells without an object.
      # Offsets 2 and 3 are penalised as a relative error (output/label - 1);
      # the 1e-7 keeps the division finite where the label is 0.
      res += COORD_W * tf.sign(label[i][j+2]) * (tf.square(output[i][j] - label[i][j]) + tf.square(output[i][j+1]-label[i][j+1]) + 
                                               tf.square(output[i][j+2]/(label[i][j+2]+1e-7) - 1) + 
                                               tf.square(output[i][j+3]/(label[i][j+3]+1e-7) - 1))

      # Box 1 confidence where the cell has an object.
      res += tf.sign(label[i][j+2]) * (tf.square(output[i][j+4] - label[i][j+4]))

      # Box 1 confidence where label[j] floors to at least 1 - this matches
      # the 1.00001 marker that extract_labels_yolo leaves in empty cells.
      res += NOOBJ_W * tf.sign(tf.floor(label[i][j])) * (tf.square(output[i][j+4] - label[i][j+4]))

      # Box 2 coordinates: same terms, shifted by 5.
      res += COORD_W * tf.sign(label[i][j+7]) * (tf.square(output[i][j+5] - label[i][j+5]) + tf.square(output[i][j+6]-label[i][j+6]) + 
                                               tf.square(output[i][j+7]/(label[i][j+7]+1e-7) - 1) + 
                                               tf.square(output[i][j+8]/(label[i][j+8]+1e-7) - 1))

      # Box 2 confidence, object cells.
      res += tf.sign(label[i][j+7]) * (tf.square(output[i][j+9] - label[i][j+9]))

      # Box 2 confidence, empty cells (marker at offset 5).
      res += NOOBJ_W * tf.sign(tf.floor(label[i][j+5])) * (tf.square(output[i][j+9] - label[i][j+9]))

      # Squared error on the two class scores, only for object cells.
      res += tf.sign(label[i][j+7]) * (tf.square(output[i][j+10] - label[i][j+10]) + tf.square(output[i][j+11] - label[i][j+11]))

  return res
307 | 
def test(img):
  """Restore the TEST_MODEL checkpoint, run the network on one image and save the annotated result.

  Args:
    img: path of the image to run detection on.
  """
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Restoring overwrites the freshly initialised values with the checkpoint.
    tf.train.Saver().restore(sess, TEST_MODEL)
    network_input = extract_data_yolo(img, False)
    predictions = sess.run(model(network_input))
    boxes, labels = get_results(predictions)
    show_results(img, boxes, labels)
317 | 
def main(argv=None):
  """Build the training graph, run the optimisation loop and save a checkpoint.

  Image base names are collected from every file found under
  TRAIN_IMG_DIR. The number of steps and the minibatch offsets are derived
  from the TRAIN_SIZE constant, not from the number of files found.

  Args:
    argv: unused; present because tf.app.run() passes the command line.
  """
  num_epochs = NUM_EPOCHS
  train_img_list = []
  for _, _, filenames in os.walk(TRAIN_IMG_DIR):
    for filename in filenames:
      # Drop the last four characters (the extension, e.g. '.jpg').
      train_img_list.append(filename[:-4])

  train_data_node = tf.placeholder(
      tf.float32,
      shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS+2))
  train_labels_node = tf.placeholder(tf.float32, shape=(BATCH_SIZE, S*S*(B*5+CLASSES)))

  logits = model(train_data_node, True)
  loss = loss_func_yolo(logits, train_labels_node)

  # L2 weight decay on the fully connected layers only.
  regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                  tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))

  loss += 5e-4 * regularizers

  # Step counter incremented by apply_gradients; drives the decay schedule.
  batch = tf.Variable(0, dtype=tf.float32)

  learning_rate = tf.train.exponential_decay(
      0.01,
      batch * BATCH_SIZE,
      10000,
      0.95,
      staircase=True)

  op_func = tf.train.MomentumOptimizer(learning_rate,0.9)

  tvars = tf.trainable_variables()
  grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), 0.5)
  optimizer = op_func.apply_gradients(zip(grads, tvars), global_step=batch)

  tf.summary.scalar("loss", loss)
  tf.summary.scalar("lr", learning_rate)
  merged_summary = tf.summary.merge_all()
  with tf.Session() as sess:

    tf.global_variables_initializer().run()
    saver = tf.train.Saver()
    print('Initialized!')
    writer = tf.summary.FileWriter(SAVE_TENSORBOARD, sess.graph)

    try:
      for step in xrange(int(num_epochs * TRAIN_SIZE) // BATCH_SIZE):
        offset = (step * BATCH_SIZE) % (TRAIN_SIZE - BATCH_SIZE)
        # Slice the minibatch once so images and labels always match.
        minibatch = get_next_minibatch(offset, train_img_list)
        batch_data = extract_data_yolo(minibatch)
        batch_labels = extract_labels_yolo(minibatch)

        feed_dict = {train_data_node: batch_data,
                     train_labels_node: batch_labels}

        _,los,summary = sess.run([optimizer, loss, merged_summary], feed_dict=feed_dict)

        if step % EVAL_FREQUENCY == 0:
          print('loss: %.6f' % los)
          writer.add_summary(summary, step)
    finally:
      # Flush pending summary events to disk even if training aborts.
      writer.close()

    saver.save(sess, SAVE_MODEL)
378 | 
def parse_args():
    """Parse the command line.

    Returns:
        The argparse namespace with boolean `train` and `test` flags and
        the string (or None) `test_img_path`.
    """
    cli = argparse.ArgumentParser(description='YOLO demo')
    for flag, text in (('--train', 'train the model'),
                       ('--test', 'test the model')):
        cli.add_argument(flag, help=text, action='store_true')
    cli.add_argument('--test_img_path', type=str, help='img path to test')
    return cli.parse_args()
if __name__ == '__main__':
  # Script entry point: train with --train, otherwise run detection on
  # the image given by --test_img_path.
  args = parse_args()
  if args.train and args.test:
    print('Error: cannot train and test at the same time')
  elif args.train:
    tf.app.run()
  elif not args.test_img_path:
    # Without a path the image loaders would fail on None further down.
    print('Error: --test_img_path is required when not training')
  else:
    test(args.test_img_path)
396 | 


--------------------------------------------------------------------------------
/old_versions/very_tiny_yolo.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import argparse
  6 | import gzip
  7 | import os
  8 | import sys
  9 | import time
 10 | import cv2
 11 | import numpy
 12 | from PIL import Image
 13 | from six.moves import urllib
 14 | from six.moves import xrange
 15 | import tensorflow as tf
 16 | 
# --- Input geometry and preprocessing ---
IMAGE_SIZE = 224        # images are resized to IMAGE_SIZE x IMAGE_SIZE
NUM_CHANNELS = 3        # colour channels fed to the first convolution
PIXEL_DEPTH = 255       # used to centre/scale pixels: (p - PIXEL_DEPTH/2) / PIXEL_DEPTH
SEED = 66478            # seed passed to every tf.truncated_normal initialiser
# --- Training schedule ---
BATCH_SIZE = 1
NUM_EPOCHS = 20
# --- Output layout: S*S grid cells, each with B boxes of 5 values plus CLASSES scores ---
S = 7
B = 2
CLASSES = 2
# --- Loss weights (see loss_func_yolo) ---
COORD_W = 5             # weight of the box coordinate terms
NOOBJ_W = 0.5           # weight of the confidence term for empty cells
# --- Post-processing thresholds (see get_results) ---
PROB_THRESHOLD = 0.5    # minimum confidence * class score for a box to be kept
NMS_THRESHOLD = 0.5     # overlap above which nms() drops the lower-scored box
TRAIN_SIZE = 122        # number of training samples assumed by the step/offset arithmetic in main()
alpha = 0.1             # negative slope of the leaky ReLU: max(alpha*x, x)
EVAL_FREQUENCY = 100    # steps between loss printouts / summary writes
# --- Hard-coded filesystem locations ---
TRAIN_IMG_DIR = '/home/yy/train/'
TRAIN_LABEL_DIR = '/home/yy/labels/'
CLASSES_NAME = ["DaLai","NonDaLai"]   # display names indexed by class index
TEST_IMG_PATH = '/home/yy/109.jpg'    # NOTE(review): not referenced by the functions in this file - confirm before relying on it
RES_DIR = '/home/yy/pred_shuffle/'    # annotated images are written here
SAVE_MODEL = '/home/yy/tf_saver_models/model_newls.ckpt'   # checkpoint written after training
SAVE_TENSORBOARD = '/home/yy/tensorboard'                  # summary log directory
TEST_MODEL = '/home/yy/tf_saver_models/model_newls.ckpt'   # checkpoint restored by test()
 41 | 
 42 | 
# Module-level trainable parameters shared by model(), main() and test().
# Convolution kernels are laid out as [height, width, in_channels, out_channels],
# the filter layout expected by tf.nn.conv2d.
conv1_weights = tf.Variable(tf.truncated_normal([3, 3, NUM_CHANNELS, 64], stddev=0.01, seed=SEED, dtype=tf.float32))
conv1_biases = tf.Variable(tf.constant(0.1, shape=[64], dtype=tf.float32))
conv2_weights = tf.Variable(tf.truncated_normal([3, 3, 64, 128], stddev=0.01, seed=SEED, dtype=tf.float32))
conv2_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32))


# fc1 consumes the flattened output of the second pooling layer: two
# stride-2 poolings leave IMAGE_SIZE*IMAGE_SIZE//16 positions of 128 channels.
fc1_weights = tf.Variable(tf.truncated_normal([IMAGE_SIZE * IMAGE_SIZE //16  * 128, 128], stddev=0.01, seed=SEED, dtype=tf.float32))
fc1_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32))
# fc2 emits one value per slot of the S*S*(B*5+CLASSES) prediction vector.
fc2_weights = tf.Variable(tf.truncated_normal([128, S*S*(B*5+CLASSES)], stddev=0.01, seed=SEED, dtype=tf.float32))
fc2_biases = tf.Variable(tf.constant(0.1, shape=[S*S*(B*5+CLASSES)], dtype=tf.float32))
 53 | 
 54 | def model(data, train=False):
 55 |   conv = tf.nn.conv2d(data, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
 56 |   conv_bias = tf.nn.bias_add(conv, conv1_biases)
 57 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 58 | 
 59 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 60 | 
 61 |   conv = tf.nn.conv2d(pool, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
 62 |   conv_bias = tf.nn.bias_add(conv, conv2_biases)
 63 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 64 | 
 65 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 66 | 
 67 |   fc1_shape = pool.get_shape().as_list()
 68 |   reshape = tf.reshape(pool, [fc1_shape[0], fc1_shape[1] * fc1_shape[2] * fc1_shape[3]])
 69 | 
 70 |   fc1_hidden = tf.matmul(reshape, fc1_weights) + fc1_biases
 71 |   fc1_out = tf.maximum(alpha*fc1_hidden, fc1_hidden)
 72 | 
 73 |   return tf.matmul(fc1_out, fc2_weights) + fc2_biases
 74 | 
 75 | def nms(dets, thresh):
 76 |   """Non maximum suppression"""
 77 |   x1 = dets[:, 0]
 78 |   y1 = dets[:, 1]
 79 |   x2 = dets[:, 2]
 80 |   y2 = dets[:, 3]
 81 |   scores = dets[:, 4]
 82 | 
 83 |   areas = (x2 - x1 + 1) * (y2 - y1 + 1)
 84 |   order = scores.argsort()[::-1]
 85 | 
 86 |   keep = []
 87 |   while order.size > 0:
 88 |       i = order[0]
 89 |       keep.append(i)
 90 |       xx1 = numpy.maximum(x1[i], x1[order[1:]])
 91 |       yy1 = numpy.maximum(y1[i], y1[order[1:]])
 92 |       xx2 = numpy.minimum(x2[i], x2[order[1:]])
 93 |       yy2 = numpy.minimum(y2[i], y2[order[1:]])
 94 | 
 95 |       w = numpy.maximum(0.0, xx2 - xx1 + 1)
 96 |       h = numpy.maximum(0.0, yy2 - yy1 + 1)
 97 |       inter = w * h
 98 |       ovr = inter / (areas[i] + areas[order[1:]] - inter)
 99 | 
100 |       inds = numpy.where(ovr <= thresh)[0]
101 |       order = order[inds + 1]
102 | 
103 |   return keep
104 | 
def get_results(output):
  """Decode one network output row into NMS-filtered boxes and class indices.

  The first row of `output` is read as S*S consecutive cells, each holding
  B boxes of 5 values (x, y, w, h, confidence) followed by CLASSES class
  scores.

  Args:
    output: 2-D indexable; only output[0] is read.

  Returns:
    (results, classes): `results` is a list of
    [x1, y1, x2, y2, score] corner boxes that passed PROB_THRESHOLD and
    survived nms(); `classes` holds the matching class index of each box.
    Both lists are empty when nothing passes the threshold.
  """
  cell_len = B*5 + CLASSES
  preds = output[0]
  results = []
  classes = []
  probs = numpy.ndarray(shape=[CLASSES,])
  for p in range(B):
    # j is the index of box p's confidence; x, y, w, h sit at j-4 .. j-1.
    for j in range(4 + p*5, S*S*cell_len, cell_len):
      class_base = j + 1 + (B-1-p)*5  # first class score of the same cell
      for i in range(CLASSES):
        probs[i] = preds[j] * preds[class_base + i]

      cls_ind = probs.argsort()[::-1][0]
      if probs[cls_ind] > PROB_THRESHOLD:
        x, y, w, h = preds[j-4], preds[j-3], preds[j-2], preds[j-1]
        # The vertical extent must use the height (j-1), not y itself.
        results.append([x - w/2, y - h/2, x + w/2, y + h/2, probs[cls_ind]])
        classes.append(cls_ind)

  if len(results) == 0:
    return [],[]

  keep = nms(numpy.array(results).astype(numpy.float32), NMS_THRESHOLD)
  results_ = [results[i] for i in keep]
  classes_ = [classes[i] for i in keep]
  return results_,classes_
131 | 
def show_results(img_path, results, classes):
  """Draw the detections on the image at img_path and write it under RES_DIR.

  Args:
    img_path: path of the image to annotate; its last '/'-separated
      component is used as the output file name.
    results: sequence of [x1, y1, x2, y2, score] boxes. Coordinates are
      multiplied by the image width/height, i.e. treated as fractions of
      the image size.
    classes: indices into CLASSES_NAME, parallel to `results`.

  Raises:
    IOError: if cv2.imread cannot load img_path.
  """
  img = cv2.imread(img_path)
  if img is None:
    # cv2.imread reports failure by returning None rather than raising.
    raise IOError('cannot read image: %s' % img_path)

  height, width = img.shape[0], img.shape[1]
  for box, cls_ind in zip(results, classes):
    x1 = int(box[0]*width)
    y1 = int(box[1]*height)
    x2 = int(box[2]*width)
    y2 = int(box[3]*height)
    cv2.rectangle(img, (x1,y1), (x2,y2), (0,255,0), 2)
    cv2.putText(img, CLASSES_NAME[cls_ind] + ' : %.2f' % box[4], (x1+5,y1-7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1)

  cv2.imwrite(RES_DIR + img_path.split('/')[-1], img)
145 | 
def get_next_minibatch(offset, path_list):
  """Return BATCH_SIZE consecutive entries of path_list starting at offset.

  When the window would run past the end of the list, the first
  BATCH_SIZE entries are returned instead.
  """
  start, stop = offset, offset + BATCH_SIZE
  if stop > len(path_list):
    start, stop = 0, BATCH_SIZE
  return path_list[start:stop]
152 | 
def _load_normalized_image(path):
  """Load one image as a float32 (IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS) array scaled to [-0.5, 0.5]."""
  img = Image.open(path)
  if NUM_CHANNELS == 3 and img.mode != 'RGB':
    # Grayscale / RGBA / CMYK files would otherwise break the reshape below.
    img = img.convert('RGB')
  pixels = numpy.array(img.resize((IMAGE_SIZE,IMAGE_SIZE))).astype(numpy.float32)
  pixels = pixels.reshape(IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS)
  return (pixels - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH


def extract_data_yolo(path_list, train=True):
  """Load images into a normalised float32 batch.

  Args:
    path_list: when `train` is true, a list of image base names that are
      resolved as TRAIN_IMG_DIR + name + '.jpg'; otherwise a single image
      path.
    train: selects between the two interpretations of `path_list`.

  Returns:
    float32 array of shape (N, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS), with
    N = len(path_list) in training mode and N = 1 otherwise.
  """
  if not train:
    single = _load_normalized_image(path_list)
    return single.reshape(1,IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS)

  data = numpy.ndarray(shape=(len(path_list),IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS),dtype=numpy.float32)
  for i, name in enumerate(path_list):
    data[i] = _load_normalized_image(TRAIN_IMG_DIR+name+'.jpg')
  return data
171 | 
def iou(box1,box2):
  """Return the intersection-over-union of two centre-format boxes.

  Each box is indexed as (centre0, centre1, extent0, extent1): index 0 is
  the centre along the first axis and index 2 the full extent along that
  axis; indices 1 and 3 play the same role for the second axis.

  Returns 0.0 when the union area is not positive (e.g. two zero-area
  boxes) instead of dividing by zero.
  """
  overlap0 = (min(box1[0]+0.5*box1[2], box2[0]+0.5*box2[2]) -
              max(box1[0]-0.5*box1[2], box2[0]-0.5*box2[2]))
  overlap1 = (min(box1[1]+0.5*box1[3], box2[1]+0.5*box2[3]) -
              max(box1[1]-0.5*box1[3], box2[1]-0.5*box2[3]))
  if overlap0 < 0 or overlap1 < 0:
    intersection = 0
  else:
    intersection = overlap0*overlap1

  union = box1[2]*box1[3] + box2[2]*box2[3] - intersection
  if union <= 0:
    return 0.0
  return intersection / union
178 | 
179 | 
def extract_labels_yolo(path_list):
  """Read the label files of a minibatch into flat YOLO target vectors.

  Each label file is TRAIN_LABEL_DIR + name + '.txt'. Every non-blank line
  is split on whitespace into five fields: field 0 is an integer class
  index, fields 1 and 2 select the grid cell and are stored at offsets
  0/1 of each box, fields 3 and 4 are stored at offsets 2/3 (presumably
  x, y, w, h as fractions of the image size - confirm against the
  labelling tool).

  Args:
    path_list: list of label base names (without extension).

  Returns:
    float32 array of shape (len(path_list), S*S*(B*5+CLASSES)). Offsets 0
    and 5 of every cell start at 1.00001 (a "no object" marker that the
    loss detects with floor()); cells that contain an object have all B
    boxes filled with the label values, confidence 1, and a one-hot class.
  """
  cell_len = B*5 + CLASSES
  labels = numpy.zeros(shape=(len(path_list), S*S*cell_len), dtype=numpy.float32)
  # Marker value at the first slot of both boxes of every cell.
  labels[:, 0::cell_len] = 1.00001
  labels[:, 5::cell_len] = 1.00001

  for i, name in enumerate(path_list):
    with open(TRAIN_LABEL_DIR + name + '.txt',"r") as f:
      for line in f:
        data = line.split()
        if not data:
          continue  # tolerate blank / trailing empty lines
        # 1-based grid position; clamp so a coordinate of exactly 1.0 (or
        # a slightly out-of-range value) stays inside the S x S grid
        # instead of spilling into the next row or past the array end.
        col_no = min(max(int(float(data[1])*IMAGE_SIZE/(IMAGE_SIZE/S)+1), 1), S)
        row_no = min(max(int(float(data[2])*IMAGE_SIZE/(IMAGE_SIZE/S)+1), 1), S)
        grid_no = (row_no-1)*S+col_no
        cell_start = cell_len*(grid_no-1)

        labels[i, cell_start + B*5 + int(data[0])] = 1
        for k in range(B):
          box_start = cell_start + 5*k
          labels[i, box_start] = float(data[1])
          labels[i, box_start + 1] = float(data[2])
          labels[i, box_start + 2] = float(data[3])
          labels[i, box_start + 3] = float(data[4])
          labels[i, box_start + 4] = 1

  return labels
206 | 
def loss_func_yolo(output, exp):
  """Build the scalar detection loss for one minibatch.

  Both arguments are indexed as [image][flat_index], where the flat axis
  is made of S*S cells of B*5+CLASSES values. The offsets used below
  (second box at +5, class scores at +10/+11) are written out for two
  boxes and two classes per cell.

  Args:
    output: network predictions.
    exp: expected values (targets) with the same layout.

  Returns:
    The sum of all per-cell terms over the first BATCH_SIZE images.
  """
  res = 0

  for i in range(BATCH_SIZE):
    # j is the flat index of the first value of each grid cell.
    for j in range(0, S*S*(B*5+CLASSES), B*5+CLASSES):
      # Box 1 coordinates. tf.sign(exp[j+2]) is 1 only where the target at
      # offset 2 is positive, which masks out cells without an object.
      # Offsets 2 and 3 are compared through square roots; tf.abs keeps the
      # square root of the prediction real when the network outputs a
      # negative value.
      res += COORD_W * tf.sign(exp[i][j+2]) * (tf.square(output[i][j] - exp[i][j]) + tf.square(output[i][j+1]-exp[i][j+1]) + 
                                               tf.square(tf.sqrt(tf.abs(output[i][j+2])) - tf.sqrt(exp[i][j+2])) + 
                                               tf.square(tf.sqrt(tf.abs(output[i][j+3])) - tf.sqrt(exp[i][j+3])))

      # Box 1 confidence where the cell has an object.
      res += tf.sign(exp[i][j+2]) * (tf.square(output[i][j+4] - exp[i][j+4]))

      # Box 1 confidence where exp[j] floors to at least 1 - this matches
      # the 1.00001 marker that extract_labels_yolo leaves in empty cells.
      res += NOOBJ_W * tf.sign(tf.floor(exp[i][j])) * (tf.square(output[i][j+4] - exp[i][j+4]))

      # Box 2 coordinates: same terms, shifted by 5.
      res += COORD_W * tf.sign(exp[i][j+7]) * (tf.square(output[i][j+5] - exp[i][j+5]) + tf.square(output[i][j+6]-exp[i][j+6]) + 
                                               tf.square(tf.sqrt(tf.abs(output[i][j+7])) - tf.sqrt(exp[i][j+7])) + 
                                               tf.square(tf.sqrt(tf.abs(output[i][j+8])) - tf.sqrt(exp[i][j+8])))

      # Box 2 confidence, object cells.
      res += tf.sign(exp[i][j+7]) * (tf.square(output[i][j+9] - exp[i][j+9]))

      # Box 2 confidence, empty cells (marker at offset 5).
      res += NOOBJ_W * tf.sign(tf.floor(exp[i][j+5])) * (tf.square(output[i][j+9] - exp[i][j+9]))

      # Squared error on the two class scores, only for object cells.
      res += tf.sign(exp[i][j+7]) * (tf.square(output[i][j+10] - exp[i][j+10]) + tf.square(output[i][j+11] - exp[i][j+11]))

  return res
231 | 
def test(img):
  """Restore the TEST_MODEL checkpoint, run the network on one image and save the annotated result.

  Args:
    img: path of the image to run detection on.
  """
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Restoring overwrites the freshly initialised values with the checkpoint.
    tf.train.Saver().restore(sess, TEST_MODEL)
    network_input = extract_data_yolo(img, False)
    predictions = sess.run(model(network_input))
    boxes, labels = get_results(predictions)
    show_results(img, boxes, labels)
241 | 
def main(argv=None):
  """Build the training graph, run the optimisation loop and save a checkpoint.

  Image base names are collected from every file found under
  TRAIN_IMG_DIR. The number of steps and the minibatch offsets are derived
  from the TRAIN_SIZE constant, not from the number of files found.

  Args:
    argv: unused; present because tf.app.run() passes the command line.
  """
  num_epochs = NUM_EPOCHS
  train_img_list = []
  for _, _, filenames in os.walk(TRAIN_IMG_DIR):
    for filename in filenames:
      # Drop the last four characters (the extension, e.g. '.jpg').
      train_img_list.append(filename[:-4])

  train_data_node = tf.placeholder(
      tf.float32,
      shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
  train_labels_node = tf.placeholder(tf.float32, shape=(BATCH_SIZE, S*S*(B*5+CLASSES)))

  logits = model(train_data_node, True)
  loss = loss_func_yolo(logits, train_labels_node)

  # L2 weight decay on the fully connected layers only.
  regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                  tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))

  loss += 5e-4 * regularizers

  # Step counter incremented by apply_gradients; drives the decay schedule.
  batch = tf.Variable(0, dtype=tf.float32)

  learning_rate = tf.train.exponential_decay(
      0.01,
      batch * BATCH_SIZE,
      TRAIN_SIZE,
      0.95,
      staircase=True)

  op_func = tf.train.MomentumOptimizer(learning_rate,0.9)

  tvars = tf.trainable_variables()
  grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), 0.5)
  optimizer = op_func.apply_gradients(zip(grads, tvars), global_step=batch)

  tf.summary.scalar("loss", loss)
  tf.summary.scalar("lr", learning_rate)
  merged_summary = tf.summary.merge_all()
  with tf.Session() as sess:

    tf.global_variables_initializer().run()
    saver = tf.train.Saver()
    print('Initialized!')
    writer = tf.summary.FileWriter(SAVE_TENSORBOARD, sess.graph)

    try:
      for step in xrange(int(num_epochs * TRAIN_SIZE) // BATCH_SIZE):
        offset = (step * BATCH_SIZE) % (TRAIN_SIZE - BATCH_SIZE)
        # Slice the minibatch once so images and labels always match.
        minibatch = get_next_minibatch(offset, train_img_list)
        batch_data = extract_data_yolo(minibatch)
        batch_labels = extract_labels_yolo(minibatch)

        feed_dict = {train_data_node: batch_data,
                     train_labels_node: batch_labels}

        _,los,summary = sess.run([optimizer, loss, merged_summary], feed_dict=feed_dict)

        if step % EVAL_FREQUENCY == 0:
          print('loss: %.6f' % los)
          writer.add_summary(summary, step)
    finally:
      # Flush pending summary events to disk even if training aborts.
      writer.close()

    saver.save(sess, SAVE_MODEL)
301 | 
def parse_args():
    """Parse the command line.

    Returns:
        The argparse namespace with boolean `train` and `test` flags and
        the string (or None) `test_img_path`.
    """
    cli = argparse.ArgumentParser(description='YOLO demo')
    for flag, text in (('--train', 'train the model'),
                       ('--test', 'test the model')):
        cli.add_argument(flag, help=text, action='store_true')
    cli.add_argument('--test_img_path', type=str, help='img path to test')
    return cli.parse_args()
if __name__ == '__main__':
  # Script entry point: train with --train, otherwise run detection on
  # the image given by --test_img_path.
  args = parse_args()
  if args.train and args.test:
    print('Error: cannot train and test at the same time')
  elif args.train:
    tf.app.run()
  elif not args.test_img_path:
    # Without a path the image loaders would fail on None further down.
    print('Error: --test_img_path is required when not training')
  else:
    test(args.test_img_path)
319 | 


--------------------------------------------------------------------------------
/old_versions/very_tiny_yolov2.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import argparse
  6 | import gzip
  7 | import os
  8 | import sys
  9 | import time
 10 | import cv2
 11 | import numpy
 12 | from PIL import Image
 13 | from six.moves import urllib
 14 | from six.moves import xrange
 15 | import tensorflow as tf
 16 | 
# --- Input geometry and preprocessing ---
IMAGE_SIZE = 224        # images are resized to IMAGE_SIZE x IMAGE_SIZE
NUM_CHANNELS = 3        # colour channels; two coordinate planes are appended in extract_data_yolo
PIXEL_DEPTH = 255       # used to centre/scale pixels: (p - PIXEL_DEPTH/2) / PIXEL_DEPTH
SEED = 66478            # seed passed to every tf.truncated_normal initialiser
# --- Training schedule ---
BATCH_SIZE = 1
NUM_EPOCHS = 100
# --- Output layout: S*S grid cells, each with B boxes of 5 values plus CLASSES scores ---
S = 7
B = 2
CLASSES = 2
# --- Loss weights (see loss_func_yolo) ---
COORD_W = 5             # weight of the box coordinate terms
NOOBJ_W = 0.5           # weight of the confidence term for empty cells
# --- Post-processing thresholds (see get_results) ---
PROB_THRESHOLD = 0.25   # minimum confidence * class score for a box to be kept
NMS_THRESHOLD = 0.5     # overlap above which nms() drops the lower-scored box
TRAIN_SIZE = 122        # presumably the number of training samples - confirm against the training loop
alpha = 0.1             # negative slope of the leaky ReLU: max(alpha*x, x)
EVAL_FREQUENCY = 100    # presumably steps between loss printouts - confirm against the training loop
# --- Hard-coded filesystem locations ---
TRAIN_IMG_DIR = '/home/yy/train/'
TRAIN_LABEL_DIR = '/home/yy/labels/'
CLASSES_NAME = ["DaLai","NonDaLai"]   # display names indexed by class index
TEST_IMG_PATH = '/home/yy/109.jpg'    # NOTE(review): not referenced by the functions visible here - confirm before relying on it
RES_DIR = '/home/yy/pred_decay1W/'    # annotated images are written here by show_results
SAVE_MODEL = '/home/yy/tf_saver_models/model_conv5_epoch100_decay10000.ckpt'
SAVE_TENSORBOARD = '/home/yy/tensorboard'
TEST_MODEL = '/home/yy/tf_saver_models/model_conv5_epoch100_decay10000.ckpt'
 41 | 
 42 | 
# Module-level trainable parameters used by model().
# Convolution kernels are laid out as [height, width, in_channels, out_channels],
# the filter layout expected by tf.nn.conv2d. The first layer takes
# NUM_CHANNELS+2 inputs because extract_data_yolo appends two coordinate planes.
conv1_weights = tf.Variable(tf.truncated_normal([3, 3, NUM_CHANNELS+2, 64], stddev=0.01, seed=SEED, dtype=tf.float32))
conv1_biases = tf.Variable(tf.constant(0.1, shape=[64], dtype=tf.float32))
conv2_weights = tf.Variable(tf.truncated_normal([3, 3, 64, 128], stddev=0.01, seed=SEED, dtype=tf.float32))
conv2_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32))
conv3_weights = tf.Variable(tf.truncated_normal([3, 3, 128, 256], stddev=0.01, seed=SEED, dtype=tf.float32))
conv3_biases = tf.Variable(tf.constant(0.1, shape=[256], dtype=tf.float32))
conv4_weights = tf.Variable(tf.truncated_normal([3, 3, 256, 512], stddev=0.01, seed=SEED, dtype=tf.float32))
conv4_biases = tf.Variable(tf.constant(0.1, shape=[512], dtype=tf.float32))
conv5_weights = tf.Variable(tf.truncated_normal([3, 3, 512, 1024], stddev=0.01, seed=SEED, dtype=tf.float32))
conv5_biases = tf.Variable(tf.constant(0.1, shape=[1024], dtype=tf.float32))

# fc1 consumes the flattened output of the fifth pooling layer: five
# stride-2 poolings leave IMAGE_SIZE*IMAGE_SIZE//1024 positions of 1024 channels.
fc1_weights = tf.Variable(tf.truncated_normal([IMAGE_SIZE * IMAGE_SIZE //1024  * 1024, 128], stddev=0.01, seed=SEED, dtype=tf.float32))
fc1_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32))
# fc2 emits one value per slot of the S*S*(B*5+CLASSES) prediction vector.
fc2_weights = tf.Variable(tf.truncated_normal([128, S*S*(B*5+CLASSES)], stddev=0.01, seed=SEED, dtype=tf.float32))
fc2_biases = tf.Variable(tf.constant(0.1, shape=[S*S*(B*5+CLASSES)], dtype=tf.float32))
 58 | 
 59 | def model(data, train=False):
 60 |   conv = tf.nn.conv2d(data, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
 61 |   conv_bias = tf.nn.bias_add(conv, conv1_biases)
 62 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 63 | 
 64 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 65 | 
 66 |   conv = tf.nn.conv2d(pool, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
 67 |   conv_bias = tf.nn.bias_add(conv, conv2_biases)
 68 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 69 | 
 70 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 71 | 
 72 |   conv = tf.nn.conv2d(pool, conv3_weights, strides=[1, 1, 1, 1], padding='SAME')
 73 |   conv_bias = tf.nn.bias_add(conv, conv3_biases)
 74 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 75 | 
 76 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 77 | 
 78 |   conv = tf.nn.conv2d(pool, conv4_weights, strides=[1, 1, 1, 1], padding='SAME')
 79 |   conv_bias = tf.nn.bias_add(conv, conv4_biases)
 80 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 81 | 
 82 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 83 | 
 84 |   conv = tf.nn.conv2d(pool, conv5_weights, strides=[1, 1, 1, 1], padding='SAME')
 85 |   conv_bias = tf.nn.bias_add(conv, conv5_biases)
 86 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 87 | 
 88 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 89 | 
 90 |   fc1_shape = pool.get_shape().as_list()
 91 |   reshape = tf.reshape(pool, [fc1_shape[0], fc1_shape[1] * fc1_shape[2] * fc1_shape[3]])
 92 | 
 93 |   fc1_hidden = tf.matmul(reshape, fc1_weights) + fc1_biases
 94 |   fc1_out = tf.maximum(alpha*fc1_hidden, fc1_hidden)
 95 | 
 96 |   return tf.matmul(fc1_out, fc2_weights) + fc2_biases
 97 | 
 98 | def nms(dets, thresh):
 99 |   """Non maximum suppression"""
100 |   x1 = dets[:, 0]
101 |   y1 = dets[:, 1]
102 |   x2 = dets[:, 2]
103 |   y2 = dets[:, 3]
104 |   scores = dets[:, 4]
105 | 
106 |   areas = (x2 - x1 + 1) * (y2 - y1 + 1)
107 |   order = scores.argsort()[::-1]
108 | 
109 |   keep = []
110 |   while order.size > 0:
111 |       i = order[0]
112 |       keep.append(i)
113 |       xx1 = numpy.maximum(x1[i], x1[order[1:]])
114 |       yy1 = numpy.maximum(y1[i], y1[order[1:]])
115 |       xx2 = numpy.minimum(x2[i], x2[order[1:]])
116 |       yy2 = numpy.minimum(y2[i], y2[order[1:]])
117 | 
118 |       w = numpy.maximum(0.0, xx2 - xx1 + 1)
119 |       h = numpy.maximum(0.0, yy2 - yy1 + 1)
120 |       inter = w * h
121 |       ovr = inter / (areas[i] + areas[order[1:]] - inter)
122 | 
123 |       inds = numpy.where(ovr <= thresh)[0]
124 |       order = order[inds + 1]
125 | 
126 |   return keep
127 | 
def get_results(output):
  """Decode one network output row into NMS-filtered boxes and class indices.

  The first row of `output` is read as S*S consecutive cells, each holding
  B boxes of 5 values (x, y, w, h, confidence) followed by CLASSES class
  scores.

  Args:
    output: 2-D indexable; only output[0] is read.

  Returns:
    (results, classes): `results` is a list of
    [x1, y1, x2, y2, score] corner boxes that passed PROB_THRESHOLD and
    survived nms(); `classes` holds the matching class index of each box.
    Both lists are empty when nothing passes the threshold.
  """
  cell_len = B*5 + CLASSES
  preds = output[0]
  results = []
  classes = []
  probs = numpy.ndarray(shape=[CLASSES,])
  for p in range(B):
    # j is the index of box p's confidence; x, y, w, h sit at j-4 .. j-1.
    for j in range(4 + p*5, S*S*cell_len, cell_len):
      class_base = j + 1 + (B-1-p)*5  # first class score of the same cell
      for i in range(CLASSES):
        probs[i] = preds[j] * preds[class_base + i]

      cls_ind = probs.argsort()[::-1][0]
      if probs[cls_ind] > PROB_THRESHOLD:
        x, y, w, h = preds[j-4], preds[j-3], preds[j-2], preds[j-1]
        # The vertical extent must use the height (j-1), not y itself.
        results.append([x - w/2, y - h/2, x + w/2, y + h/2, probs[cls_ind]])
        classes.append(cls_ind)

  if len(results) == 0:
    return [],[]

  keep = nms(numpy.array(results).astype(numpy.float32), NMS_THRESHOLD)
  results_ = [results[i] for i in keep]
  classes_ = [classes[i] for i in keep]
  return results_,classes_
154 | 
def show_results(img_path, results, classes):
  """Draw the detections on the image at img_path and write it under RES_DIR.

  Args:
    img_path: path of the image to annotate; its last '/'-separated
      component is used as the output file name.
    results: sequence of [x1, y1, x2, y2, score] boxes. Coordinates are
      multiplied by the image width/height, i.e. treated as fractions of
      the image size.
    classes: indices into CLASSES_NAME, parallel to `results`.

  Raises:
    IOError: if cv2.imread cannot load img_path.
  """
  img = cv2.imread(img_path)
  if img is None:
    # cv2.imread reports failure by returning None rather than raising.
    raise IOError('cannot read image: %s' % img_path)

  height, width = img.shape[0], img.shape[1]
  for box, cls_ind in zip(results, classes):
    x1 = int(box[0]*width)
    y1 = int(box[1]*height)
    x2 = int(box[2]*width)
    y2 = int(box[3]*height)
    cv2.rectangle(img, (x1,y1), (x2,y2), (0,255,0), 2)
    cv2.putText(img, CLASSES_NAME[cls_ind] + ' : %.2f' % box[4], (x1+5,y1-7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 2)

  cv2.imwrite(RES_DIR + img_path.split('/')[-1], img)
168 | 
def get_next_minibatch(offset, path_list):
  """Return BATCH_SIZE consecutive entries of path_list starting at offset.

  When the window would run past the end of the list, the first
  BATCH_SIZE entries are returned instead.
  """
  start, stop = offset, offset + BATCH_SIZE
  if stop > len(path_list):
    start, stop = 0, BATCH_SIZE
  return path_list[start:stop]
175 | 
def _load_normalized_image(path):
  """Load one image as a float32 (IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS) array scaled to [-0.5, 0.5]."""
  img = Image.open(path)
  if NUM_CHANNELS == 3 and img.mode != 'RGB':
    # Grayscale / RGBA / CMYK files would otherwise break the reshape below.
    img = img.convert('RGB')
  pixels = numpy.array(img.resize((IMAGE_SIZE,IMAGE_SIZE))).astype(numpy.float32)
  pixels = pixels.reshape(IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS)
  return (pixels - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH


def extract_data_yolo(path_list, train=True):
  """Build the network input tensor: image channels plus two coordinate planes.

  Args:
    path_list: when `train` is true, a list of image base names that are
      resolved as TRAIN_IMG_DIR + name + '.jpg'; otherwise a single image
      path.
    train: selects between the two interpretations of `path_list`.

  Returns:
    float32 array of shape (N, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS + 2),
    with N = len(path_list) in training mode and N = 1 otherwise. The
    last two channels hold row_index/IMAGE_SIZE and
    column_index/IMAGE_SIZE respectively.
  """
  if train:
    paths = [TRAIN_IMG_DIR+name+'.jpg' for name in path_list]
  else:
    paths = [path_list]

  data = numpy.ndarray(shape=(len(paths),IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS + 2),dtype=numpy.float32)

  # Coordinate planes are identical for every image, so fill them by
  # broadcasting instead of looping over rows and columns per image.
  ticks = numpy.arange(IMAGE_SIZE, dtype=numpy.float64) / IMAGE_SIZE
  data[..., -2] = ticks[:, numpy.newaxis]  # varies along the row axis
  data[..., -1] = ticks                    # varies along the column axis

  for i, path in enumerate(paths):
    data[i,:,:,:-2] = _load_normalized_image(path)

  return data
209 | 
def iou(box1,box2):
  """Return the intersection-over-union of two centre-format boxes.

  Each box is indexed as (centre0, centre1, extent0, extent1): index 0 is
  the centre along the first axis and index 2 the full extent along that
  axis; indices 1 and 3 play the same role for the second axis.

  Returns 0.0 when the union area is not positive (e.g. two zero-area
  boxes) instead of dividing by zero.
  """
  overlap0 = (min(box1[0]+0.5*box1[2], box2[0]+0.5*box2[2]) -
              max(box1[0]-0.5*box1[2], box2[0]-0.5*box2[2]))
  overlap1 = (min(box1[1]+0.5*box1[3], box2[1]+0.5*box2[3]) -
              max(box1[1]-0.5*box1[3], box2[1]-0.5*box2[3]))
  if overlap0 < 0 or overlap1 < 0:
    intersection = 0
  else:
    intersection = overlap0*overlap1

  union = box1[2]*box1[3] + box2[2]*box2[3] - intersection
  if union <= 0:
    return 0.0
  return intersection / union
216 | 
217 | 
def extract_labels_yolo(path_list):
  """Read the label files of a minibatch into flat YOLO target vectors.

  Each label file is TRAIN_LABEL_DIR + name + '.txt'. Every non-blank line
  is split on whitespace into five fields: field 0 is an integer class
  index, fields 1 and 2 select the grid cell and are stored at offsets
  0/1 of each box, fields 3 and 4 are stored at offsets 2/3 (presumably
  x, y, w, h as fractions of the image size - confirm against the
  labelling tool).

  Args:
    path_list: list of label base names (without extension).

  Returns:
    float32 array of shape (len(path_list), S*S*(B*5+CLASSES)). Offsets 0
    and 5 of every cell start at 1.00001 (a "no object" marker that the
    loss detects with floor()); cells that contain an object have all B
    boxes filled with the label values, confidence 1, and a one-hot class.
  """
  cell_len = B*5 + CLASSES
  labels = numpy.zeros(shape=(len(path_list), S*S*cell_len), dtype=numpy.float32)
  # Marker value at the first slot of both boxes of every cell.
  labels[:, 0::cell_len] = 1.00001
  labels[:, 5::cell_len] = 1.00001

  for i, name in enumerate(path_list):
    with open(TRAIN_LABEL_DIR + name + '.txt',"r") as f:
      for line in f:
        data = line.split()
        if not data:
          continue  # tolerate blank / trailing empty lines
        # 1-based grid position; clamp so a coordinate of exactly 1.0 (or
        # a slightly out-of-range value) stays inside the S x S grid
        # instead of spilling into the next row or past the array end.
        col_no = min(max(int(float(data[1])*IMAGE_SIZE/(IMAGE_SIZE/S)+1), 1), S)
        row_no = min(max(int(float(data[2])*IMAGE_SIZE/(IMAGE_SIZE/S)+1), 1), S)
        grid_no = (row_no-1)*S+col_no
        cell_start = cell_len*(grid_no-1)

        labels[i, cell_start + B*5 + int(data[0])] = 1
        for k in range(B):
          box_start = cell_start + 5*k
          labels[i, box_start] = float(data[1])
          labels[i, box_start + 1] = float(data[2])
          labels[i, box_start + 2] = float(data[3])
          labels[i, box_start + 3] = float(data[4])
          labels[i, box_start + 4] = 1

  return labels
244 | 
def loss_func_yolo(output, label):
  """Sum the YOLO loss over BATCH_SIZE samples and all S*S grid cells.

  The slot offsets are written for two boxes and two classes per cell:
  offsets 0-4 and 5-9 hold (x, y, w, h, confidence) of each box, offsets
  10-11 the class scores.

  Args:
    output: predictions, indexable as output[sample][slot].
    label: targets with the same layout.

  Returns:
    Scalar tensor holding the accumulated loss.
  """
  cell_len = B*5+CLASSES
  total = 0

  for n in range(BATCH_SIZE):
    pred = output[n]
    truth = label[n]
    for base in range(0, S*S*cell_len, cell_len):
      for box in (base, base+5):
        # 1 when the target width is positive, i.e. the box holds an object.
        has_obj = tf.sign(truth[box+2])
        # Centre error plus relative width/height error.
        total += COORD_W * has_obj * (tf.square(pred[box] - truth[box]) + tf.square(pred[box+1]-truth[box+1]) +
                                      tf.square(pred[box+2]/(truth[box+2]+1e-7) - 1) +
                                      tf.square(pred[box+3]/(truth[box+3]+1e-7) - 1))

        # Confidence error for boxes that hold an object.
        total += has_obj * (tf.square(pred[box+4] - truth[box+4]))

        # floor(x) is 1 only when the target x slot is >= 1, which is how the
        # labels mark an empty cell; those get the down-weighted term.
        total += NOOBJ_W * tf.sign(tf.floor(truth[box])) * (tf.square(pred[box+4] - truth[box+4]))

      # Class error, gated on the second box's target width.
      total += tf.sign(truth[base+7]) * (tf.square(pred[base+10] - truth[base+10]) + tf.square(pred[base+11] - truth[base+11]))

  return total
269 | 
def test(img):
  """Run the saved model on one image and write the annotated result.

  Args:
    img: path of the image file to evaluate.
  """
  with tf.Session() as session:
    tf.global_variables_initializer().run()
    # Overwrite the freshly initialised variables with the checkpoint.
    tf.train.Saver().restore(session, TEST_MODEL)
    prediction = session.run(model(extract_data_yolo(img, False)))
    boxes, labels = get_results(prediction)
    show_results(img, boxes, labels)
279 | 
def main(argv=None):
  """Train the network and save a checkpoint to SAVE_MODEL.

  Builds the training graph (model, loss with L2 regularisation on the
  fc1/fc2 parameters, momentum optimizer with clipped gradients), then
  feeds minibatches taken from TRAIN_IMG_DIR for
  NUM_EPOCHS * TRAIN_SIZE // BATCH_SIZE steps.

  Args:
    argv: unused; present so the function can be started via tf.app.run().
  """
  num_epochs = NUM_EPOCHS
  # Sample names are the file names with the last four characters
  # (presumably the '.jpg' extension) removed.
  train_img_list = []
  for rt,dirs,filenames in os.walk(TRAIN_IMG_DIR):
    for filename in filenames:
      train_img_list.append(filename[:-4])

  # The input carries two extra channels besides the NUM_CHANNELS colour ones.
  train_data_node = tf.placeholder(
      tf.float32,
      shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS+2))
  train_labels_node = tf.placeholder(tf.float32, shape=(BATCH_SIZE, S*S*(B*5+CLASSES)))

  logits = model(train_data_node, True)
  loss = loss_func_yolo(logits, train_labels_node)

  # Only the fully connected parameters fc1/fc2 are regularised.
  regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                  tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))

  loss += 5e-4 * regularizers

  # Step counter, incremented by apply_gradients through global_step.
  batch = tf.Variable(0, dtype=tf.float32)

  learning_rate = tf.train.exponential_decay(
      0.01,                # base learning rate
      batch * BATCH_SIZE,  # number of samples seen so far
      10000,               # decay every 10000 samples
      0.95,                # decay factor
      staircase=True)

  op_func = tf.train.MomentumOptimizer(learning_rate,0.9)

  # Clip the global gradient norm to 0.5 before applying the update.
  tvars = tf.trainable_variables()
  grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), 0.5)
  optimizer = op_func.apply_gradients(zip(grads, tvars), global_step=batch)

  tf.summary.scalar("loss", loss)
  tf.summary.scalar("lr", learning_rate)
  merged_summary = tf.summary.merge_all()
  with tf.Session() as sess:

    tf.global_variables_initializer().run()
    saver = tf.train.Saver()
    print('Initialized!')
    writer = tf.summary.FileWriter(SAVE_TENSORBOARD, sess.graph)

    for step in xrange(int(num_epochs * TRAIN_SIZE) // BATCH_SIZE):
      # Wrap the read position so offset + BATCH_SIZE stays below TRAIN_SIZE.
      offset = (step * BATCH_SIZE) % (TRAIN_SIZE - BATCH_SIZE)
      # NOTE(review): the minibatch is fetched twice; data and labels only
      # match if get_next_minibatch returns the same names on both calls.
      batch_data = extract_data_yolo(get_next_minibatch(offset, train_img_list))
      batch_labels = extract_labels_yolo(get_next_minibatch(offset, train_img_list))

      feed_dict = {train_data_node: batch_data,
                   train_labels_node: batch_labels}

      _,los,summary = sess.run([optimizer, loss, merged_summary], feed_dict=feed_dict)

      # Report the loss and write a summary every EVAL_FREQUENCY steps.
      if step % EVAL_FREQUENCY == 0:
        print('loss: %.6f' % los)
        #print('weight: %.5f' % sess.run(conv1_weights)[0,0,0,0])
        writer.add_summary(summary, step)
    save_path = saver.save(sess, SAVE_MODEL)
340 | 
def parse_args():
    """Parse the command line of the YOLO demo.

    Returns:
        argparse.Namespace with the boolean attributes `train` and `test`
        and the string (or None) attribute `test_img_path`.
    """
    cli = argparse.ArgumentParser(description='YOLO demo')
    switches = (('--train', 'train the model'), ('--test', 'test the model'))
    for flag, text in switches:
        cli.add_argument(flag, action='store_true', help=text)
    cli.add_argument('--test_img_path', type=str, help='img path to test')
    return cli.parse_args()
# Script entry point: train with --train, otherwise evaluate one image.
if __name__ == '__main__':
  args = parse_args()
  if args.train and args.test:
    print('Error: cannot train and test at the same time')
  elif args.train:
    tf.app.run()
  elif not args.test_img_path:
    # Without a path test() would be handed None and fail while opening it.
    print('Error: --test_img_path is required for testing')
  else:
    test(args.test_img_path)
358 | 


--------------------------------------------------------------------------------
/old_versions/very_tiny_yolov3.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import argparse
  6 | import gzip
  7 | import os
  8 | import sys
  9 | import time
 10 | import cv2
 11 | import numpy
 12 | from PIL import Image
 13 | from six.moves import urllib
 14 | from six.moves import xrange
 15 | import tensorflow as tf
 16 | 
# --- Input / network geometry ---
IMAGE_SIZE = 224      # images are resized to IMAGE_SIZE x IMAGE_SIZE
NUM_CHANNELS = 3      # colour channels; the input has two more for coordinates
PIXEL_DEPTH = 255     # used to normalise pixel values
SEED = 66478          # seed for the weight initialisers
BATCH_SIZE = 1
NUM_EPOCHS = 100
S = 7                 # the output describes S*S grid cells
B = 2                 # boxes per grid cell
CLASSES = 2           # class scores per grid cell
# --- Loss weights ---
COORD_W = 5           # weight of the box coordinate terms
NOOBJ_W = 0.5         # weight of the confidence term for empty cells
# --- Post-processing thresholds ---
PROB_THRESHOLD = 0.25 # minimum confidence * class score to keep a box
NMS_THRESHOLD = 0.5   # overlap threshold passed to nms()
TRAIN_SIZE = 122      # used by main() for the step count and batch offset
alpha = 0.1           # negative slope of the leaky ReLU
EVAL_FREQUENCY = 100  # steps between loss printouts / summaries
# --- Paths ---
TRAIN_IMG_DIR = '/home/yy/train/'
TRAIN_LABEL_DIR = '/home/yy/labels/'
CLASSES_NAME = ["DaLai","NonDaLai"]
TEST_IMG_DIR = '/home/yy/train/'
# NOTE(review): relative path (no leading '/'), unlike every other directory
# here -- looks like a typo for '/home/yy/labels/'; confirm.
TEST_LABEL_DIR = 'home/yy/labels/'
RES_DIR = '/home/yy/subnets/'            # annotated images are written here
SAVE_MODEL = '/home/yy/tf_saver_models/model_subnets.ckpt'
SAVE_TENSORBOARD = '/home/yy/tensorboard'
 41 | 
 42 | 
# Five 3x3 convolution layers; the first one consumes the colour channels
# plus the two coordinate channels.
conv1_weights = tf.Variable(tf.truncated_normal([3, 3, NUM_CHANNELS+2, 64], stddev=0.01, seed=SEED, dtype=tf.float32))
conv1_biases = tf.Variable(tf.constant(0.1, shape=[64], dtype=tf.float32))
conv2_weights = tf.Variable(tf.truncated_normal([3, 3, 64, 128], stddev=0.01, seed=SEED, dtype=tf.float32))
conv2_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32))
conv3_weights = tf.Variable(tf.truncated_normal([3, 3, 128, 256], stddev=0.01, seed=SEED, dtype=tf.float32))
conv3_biases = tf.Variable(tf.constant(0.1, shape=[256], dtype=tf.float32))
conv4_weights = tf.Variable(tf.truncated_normal([3, 3, 256, 512], stddev=0.01, seed=SEED, dtype=tf.float32))
conv4_biases = tf.Variable(tf.constant(0.1, shape=[512], dtype=tf.float32))
conv5_weights = tf.Variable(tf.truncated_normal([3, 3, 512, 1024], stddev=0.01, seed=SEED, dtype=tf.float32))
conv5_biases = tf.Variable(tf.constant(0.1, shape=[1024], dtype=tf.float32))

# Box branch (fc1 -> fc2): flattened last pooled feature map -> 128 ->
# S*S*B*5 box values.
fc1_weights = tf.Variable(tf.truncated_normal([IMAGE_SIZE * IMAGE_SIZE //1024  * 1024, 128], stddev=0.01, seed=SEED, dtype=tf.float32))
fc1_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32))
fc2_weights = tf.Variable(tf.truncated_normal([128, S*S*(B*5)], stddev=0.01, seed=SEED, dtype=tf.float32))
fc2_biases = tf.Variable(tf.constant(0.1, shape=[S*S*(B*5)], dtype=tf.float32))

# Class branch (fc3 -> fc4): 1024 average-pooled features -> 128 ->
# S*S*CLASSES class scores.
fc3_weights = tf.Variable(tf.truncated_normal([1024, 128], stddev=0.01, seed=SEED, dtype=tf.float32))
fc3_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32))
fc4_weights = tf.Variable(tf.truncated_normal([128, S*S*CLASSES], stddev=0.01, seed=SEED, dtype=tf.float32))
fc4_biases = tf.Variable(tf.constant(0.1, shape=[S*S*CLASSES], dtype=tf.float32))
 63 | 
 64 | def model(data, train=False):
 65 |   conv = tf.nn.conv2d(data, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
 66 |   conv_bias = tf.nn.bias_add(conv, conv1_biases)
 67 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 68 | 
 69 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 70 | 
 71 |   conv = tf.nn.conv2d(pool, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
 72 |   conv_bias = tf.nn.bias_add(conv, conv2_biases)
 73 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 74 | 
 75 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 76 | 
 77 |   conv = tf.nn.conv2d(pool, conv3_weights, strides=[1, 1, 1, 1], padding='SAME')
 78 |   conv_bias = tf.nn.bias_add(conv, conv3_biases)
 79 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 80 | 
 81 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 82 | 
 83 |   conv = tf.nn.conv2d(pool, conv4_weights, strides=[1, 1, 1, 1], padding='SAME')
 84 |   conv_bias = tf.nn.bias_add(conv, conv4_biases)
 85 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 86 | 
 87 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 88 | 
 89 |   conv = tf.nn.conv2d(pool, conv5_weights, strides=[1, 1, 1, 1], padding='SAME')
 90 |   conv_bias = tf.nn.bias_add(conv, conv5_biases)
 91 |   lrelu = tf.maximum(alpha*conv_bias, conv_bias)
 92 | 
 93 |   pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 94 | 
 95 |   fc1_shape = pool.get_shape().as_list()
 96 |   reshape = tf.reshape(pool, [fc1_shape[0], fc1_shape[1] * fc1_shape[2] * fc1_shape[3]])
 97 | 
 98 |   fc1_hidden = tf.matmul(reshape, fc1_weights) + fc1_biases
 99 |   fc1_out = tf.maximum(alpha*fc1_hidden, fc1_hidden)
100 | 
101 |   coors = tf.matmul(fc1_out, fc2_weights) + fc2_biases
102 | 
103 |   pool = tf.nn.avg_pool(lrelu, ksize=[1, IMAGE_SIZE*IMAGE_SIZE/1024, IMAGE_SIZE*IMAGE_SIZE/1024, 1], strides=[1, IMAGE_SIZE*IMAGE_SIZE/1024, IMAGE_SIZE*IMAGE_SIZE/1024, 1], padding='SAME')
104 |   
105 |   fc3_shape = pool.get_shape().as_list()
106 |   reshape = tf.reshape(pool, [fc3_shape[0], fc3_shape[1] * fc3_shape[2] * fc3_shape[3]])
107 | 
108 |   fc3_hidden = tf.matmul(reshape, fc3_weights) + fc3_biases
109 |   fc3_out = tf.maximum(alpha*fc3_hidden, fc3_hidden)
110 | 
111 |   probs = tf.matmul(fc3_out, fc4_weights) + fc4_biases
112 |   output = []
113 | 
114 |   for i in range(BATCH_SIZE):
115 |     for j in range(S*S):
116 |       for k in range(10):
117 |         output.append(coors[i,j*B*5+k])
118 |       for k in range(CLASSES):
119 |         output.append(probs[i,j*CLASSES+k])
120 |   output = tf.reshape(output, [BATCH_SIZE, S*S*(B*5+CLASSES)])
121 | 
122 |   return output
123 | 
124 | 
def nms(dets, thresh, offset=0.0):
  """Greedy non-maximum suppression.

  Args:
    dets: 2-D numpy array, one detection per row: x1, y1, x2, y2, score.
    thresh: a detection is dropped when its IoU with an already kept,
      higher-scoring detection exceeds this value.
    offset: amount added to every width and height. The original code
      always added 1 (inclusive integer pixel coordinates), but this file
      passes coordinates that are fractions of the image size, where the
      extra 1 inflated the IoU and suppressed boxes that barely overlap.
      Pass offset=1 for inclusive pixel coordinates.

  Returns:
    List of row indices of the kept detections, highest score first.
  """
  x1 = dets[:, 0]
  y1 = dets[:, 1]
  x2 = dets[:, 2]
  y2 = dets[:, 3]
  scores = dets[:, 4]

  areas = (x2 - x1 + offset) * (y2 - y1 + offset)
  order = scores.argsort()[::-1]

  keep = []
  while order.size > 0:
    i = order[0]
    keep.append(i)
    rest = order[1:]

    # Intersection of the best remaining box with all the others.
    w = numpy.maximum(0.0, numpy.minimum(x2[i], x2[rest]) - numpy.maximum(x1[i], x1[rest]) + offset)
    h = numpy.maximum(0.0, numpy.minimum(y2[i], y2[rest]) - numpy.maximum(y1[i], y1[rest]) + offset)
    inter = w * h
    union = areas[i] + areas[rest] - inter

    # Degenerate (zero-area) pairs count as non-overlapping instead of
    # producing 0/0.
    ovr = numpy.zeros_like(inter)
    positive = union > 0
    ovr[positive] = inter[positive] / union[positive]

    order = rest[ovr <= thresh]

  return keep
154 | 
def get_results(output):
  """Decode the network output of one image into boxes and class indices.

  Only output[0] is read. For every box slot of every grid cell the
  confidence is multiplied with each class score; the best class is kept
  when that product exceeds PROB_THRESHOLD. The surviving boxes then go
  through nms() with NMS_THRESHOLD.

  Args:
    output: array indexable as output[0][slot], laid out per cell as B
      groups of (x, y, w, h, confidence) followed by CLASSES scores.

  Returns:
    (results, classes): results is a list of [x1, y1, x2, y2, score],
    classes the matching list of class indices. Both are empty when nothing
    passes the threshold.
  """
  cell_len = B*5+CLASSES
  row = output[0]
  results = []
  classes = []
  probs = numpy.ndarray(shape=[CLASSES,])
  for p in range(B):
    # j walks over the confidence slot of box p in every cell.
    for j in range(4 + p*5, S*S*cell_len, cell_len):
      for i in range(CLASSES):
        # The class scores start right after the last box of the cell.
        probs[i] = row[j] * row[j + 1+ (B-1-p)*5 + i]

      cls_ind = probs.argsort()[::-1][0]
      if probs[cls_ind] > PROB_THRESHOLD:
        x, y, w, h = row[j-4], row[j-3], row[j-2], row[j-1]
        # The vertical extent previously used y/2 instead of h/2.
        results.append([x - w/2, y - h/2, x + w/2, y + h/2, probs[cls_ind]])
        classes.append(cls_ind)

  if not results:
    return [],[]

  keep = nms(numpy.array(results).astype(numpy.float32), NMS_THRESHOLD)
  return [results[i] for i in keep],[classes[i] for i in keep]
181 | 
def show_results(img_path, results, classes):
  """Draw the detections on the image and save it under RES_DIR.

  Args:
    img_path: path of the source image; its last '/'-separated component is
      reused as the output file name.
    results: list of [x1, y1, x2, y2, score]; the coordinates are scaled by
      the image width/height before drawing.
    classes: class index per detection, used to look up CLASSES_NAME.
  """
  canvas = cv2.imread(img_path).copy()
  for idx, det in enumerate(results):
    left = int(det[0]*canvas.shape[1])
    top = int(det[1]*canvas.shape[0])
    right = int(det[2]*canvas.shape[1])
    bottom = int(det[3]*canvas.shape[0])
    cv2.rectangle(canvas, (left,top), (right,bottom), (0,255,0), 2)
    caption = CLASSES_NAME[classes[idx]] + ' : %.2f' % det[4]
    cv2.putText(canvas, caption, (left+5,top-7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 2)

  target = RES_DIR + img_path.split('/')[-1]
  cv2.imwrite(target, canvas)
195 | 
def get_next_minibatch(offset, path_list):
  """Return BATCH_SIZE consecutive entries of path_list starting at offset.

  When the window would run past the end of the list, the first BATCH_SIZE
  entries are returned instead.
  """
  stop = offset + BATCH_SIZE
  if stop <= len(path_list):
    return path_list[offset:stop]
  return path_list[:BATCH_SIZE]
202 | 
def extract_data_yolo(path_list, train=True):
  """Load images into a float32 batch with two extra coordinate channels.

  Every image is resized to IMAGE_SIZE x IMAGE_SIZE and its pixels are
  mapped to (value - PIXEL_DEPTH/2) / PIXEL_DEPTH. The second-to-last
  channel holds row_index / IMAGE_SIZE, the last one
  column_index / IMAGE_SIZE.

  Args:
    path_list: with train=True a sequence of sample names resolved as
      TRAIN_IMG_DIR + name + '.jpg'; with train=False the path of a single
      image file.
    train: selects between the two input forms above.

  Returns:
    Array of shape (n, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS + 2), where n is
    len(path_list) for train=True and 1 otherwise.
  """
  count = len(path_list) if train else 1
  data = numpy.ndarray(shape=(count,IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS + 2), dtype=numpy.float32)

  # Position ramp 0, 1/IMAGE_SIZE, ... broadcast over rows and columns.
  ramp = numpy.arange(IMAGE_SIZE)/IMAGE_SIZE
  data[:,:,:,-2] = ramp[:,None]
  data[:,:,:,-1] = ramp[None,:]

  if train:
    sources = [TRAIN_IMG_DIR+name+'.jpg' for name in path_list]
  else:
    sources = [path_list]

  for idx, source in enumerate(sources):
    resized = Image.open(source).resize((IMAGE_SIZE,IMAGE_SIZE))
    pixels = numpy.array(resized).astype(numpy.float32).reshape(IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS)
    data[idx,:,:,:-2] = (pixels - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH

  return data
236 | 
def iou(box1,box2):
  """Return the intersection-over-union of two centre-format boxes.

  Args:
    box1, box2: indexable as (c0, c1, size0, size1), where c0/c1 are the
      centre coordinates and size0/size1 the full extents along the same
      two axes.

  Returns:
    intersection area / union area; 0 when the boxes do not overlap or when
    the union area is not positive (degenerate boxes).
  """
  # Overlap length along each axis; negative means the boxes are disjoint.
  overlap0 = min(box1[0]+0.5*box1[2],box2[0]+0.5*box2[2])-max(box1[0]-0.5*box1[2],box2[0]-0.5*box2[2])
  overlap1 = min(box1[1]+0.5*box1[3],box2[1]+0.5*box2[3])-max(box1[1]-0.5*box1[3],box2[1]-0.5*box2[3])
  if overlap0 < 0 or overlap1 < 0:
    intersection = 0
  else:
    intersection = overlap0*overlap1
  union = box1[2]*box1[3] + box2[2]*box2[3] - intersection
  # Two zero-area boxes used to raise ZeroDivisionError here.
  if union <= 0:
    return 0.0
  return intersection / union
243 | 
244 | 
def extract_labels_yolo(path_list, train=True):
  """Build the flattened YOLO target array for a list of samples.

  For every name in path_list the file <root> + name + '.txt' is read,
  where <root> is TRAIN_LABEL_DIR when train is true and TEST_LABEL_DIR
  otherwise. Each non-blank line is split on whitespace into
  `class x y w h`; x and y select the grid cell and all four values are
  copied into every box slot of that cell.

  Per-cell layout (length B*5+CLASSES): B groups of (x, y, w, h, confidence)
  followed by CLASSES one-hot class entries. Cells without an object keep
  the sentinel 1.00001 in the x slot of every box and 0 elsewhere.

  Args:
    path_list: sequence of sample names (no directory, no extension).
    train: selects the label directory.

  Returns:
    float32 numpy array of shape (len(path_list), S*S*(B*5+CLASSES)).
  """
  root = TRAIN_LABEL_DIR if train else TEST_LABEL_DIR
  cell_len = B*5+CLASSES
  labels = numpy.zeros(shape=(len(path_list),S*S*cell_len), dtype=numpy.float32)
  # Sentinel slightly above 1 in the x slot of each box marks "no object".
  for k in range(B):
    labels[:, 5*k::cell_len] = 1.00001

  for i, name in enumerate(path_list):
    with open(root + name + '.txt',"r") as f:
      for line in f:
        data = line.split()
        if not data:
          # Blank (e.g. trailing) lines used to raise IndexError.
          continue
        x, y, w, h = (float(v) for v in data[1:5])
        # 1-based cell indices, clamped to the grid: a coordinate of exactly
        # 1.0 used to yield S+1 and spill into the next row or past the end
        # of the array.
        col_no = max(1, min(int(x*IMAGE_SIZE/(IMAGE_SIZE/S)+1), S))
        row_no = max(1, min(int(y*IMAGE_SIZE/(IMAGE_SIZE/S)+1), S))
        grid_no = (row_no-1)*S+col_no
        base = cell_len*(grid_no-1)
        labels[i, base + B*5 + int(data[0])] = 1
        for k in range(B):
          labels[i, base + 5*k:base + 5*k + 5] = [x, y, w, h, 1]

  return labels
275 | 
def loss_func_yolo(output, label):
  """Sum the YOLO loss over BATCH_SIZE samples and all S*S grid cells.

  The slot offsets are written for two boxes and two classes per cell:
  offsets 0-4 and 5-9 hold (x, y, w, h, confidence) of each box, offsets
  10-11 the class scores.

  Args:
    output: predictions, indexable as output[sample][slot].
    label: targets with the same layout.

  Returns:
    Scalar tensor holding the accumulated loss.
  """
  cell_len = B*5+CLASSES
  total = 0

  for n in range(BATCH_SIZE):
    pred = output[n]
    truth = label[n]
    for base in range(0, S*S*cell_len, cell_len):
      for box in (base, base+5):
        # 1 when the target width is positive, i.e. the box holds an object.
        has_obj = tf.sign(truth[box+2])
        # Centre error plus relative width/height error.
        total += COORD_W * has_obj * (tf.square(pred[box] - truth[box]) + tf.square(pred[box+1]-truth[box+1]) +
                                      tf.square(pred[box+2]/(truth[box+2]+1e-7) - 1) +
                                      tf.square(pred[box+3]/(truth[box+3]+1e-7) - 1))

        # Confidence error for boxes that hold an object.
        total += has_obj * (tf.square(pred[box+4] - truth[box+4]))

        # floor(x) is 1 only when the target x slot is >= 1, which is how
        # extract_labels_yolo marks an empty cell (sentinel 1.00001).
        total += NOOBJ_W * tf.sign(tf.floor(truth[box])) * (tf.square(pred[box+4] - truth[box+4]))

      # Class error, gated on the second box's target width.
      total += tf.sign(truth[base+7]) * (tf.square(pred[base+10] - truth[base+10]) + tf.square(pred[base+11] - truth[base+11]))

  return total
300 | 
def test_from_img(img, test_model, display_loss=False):
  """Run a saved model on one image and write the annotated result.

  Args:
    img: path of the image file.
    test_model: checkpoint path handed to Saver.restore.
    display_loss: when true, also load the label file whose name is the
      image's base name without extension and print the loss.
  """
  with tf.Session() as sess:
    tf.global_variables_initializer().run()
    saver = tf.train.Saver()
    saver.restore(sess, test_model)
    data = extract_data_yolo(img, train=False)
    out = sess.run(model(data))
    if display_loss:
      # extract_labels_yolo prepends the label directory and appends '.txt',
      # so it needs the bare sample name rather than the full image path.
      sample = os.path.splitext(os.path.basename(img))[0]
      label = extract_labels_yolo([sample], train=False)
      # loss_func_yolo returns a tensor; evaluate it before formatting
      # (formatting the tensor itself with %f raised TypeError).
      print('loss: %.6f' % sess.run(loss_func_yolo(out, label)))
    results,classes = get_results(out)
    show_results(img, results, classes)
313 | 
def test_from_dir(imgdir, test_model, display_loss=False):
  """Run a saved model on every file below a directory.

  Each image is evaluated, its detections are drawn and saved through
  show_results. With display_loss the per-image losses are summed and the
  total is printed once at the end.

  Args:
    imgdir: directory path; its last character (the trailing '/') is
      stripped before walking it.
    test_model: checkpoint path handed to Saver.restore.
    display_loss: when true, load the label file named after each image's
      base name (without extension) and accumulate the loss.
  """
  # Build the inference (and loss) graph once and feed every image through
  # it; calling model() per image added a new copy of the network to the
  # graph on every iteration.
  data_node = tf.placeholder(
      tf.float32,
      shape=(1, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS+2))
  output_node = model(data_node)
  if display_loss:
    label_node = tf.placeholder(tf.float32, shape=(1, S*S*(B*5+CLASSES)))
    loss_node = loss_func_yolo(output_node, label_node)

  with tf.Session() as sess:
    tf.global_variables_initializer().run()
    saver = tf.train.Saver()
    saver.restore(sess, test_model)

    total_loss = 0.0
    for root, dirs, files in os.walk(imgdir[:-1]):
      for filename in files:
        img = os.path.join(root, filename)
        feed = {data_node: extract_data_yolo(img, train=False)}
        if display_loss:
          # extract_labels_yolo expects the bare sample name, not a path.
          sample = os.path.splitext(filename)[0]
          feed[label_node] = extract_labels_yolo([sample], train=False)
          # Evaluate the loss tensor; summing and formatting unevaluated
          # tensors raised TypeError in the final print.
          out, loss_value = sess.run([output_node, loss_node], feed_dict=feed)
          total_loss += loss_value
        else:
          out = sess.run(output_node, feed_dict=feed)
        results,classes = get_results(out)
        show_results(img, results, classes)

    if display_loss:
      print('loss: %.6f' % total_loss)
339 | 
def main(argv=None):
  """Train the network, save a checkpoint and evaluate it on TEST_IMG_DIR.

  Builds the training graph (model, loss with L2 regularisation on the
  fc1/fc2 parameters, momentum optimizer with clipped gradients), feeds
  minibatches from TRAIN_IMG_DIR for NUM_EPOCHS * TRAIN_SIZE // BATCH_SIZE
  steps, saves to SAVE_MODEL and finally runs test_from_dir on the saved
  checkpoint.

  Args:
    argv: unused; present so the function can be started via tf.app.run().
  """
  num_epochs = NUM_EPOCHS
  # Sample names are the file names without their last four characters
  # (the '.jpg' extension that extract_data_yolo appends again).
  train_img_list = []
  for rt,dirs,filenames in os.walk(TRAIN_IMG_DIR):
    for filename in filenames:
      train_img_list.append(filename[:-4])

  train_data_node = tf.placeholder(
      tf.float32,
      shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS+2))
  train_labels_node = tf.placeholder(tf.float32, shape=(BATCH_SIZE, S*S*(B*5+CLASSES)))

  logits = model(train_data_node, True)
  loss = loss_func_yolo(logits, train_labels_node)

  # Only the box-branch parameters fc1/fc2 are regularised.
  regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                  tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))

  loss += 5e-4 * regularizers

  # Step counter, incremented by apply_gradients through global_step.
  batch = tf.Variable(0, dtype=tf.float32)

  learning_rate = tf.train.exponential_decay(
      0.01,                # base learning rate
      batch * BATCH_SIZE,  # number of samples seen so far
      10000,               # decay every 10000 samples
      0.95,                # decay factor
      staircase=True)

  op_func = tf.train.MomentumOptimizer(learning_rate,0.9)

  # Clip the global gradient norm to 0.5 before applying the update.
  tvars = tf.trainable_variables()
  grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), 0.5)
  optimizer = op_func.apply_gradients(zip(grads, tvars), global_step=batch)

  tf.summary.scalar("loss", loss)
  tf.summary.scalar("lr", learning_rate)
  merged_summary = tf.summary.merge_all()
  with tf.Session() as sess:

    tf.global_variables_initializer().run()
    saver = tf.train.Saver()
    print('Initialized!')
    writer = tf.summary.FileWriter(SAVE_TENSORBOARD, sess.graph)

    for step in xrange(int(num_epochs * TRAIN_SIZE) // BATCH_SIZE):
      # Wrap the read position so offset + BATCH_SIZE stays below TRAIN_SIZE.
      offset = (step * BATCH_SIZE) % (TRAIN_SIZE - BATCH_SIZE)
      # Fetch the minibatch once so images and labels always refer to the
      # same samples.
      minibatch = get_next_minibatch(offset, train_img_list)
      batch_data = extract_data_yolo(minibatch)
      batch_labels = extract_labels_yolo(minibatch)

      feed_dict = {train_data_node: batch_data,
                   train_labels_node: batch_labels}

      _,los,summary = sess.run([optimizer, loss, merged_summary], feed_dict=feed_dict)

      # Report the loss and write a summary every EVAL_FREQUENCY steps.
      if step % EVAL_FREQUENCY == 0:
        print('loss: %.6f' % los)
        writer.add_summary(summary, step)
    # Flush pending summaries to disk.
    writer.close()
    save_path = saver.save(sess, SAVE_MODEL)
    # test_from_dir requires the checkpoint path as its second argument; the
    # previous call omitted it and raised TypeError after training finished.
    test_from_dir(TEST_IMG_DIR, save_path, display_loss=True)
401 | 
def parse_args():
    """Parse the command line of the YOLO demo.

    Returns:
        argparse.Namespace with the boolean attributes `train`, `test` and
        `display_loss` and the string (or None) attributes `test_img_path`
        and `test_model`.
    """
    cli = argparse.ArgumentParser(description='YOLO demo')
    for flag, text in (('--train', 'train the model'), ('--test', 'test the model')):
        cli.add_argument(flag, action='store_true', help=text)
    cli.add_argument('--test_img_path', type=str, help='img path to test')
    cli.add_argument('--display_loss', action='store_true', default=False, help='whether display the loss')
    cli.add_argument('--test_model', type=str, help='model to test')
    return cli.parse_args()
# Script entry point: train with --train; otherwise evaluate a directory
# (path ending in '/') or a single image file.
if __name__ == '__main__':
  args = parse_args()
  if args.train and args.test:
    print('Error: cannot train and test at the same time')
  elif args.train:
    tf.app.run()
  elif not args.test_img_path:
    # Indexing [-1] on a missing or empty path used to raise
    # TypeError/IndexError.
    print('Error: --test_img_path is required for testing')
  elif args.test_img_path.endswith('/'):
    test_from_dir(args.test_img_path, args.test_model, args.display_loss)
  else:
    test_from_img(args.test_img_path, args.test_model, args.display_loss)
422 | 


--------------------------------------------------------------------------------
/some_tests/108.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lesliejackson/YOLOv1_tensorflow/2a64ab376607babcebb7c54e92142c5d9c28f537/some_tests/108.jpg


--------------------------------------------------------------------------------
/some_tests/109.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lesliejackson/YOLOv1_tensorflow/2a64ab376607babcebb7c54e92142c5d9c28f537/some_tests/109.jpg


--------------------------------------------------------------------------------
/some_tests/126.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lesliejackson/YOLOv1_tensorflow/2a64ab376607babcebb7c54e92142c5d9c28f537/some_tests/126.jpg


--------------------------------------------------------------------------------
/some_tests/152.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lesliejackson/YOLOv1_tensorflow/2a64ab376607babcebb7c54e92142c5d9c28f537/some_tests/152.jpg


--------------------------------------------------------------------------------
/some_tests/171.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lesliejackson/YOLOv1_tensorflow/2a64ab376607babcebb7c54e92142c5d9c28f537/some_tests/171.jpg


--------------------------------------------------------------------------------
/some_tests/173.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lesliejackson/YOLOv1_tensorflow/2a64ab376607babcebb7c54e92142c5d9c28f537/some_tests/173.jpg


--------------------------------------------------------------------------------
/yolo_v1.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import gzip
  3 | import os
  4 | import sys
  5 | import time
  6 | import cv2
  7 | import numpy
  8 | import tensorflow as tf
  9 | from PIL import Image
 10 | from six.moves import urllib
 11 | from six.moves import xrange
 12 | from nets import nets_factory
 13 | 
 14 | 
 15 | flags = tf.app.flags
 16 | flags.DEFINE_integer("epoch", 25, "Epoch to train [25]")
 17 | flags.DEFINE_integer("S", 7, "cut the img to S*S grids[7]")
 18 | flags.DEFINE_integer("num_classes", 2, "number of classes [2]")
 19 | flags.DEFINE_integer("B", 2, "number of bboxs for one grid to predict [2]")
 20 | flags.DEFINE_float("learning_rate", 0.001, "Learning rate of for d network [0.0001]")
 21 | flags.DEFINE_float("alpha", 0.1, "alpha of leaky relu [0.1]")
 22 | flags.DEFINE_float("nms_threshold", 0.5, "threshold of nms [0.5]")
 23 | flags.DEFINE_float("prob_threshold", 0.25, "probablity threshold of test [0.25]")
 24 | flags.DEFINE_float("coordinate_weight", 5, "weight of coordinate regression in loss function [5]")
 25 | flags.DEFINE_float("noobj_weight", 0.5, "weight of confidence regression in loss function when there is no obj in grid [0.5]")
 26 | flags.DEFINE_integer("batch_size", 1, "The size of batch images [128]")
 27 | flags.DEFINE_integer("img_size", 224, "image size [224]")
 28 | flags.DEFINE_integer("channel_dim", 3, "Dimension of image color [3]")
 29 | flags.DEFINE_string("model_name", 'inception_v4', "which model to use")
 30 | flags.DEFINE_string("img_pattern", 'jpg', "jpg or png")
 31 | flags.DEFINE_integer("save_summary_step", 100, "save summary per [] steps [100]")
 32 | flags.DEFINE_integer("save_model_step", 100, "save model per [] steps [100]")
 33 | flags.DEFINE_integer("log_loss_step", 100, "log loss information per [] steps [100]")
 34 | flags.DEFINE_string("checkpoint_dir", '/home/yy/yolo_/ckpt', "Directory name to save the checkpoints")
 35 | flags.DEFINE_string("tensorboard_dir", '/home/yy/yolo_/tb', "Directory name to save the tensorboard")
 36 | flags.DEFINE_string("train_dir", '/home/yy/yolo_/train', "Directory name to train images")
 37 | flags.DEFINE_string("train_label", '/home/yy/yolo_/label', "Directory name to train labels")
 38 | flags.DEFINE_string("test_res_dir", None, "Directory name to save test images")
 39 | flags.DEFINE_string("test_data", None, "Directory name to test images")
 40 | flags.DEFINE_string("test_label", None, "Directory name to test labels")
 41 | flags.DEFINE_boolean("is_test", False, "True for testing, False for training [False]")
 42 | FLAGS = flags.FLAGS
 43 | 
 44 | slim = tf.contrib.slim
 45 | CLASSES_NAME = ["DaLai","NonDaLai"]
 46 | 
 47 | 
 48 | def nms(dets, thresh):
 49 |   """Non maximum suppression"""
 50 |   """code from rbg/py-faster-rcnn"""
 51 |   x1 = dets[:, 0]
 52 |   y1 = dets[:, 1]
 53 |   x2 = dets[:, 2]
 54 |   y2 = dets[:, 3]
 55 |   scores = dets[:, 4]
 56 | 
 57 |   areas = (x2 - x1 + 1) * (y2 - y1 + 1)
 58 |   order = scores.argsort()[::-1]
 59 | 
 60 |   keep = []
 61 |   while order.size > 0:
 62 |     i = order[0]
 63 |     keep.append(i)
 64 |     xx1 = numpy.maximum(x1[i], x1[order[1:]])
 65 |     yy1 = numpy.maximum(y1[i], y1[order[1:]])
 66 |     xx2 = numpy.minimum(x2[i], x2[order[1:]])
 67 |     yy2 = numpy.minimum(y2[i], y2[order[1:]])
 68 | 
 69 |     w = numpy.maximum(0.0, xx2 - xx1 + 1)
 70 |     h = numpy.maximum(0.0, yy2 - yy1 + 1)
 71 |     inter = w * h
 72 |     ovr = inter / (areas[i] + areas[order[1:]] - inter)
 73 | 
 74 |     inds = numpy.where(ovr <= thresh)[0]
 75 |     order = order[inds + 1]
 76 | 
 77 |   return keep
 78 | 
 79 | def get_results(output):
 80 |   results = []
 81 |   classes = []
 82 |   probs = numpy.ndarray(shape=[FLAGS.num_classes,])
 83 |   for p in range(FLAGS.B):
 84 |     for j in range(4 + p*5, FLAGS.S*FLAGS.S*(FLAGS.B*5+FLAGS.num_classes), FLAGS.B*5+FLAGS.num_classes):
 85 |       for i in range(FLAGS.num_classes):
 86 |         probs[i] = output[0][j] * output[0][j + 1+ (FLAGS.B-1-p)*5 + i]
 87 | 
 88 |       cls_ind = probs.argsort()[::-1][0]
 89 |       if probs[cls_ind] > FLAGS.prob_threshold:
 90 |         results.append([output[0][j-4] - output[0][j-2]/2, output[0][j-3] - output[0][j-3]/2, output[0][j-4] + output[0][j-2]/2, output[0][j-3] + output[0][j-3]/2, probs[cls_ind]])
 91 |         classes.append(cls_ind)
 92 | 
 93 |   res = numpy.array(results).astype(numpy.float32)
 94 |   if len(res) != 0:
 95 |     keep = nms(res, FLAGS.nms_threshold)
 96 |     results_ = []
 97 |     classes_ = []
 98 |     for i in keep:
 99 |       results_.append(results[i])
100 |       classes_.append(classes[i])
101 | 
102 |     return results_,classes_
103 |   else:
104 |     return [],[]
105 | 
def show_results(img_path, results, classes):
  """Draw the detections on the image and save it under FLAGS.test_res_dir.

  Args:
    img_path: path of the source image; its last '/'-separated component is
      reused as the output file name.
    results: list of [x1, y1, x2, y2, score]; the coordinates are scaled by
      the image width/height before drawing.
    classes: class index per detection, used to look up CLASSES_NAME.
  """
  canvas = cv2.imread(img_path).copy()
  for idx, det in enumerate(results):
    left = int(det[0]*canvas.shape[1])
    top = int(det[1]*canvas.shape[0])
    right = int(det[2]*canvas.shape[1])
    bottom = int(det[3]*canvas.shape[0])
    cv2.rectangle(canvas, (left,top), (right,bottom), (0,255,0), 2)
    caption = CLASSES_NAME[classes[idx]] + ' : %.2f' % det[4]
    cv2.putText(canvas, caption, (left+5,top-7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 2)

  target = FLAGS.test_res_dir + '/' + img_path.split('/')[-1]
  cv2.imwrite(target, canvas)
119 | 
def get_next_minibatch(offset, path_list):
  """Return the next ``FLAGS.batch_size`` entries of ``path_list``.

  When a full batch starting at ``offset`` fits, that slice is returned.
  Otherwise ``path_list`` is shuffled in place and its first batch is
  returned instead.
  """
  end = offset + FLAGS.batch_size
  if end <= len(path_list):
    return path_list[offset:end]

  # Not enough items left: reshuffle and restart from the front.
  random.shuffle(path_list)
  return path_list[:FLAGS.batch_size]
126 | 
def _load_image_yolo(img_path):
  """Load one image as a float32 (img_size, img_size, channel_dim) array."""
  # The context manager closes the underlying file; resize() returns an
  # independent image, so it stays usable after the source is closed.
  with Image.open(img_path) as img:
    img_resize = img.resize((FLAGS.img_size,FLAGS.img_size))
  return numpy.array(img_resize).astype(numpy.float32).reshape(
    FLAGS.img_size,FLAGS.img_size,FLAGS.channel_dim)


def extract_data_yolo(path_list, train=True):
  """Load images, resize them to the network input size and scale to [-1, 1].

  Args:
    path_list: when ``train`` is true, a sequence of image base names that
      are resolved as ``FLAGS.train_dir/<name>.<FLAGS.img_pattern>``;
      otherwise the path of a single image file.
    train: selects between batch loading and single-image loading.

  Returns:
    A float32 array of shape (len(path_list), img_size, img_size,
    channel_dim) in train mode, or (1, img_size, img_size, channel_dim)
    otherwise, with pixel values mapped through (v - 127.5) / 127.5.
  """
  if train:
    data = numpy.ndarray(shape=(len(path_list),FLAGS.img_size,FLAGS.img_size,FLAGS.channel_dim),dtype=numpy.float32)
    for i, name in enumerate(path_list):
      data[i] = _load_image_yolo(FLAGS.train_dir+'/'+name+'.'+FLAGS.img_pattern)
  else:
    # Single image: add the leading batch dimension expected by the network.
    data = _load_image_yolo(path_list).reshape(1,FLAGS.img_size,FLAGS.img_size,FLAGS.channel_dim)

  return (data - 127.5) / 127.5
145 | 
def iou(box1,box2):
  """Return the intersection-over-union of two centre-format boxes.

  Args:
    box1, box2: indexable as (x_center, y_center, width, height).

  Returns:
    intersection / union of the two boxes; 0 when they do not overlap,
    and 0.0 when the union has zero area (both boxes degenerate), where a
    plain division would fail.
  """
  # Overlap along the x axis (indices 0 and 2) and the y axis (1 and 3).
  overlap_w = (min(box1[0]+0.5*box1[2],box2[0]+0.5*box2[2])
               - max(box1[0]-0.5*box1[2],box2[0]-0.5*box2[2]))
  overlap_h = (min(box1[1]+0.5*box1[3],box2[1]+0.5*box2[3])
               - max(box1[1]-0.5*box1[3],box2[1]-0.5*box2[3]))
  if overlap_w < 0 or overlap_h < 0:
    intersection = 0
  else:
    intersection = overlap_w*overlap_h

  union = box1[2]*box1[3] + box2[2]*box2[3] - intersection
  if union == 0:
    # Two zero-area boxes: define the IoU as 0 instead of dividing by zero.
    return 0.0
  return intersection / union
152 | 
153 | 
def extract_labels_yolo(path_list, train=True):
  """Build the flat YOLO target vectors for a list of samples.

  Each sample's annotations are read from ``<root>/<name>.txt`` where root
  is ``FLAGS.train_label`` (train) or ``FLAGS.test_labels`` (test). Every
  non-blank line is split on whitespace into ``class x y w h``; x and y are
  presumably normalised to [0, 1] since they are multiplied by S to pick
  the grid cell (TODO confirm against the annotation tool).

  Target layout: S*S cells, each with B boxes of (x, y, w, h, confidence)
  followed by ``FLAGS.num_classes`` one-hot class slots. Cells without an
  object keep the x slot of every box at 1.00001 and everything else at 0;
  ``loss_func_yolo`` applies ``tf.floor`` to that slot to detect them.

  Args:
    path_list: sample base names (without extension).
    train: selects which label directory is read.

  Returns:
    float32 array of shape (len(path_list), S*S*(B*5 + num_classes)).
  """
  root = FLAGS.train_label if train else FLAGS.test_labels
  cell_len = FLAGS.B*5 + FLAGS.num_classes

  labels = numpy.zeros(shape=(len(path_list),FLAGS.S*FLAGS.S*cell_len), dtype=numpy.float32)
  # Mark the x slot of every box in every cell as "no object". The offsets
  # used to be hard-coded to 0 and 5, i.e. only correct for B == 2.
  for k in range(FLAGS.B):
    labels[:, 5*k::cell_len] = 1.00001

  for i, name in enumerate(path_list):
    with open(root + '/' + name + '.txt',"r") as f:
      lines = f.readlines()

    for line in lines:
      data = line.split()
      if not data:
        # Tolerate blank lines (e.g. a trailing newline at end of file).
        continue

      col_no = int(float(data[1])*FLAGS.img_size/(FLAGS.img_size/FLAGS.S)+1)
      row_no = int(float(data[2])*FLAGS.img_size/(FLAGS.img_size/FLAGS.S)+1)
      # A coordinate of exactly 1.0 would map to cell S+1; clamp into the
      # grid so the label lands in the border cell instead of a wrong cell
      # or past the end of the vector.
      col_no = min(max(col_no, 1), FLAGS.S)
      row_no = min(max(row_no, 1), FLAGS.S)

      # First index of the (1-based row_no, col_no) cell.
      base = cell_len*((row_no-1)*FLAGS.S + (col_no-1))

      labels[i, base + FLAGS.B*5 + int(data[0])] = 1
      for k in range(FLAGS.B):
        box = base + 5*k
        labels[i, box] = float(data[1])
        labels[i, box + 1] = float(data[2])
        labels[i, box + 2] = float(data[3])
        labels[i, box + 3] = float(data[4])
        labels[i, box + 4] = 1

  return labels
184 | 
def loss_func_yolo(output, label):
  """Build the YOLO loss tensor, averaged over the batch.

  Both arguments are indexed as ``[sample][slot]`` over a flat vector of
  S*S cells; each cell holds two boxes of (x, y, w, h, confidence) followed
  by ``FLAGS.num_classes`` class slots. The box handling below is written
  for exactly two boxes per cell (slots 0-4 and 5-9).

  Per cell, only the predicted box with the strictly higher confidence
  contributes box terms; when both confidences are equal neither does.
  The terms, in order, are:
    * coordinate loss (x, y, sqrt(w), sqrt(h)), weighted by
      ``FLAGS.coordinate_weight``, for cells whose label width is non-zero;
    * confidence loss for those same cells;
    * confidence loss weighted by ``FLAGS.noobj_weight`` for cells whose
      label x slot floors to a non-zero value (the "no object" marker);
    * squared error over all class slots, gated on the label width of the
      second box.

  NOTE(review): ``tf.sqrt`` yields NaN for negative inputs, so the w/h
  slots of ``output`` are presumably expected to be non-negative - confirm
  what activation the network applies.

  Args:
    output: network predictions, at least ``FLAGS.batch_size`` rows.
    label: targets with the same layout as ``output``.

  Returns:
    Scalar tensor: summed loss divided by ``FLAGS.batch_size``.
  """
  cell_len = FLAGS.B*5 + FLAGS.num_classes
  class_start = FLAGS.B*5
  res = 0

  for i in range(FLAGS.batch_size):
    out = output[i]
    lab = label[i]
    for j in range(0, FLAGS.S*FLAGS.S*cell_len, cell_len):
      conf_diff = out[j+4] - out[j+9]
      # sign(max(d, 0)) is 1 only for the box with the higher confidence,
      # so each selector switches exactly one box's terms on.
      first_wins = tf.sign(tf.maximum(conf_diff,0))
      second_wins = tf.sign(tf.maximum(-conf_diff,0))

      for box, responsible in ((j, first_wins), (j+5, second_wins)):
        has_obj = tf.sign(lab[box+2])
        no_obj = tf.sign(tf.floor(lab[box]))
        conf_err = tf.square(out[box+4] - lab[box+4])

        res += FLAGS.coordinate_weight * responsible * has_obj * (
          tf.square(out[box] - lab[box]) +
          tf.square(out[box+1] - lab[box+1]) +
          tf.square(tf.sqrt(out[box+2]) - tf.sqrt(lab[box+2])) +
          tf.square(tf.sqrt(out[box+3]) - tf.sqrt(lab[box+3])))
        res += responsible * has_obj * conf_err
        res += FLAGS.noobj_weight * responsible * no_obj * conf_err

      # Class loss over every class slot; it used to cover only the first
      # two slots regardless of FLAGS.num_classes.
      class_err = sum([tf.square(out[j+class_start+c] - lab[j+class_start+c])
                       for c in range(FLAGS.num_classes)])
      res += tf.sign(lab[j+7]) * class_err

  return res/FLAGS.batch_size
217 | 
def test_from_dir(imgdir,display_loss=False):
  """Run detection on every file under ``imgdir`` and save annotated images.

  The inference graph is built once on a single-image placeholder, weights
  are restored from the latest checkpoint in ``FLAGS.checkpoint_dir`` (if
  one is found), and each file is then preprocessed, run through the
  network, decoded with ``get_results`` and drawn with ``show_results``.

  Args:
    imgdir: directory walked recursively for input images.
    display_loss: accepted for interface compatibility; not used.
  """
  network_fn = nets_factory.get_network_fn(FLAGS.model_name,
    FLAGS.S*FLAGS.S*(FLAGS.B*5+FLAGS.num_classes),
    is_training=False)

  # Build the network before creating the session and the Saver: the Saver
  # needs the model variables to exist, and instantiating the network per
  # image would create variables that are neither initialised nor restored.
  test_data_node = tf.placeholder(
    tf.float32,
    shape=(1, FLAGS.img_size, FLAGS.img_size, FLAGS.channel_dim))
  output,_ = network_fn(test_data_node)

  with tf.Session() as sess:
    tf.global_variables_initializer().run()
    saver = tf.train.Saver()
    print("Reading checkpoints...")

    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
      ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
      saver.restore(sess, os.path.join(FLAGS.checkpoint_dir, ckpt_name))
      print("Success to read {}".format(ckpt_name))
    else:
      # Best effort: continue with freshly initialised weights.
      print("Failed to find a checkpoint")

    for root, dirs, files in os.walk(imgdir):
      for fname in files:
        img = os.path.join(root, fname)
        data = extract_data_yolo(img, train=False)
        out = sess.run(output, feed_dict={test_data_node: data})
        results,classes = get_results(out)
        show_results(img, results, classes)
245 | 
def main(_):
  """Train the YOLO network on the images found under ``FLAGS.train_dir``.

  Collects the image base names, builds the network, loss and Adam
  optimizer, optionally resumes from the latest checkpoint in
  ``FLAGS.checkpoint_dir``, then runs
  ``FLAGS.epoch * train_size // FLAGS.batch_size`` steps, periodically
  printing the loss, writing summaries and saving checkpoints.

  Args:
    _: unused positional argument; the function takes exactly one.

  Raises:
    ValueError: if fewer than ``FLAGS.batch_size`` training images exist,
      since the fixed-size placeholders cannot be fed a smaller batch.
  """
  train_img_list = []
  for rt,dirs,filenames in os.walk(FLAGS.train_dir):
    for filename in filenames:
      # Keep the base name; the extension is re-added when loading.
      train_img_list.append(os.path.splitext(filename)[0])

  train_size = len(train_img_list)
  if train_size < FLAGS.batch_size:
    raise ValueError('Need at least %d training images, found %d'
                     % (FLAGS.batch_size, train_size))

  numpy.random.shuffle(train_img_list)
  train_data_node = tf.placeholder(
    tf.float32,
    shape=(FLAGS.batch_size, FLAGS.img_size, FLAGS.img_size, FLAGS.channel_dim))
  train_labels_node = tf.placeholder(tf.float32,
    shape=(FLAGS.batch_size, FLAGS.S*FLAGS.S*(FLAGS.B*5+FLAGS.num_classes)))

  network_fn = nets_factory.get_network_fn(FLAGS.model_name,
    FLAGS.S*FLAGS.S*(FLAGS.B*5+FLAGS.num_classes),
    is_training=True)

  logits,_ = network_fn(train_data_node)
  # NOTE(review): 'logtis' looks like a typo for 'logits', so this sigmoid
  # is never used and the loss is computed on the raw network output.
  # Deliberately left unchanged: applying it alters training and has to be
  # mirrored at inference time - confirm the intended behaviour first.
  logtis = tf.nn.sigmoid(logits)
  loss = loss_func_yolo(logits, train_labels_node)

  batch = slim.create_global_step()

  optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss, global_step=batch)

  tf.summary.scalar("loss", loss)
  merged_summary = tf.summary.merge_all()
  with tf.Session() as sess:

    tf.global_variables_initializer().run()
    saver = tf.train.Saver()
    print('Initialized!')
    writer = tf.summary.FileWriter(FLAGS.tensorboard_dir, sess.graph)

    print("loding models...")
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
      ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
      saver.restore(sess, os.path.join(FLAGS.checkpoint_dir, ckpt_name))
      print("Success to load {}".format(ckpt_name))
    else:
      print("Failed to find a checkpoint")

    # Guard the modulus: train_size == batch_size would divide by zero.
    offset_range = max(train_size - FLAGS.batch_size, 1)

    try:
      start_time = time.time()
      for step in xrange(int(FLAGS.epoch * train_size) // FLAGS.batch_size):
        offset = (step * FLAGS.batch_size) % offset_range
        # Fetch the batch once so images and labels always refer to the
        # same samples, even if get_next_minibatch reshuffles.
        batch_paths = get_next_minibatch(offset, train_img_list)
        batch_data = extract_data_yolo(batch_paths)
        batch_labels = extract_labels_yolo(batch_paths)

        feed_dict = {train_data_node: batch_data,
                     train_labels_node: batch_labels}

        _, los, summary = sess.run([optimizer, loss, merged_summary], feed_dict=feed_dict)

        if step%FLAGS.log_loss_step == 0:
          end_time = time.time()
          print('loss: %.6f time: %.2f' % (los, end_time-start_time))
          start_time = time.time()
        if step%FLAGS.save_summary_step == 0:
          writer.add_summary(summary, step)
        if step%FLAGS.save_model_step == 0:
          saver.save(sess, os.path.join(FLAGS.checkpoint_dir, "yolo.model"), global_step=step)
    finally:
      # Flush pending summaries to disk even if training is interrupted.
      writer.close()
309 | 
if __name__ == '__main__':
  # Test mode runs detection over FLAGS.test_data directly; otherwise
  # control is handed to tf.app.run() for training.
  if FLAGS.is_test:
    test_from_dir(FLAGS.test_data)
  else:
    tf.app.run()
315 | 


--------------------------------------------------------------------------------