├── README.md ├── old_versions ├── YOLO_tiny.py ├── tiny_yolo.py ├── very_tiny_yolo.py ├── very_tiny_yolov2.py └── very_tiny_yolov3.py ├── some_tests ├── 108.jpg ├── 109.jpg ├── 126.jpg ├── 152.jpg ├── 171.jpg └── 173.jpg └── yolo_v1.py /README.md: -------------------------------------------------------------------------------- 1 | # YOLOv1_tf 2 | A simple Tensorflow implementation of [You Only Look Once:Unified, Real-Time Object Detection](https://arxiv.org/pdf/1506.02640.pdf) 3 | for face detection and recognition 4 | # Results 5 | ![](/some_tests/108.jpg) ![](/some_tests/109.jpg) 6 | ![](/some_tests/126.jpg) ![](/some_tests/152.jpg) 7 | ![](/some_tests/171.jpg) ![](/some_tests/173.jpg) 8 | -------------------------------------------------------------------------------- /old_versions/YOLO_tiny.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import gzip 7 | import os 8 | import sys 9 | import time 10 | import cv2 11 | import numpy 12 | from PIL import Image 13 | from six.moves import urllib 14 | from six.moves import xrange 15 | import tensorflow as tf 16 | 17 | IMAGE_SIZE = 224 18 | NUM_CHANNELS = 3 19 | PIXEL_DEPTH = 255 20 | SEED = 66478 21 | BATCH_SIZE = 2 22 | NUM_EPOCHS = 200 23 | S = 7 24 | B = 2 25 | CLASSES = 2 26 | COORD_W = 5 27 | NOOBJ_W = 0.5 28 | PROB_THRESHOLD = 0.25 29 | NMS_THRESHOLD = 0.5 30 | TRAIN_SIZE = 122 31 | alpha = 0.1 32 | EVAL_FREQUENCY = 100 33 | TRAIN_IMG_DIR = '/home/yy/train/' 34 | TRAIN_LABEL_DIR = '/home/yy/labels/' 35 | CLASSES_NAME = ["DaLai","NonDaLai"] 36 | TEST_IMG_DIR = '/home/yy/test1/' 37 | TEST_LABEL_DIR = 'home/yy/labels/' 38 | RES_DIR = '/home/yy/subnets2/' 39 | SAVE_MODEL = '/home/yy/tf_saver_models/model_yolo4.ckpt' 40 | SAVE_TENSORBOARD = '/home/yy/tensorboard' 41 | 42 | 43 | conv1_weights = tf.Variable(tf.truncated_normal([3, 3, 
NUM_CHANNELS+2, 64], stddev=0.01, seed=SEED, dtype=tf.float32)) 44 | conv1_biases = tf.Variable(tf.constant(0.1, shape=[64], dtype=tf.float32)) 45 | conv2_weights = tf.Variable(tf.truncated_normal([3, 3, 64, 128], stddev=0.01, seed=SEED, dtype=tf.float32)) 46 | conv2_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32)) 47 | conv3_weights = tf.Variable(tf.truncated_normal([3, 3, 128, 256], stddev=0.01, seed=SEED, dtype=tf.float32)) 48 | conv3_biases = tf.Variable(tf.constant(0.1, shape=[256], dtype=tf.float32)) 49 | conv4_weights = tf.Variable(tf.truncated_normal([3, 3, 256, 512], stddev=0.01, seed=SEED, dtype=tf.float32)) 50 | conv4_biases = tf.Variable(tf.constant(0.1, shape=[512], dtype=tf.float32)) 51 | conv5_weights = tf.Variable(tf.truncated_normal([3, 3, 512, 1024], stddev=0.01, seed=SEED, dtype=tf.float32)) 52 | conv5_biases = tf.Variable(tf.constant(0.1, shape=[1024], dtype=tf.float32)) 53 | 54 | """separate fc layer to fc1fc2 for coordinate regression and fc3fc4 for classify regression""" 55 | fc1_weights = tf.Variable(tf.truncated_normal([IMAGE_SIZE * IMAGE_SIZE //1024 * 1024, 128], stddev=0.01, seed=SEED, dtype=tf.float32)) 56 | fc1_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32)) 57 | fc2_weights = tf.Variable(tf.truncated_normal([128, S*S*(B*5)], stddev=0.01, seed=SEED, dtype=tf.float32)) 58 | fc2_biases = tf.Variable(tf.constant(0.1, shape=[S*S*(B*5)], dtype=tf.float32)) 59 | 60 | fc3_weights = tf.Variable(tf.truncated_normal([1024, 128], stddev=0.01, seed=SEED, dtype=tf.float32)) 61 | fc3_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32)) 62 | fc4_weights = tf.Variable(tf.truncated_normal([128, S*S*CLASSES], stddev=0.01, seed=SEED, dtype=tf.float32)) 63 | fc4_biases = tf.Variable(tf.constant(0.1, shape=[S*S*CLASSES], dtype=tf.float32)) 64 | 65 | def model(data): 66 | conv = tf.nn.conv2d(data, conv1_weights, strides=[1, 1, 1, 1], padding='SAME') 67 | conv_bias = tf.nn.bias_add(conv, 
conv1_biases) 68 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 69 | 70 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 71 | 72 | conv = tf.nn.conv2d(pool, conv2_weights, strides=[1, 1, 1, 1], padding='SAME') 73 | conv_bias = tf.nn.bias_add(conv, conv2_biases) 74 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 75 | 76 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 77 | 78 | conv = tf.nn.conv2d(pool, conv3_weights, strides=[1, 1, 1, 1], padding='SAME') 79 | conv_bias = tf.nn.bias_add(conv, conv3_biases) 80 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 81 | 82 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 83 | 84 | conv = tf.nn.conv2d(pool, conv4_weights, strides=[1, 1, 1, 1], padding='SAME') 85 | conv_bias = tf.nn.bias_add(conv, conv4_biases) 86 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 87 | 88 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 89 | 90 | conv = tf.nn.conv2d(pool, conv5_weights, strides=[1, 1, 1, 1], padding='SAME') 91 | conv_bias = tf.nn.bias_add(conv, conv5_biases) 92 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 93 | 94 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 95 | 96 | fc1_shape = pool.get_shape().as_list() 97 | reshape = tf.reshape(pool, [fc1_shape[0], fc1_shape[1] * fc1_shape[2] * fc1_shape[3]]) 98 | 99 | fc1_hidden = tf.matmul(reshape, fc1_weights) + fc1_biases 100 | fc1_out = tf.maximum(alpha*fc1_hidden, fc1_hidden) 101 | 102 | coors = tf.sigmoid(tf.matmul(fc1_out, fc2_weights) + fc2_biases) 103 | 104 | pool = tf.nn.avg_pool(lrelu, ksize=[1, IMAGE_SIZE*IMAGE_SIZE/1024, IMAGE_SIZE*IMAGE_SIZE/1024, 1], strides=[1, IMAGE_SIZE*IMAGE_SIZE/1024, IMAGE_SIZE*IMAGE_SIZE/1024, 1], padding='SAME') 105 | 106 | fc3_shape = pool.get_shape().as_list() 107 | reshape = tf.reshape(pool, [fc3_shape[0], fc3_shape[1] * fc3_shape[2] * 
fc3_shape[3]]) 108 | 109 | fc3_hidden = tf.matmul(reshape, fc3_weights) + fc3_biases 110 | fc3_out = tf.maximum(alpha*fc3_hidden, fc3_hidden) 111 | 112 | probs = tf.sigmoid(tf.matmul(fc3_out, fc4_weights) + fc4_biases) 113 | output = [] 114 | 115 | for i in range(BATCH_SIZE): 116 | for j in range(S*S): 117 | for k in range(10): 118 | output.append(coors[i,j*B*5+k]) 119 | for k in range(CLASSES): 120 | output.append(probs[i,j*CLASSES+k]) 121 | output = tf.reshape(output, [BATCH_SIZE, S*S*(B*5+CLASSES)]) 122 | 123 | return output 124 | 125 | 126 | def nms(dets, thresh): 127 | """Non maximum suppression""" 128 | x1 = dets[:, 0] 129 | y1 = dets[:, 1] 130 | x2 = dets[:, 2] 131 | y2 = dets[:, 3] 132 | scores = dets[:, 4] 133 | 134 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 135 | order = scores.argsort()[::-1] 136 | 137 | keep = [] 138 | while order.size > 0: 139 | i = order[0] 140 | keep.append(i) 141 | xx1 = numpy.maximum(x1[i], x1[order[1:]]) 142 | yy1 = numpy.maximum(y1[i], y1[order[1:]]) 143 | xx2 = numpy.minimum(x2[i], x2[order[1:]]) 144 | yy2 = numpy.minimum(y2[i], y2[order[1:]]) 145 | 146 | w = numpy.maximum(0.0, xx2 - xx1 + 1) 147 | h = numpy.maximum(0.0, yy2 - yy1 + 1) 148 | inter = w * h 149 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 150 | 151 | inds = numpy.where(ovr <= thresh)[0] 152 | order = order[inds + 1] 153 | 154 | return keep 155 | 156 | def get_results(output): 157 | results = [] 158 | classes = [] 159 | probs = numpy.ndarray(shape=[CLASSES,]) 160 | for p in range(B): 161 | for j in range(4 + p*5, S*S*(B*5+CLASSES), B*5+CLASSES): 162 | for i in range(CLASSES): 163 | probs[i] = output[0][j] * output[0][j + 1+ (B-1-p)*5 + i] 164 | 165 | cls_ind = probs.argsort()[::-1][0] 166 | if probs[cls_ind] > PROB_THRESHOLD: 167 | results.append([output[0][j-4] - output[0][j-2]/2, output[0][j-3] - output[0][j-3]/2, output[0][j-4] + output[0][j-2]/2, output[0][j-3] + output[0][j-3]/2, probs[cls_ind]]) 168 | classes.append(cls_ind) 169 | 170 | res = 
numpy.array(results).astype(numpy.float32) 171 | if len(res) != 0: 172 | keep = nms(res, NMS_THRESHOLD) 173 | results_ = [] 174 | classes_ = [] 175 | for i in keep: 176 | results_.append(results[i]) 177 | classes_.append(classes[i]) 178 | 179 | return results_,classes_ 180 | else: 181 | return [],[] 182 | 183 | def show_results(img_path, results, classes): 184 | img = cv2.imread(img_path).copy() 185 | if len(results) != 0: 186 | for i in range(len(results)): 187 | x1 = int(results[i][0]*img.shape[1]) 188 | y1 = int(results[i][1]*img.shape[0]) 189 | x2 = int(results[i][2]*img.shape[1]) 190 | y2 = int(results[i][3]*img.shape[0]) 191 | score = results[i][4] 192 | cv2.rectangle(img, (x1,y1), (x2,y2), (0,255,0), 2) 193 | cv2.putText(img, CLASSES_NAME[classes[i]] + ' : %.2f' % results[i][4], (x1+5,y1-7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 2) 194 | 195 | cv2.imwrite(RES_DIR + img_path.split('/')[-1], img) 196 | 197 | def get_next_minibatch(offset, path_list): 198 | if offset+BATCH_SIZE > len(path_list): 199 | random.shuffle(path_list) 200 | return path_list[:BATCH_SIZE] 201 | else: 202 | return path_list[offset:offset+BATCH_SIZE] 203 | 204 | def extract_data_yolo(path_list, train=True): 205 | if train: 206 | data = numpy.ndarray(shape=(len(path_list),IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS + 2),dtype=numpy.float32) 207 | 208 | """add original position information""" 209 | for i in range(len(path_list)): 210 | for j in range(IMAGE_SIZE): 211 | data[i,j,:,-2] = j 212 | 213 | for i in range(len(path_list)): 214 | for j in range(IMAGE_SIZE): 215 | data[i,:,j,-1] = j 216 | 217 | for i in range(len(path_list)): 218 | img = Image.open(TRAIN_IMG_DIR+path_list[i]+'.jpg') 219 | img_resize = img.resize((IMAGE_SIZE,IMAGE_SIZE)) 220 | data[i,:,:,:-2] = numpy.array(img_resize).astype(numpy.float32).reshape(IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS) 221 | 222 | return data 223 | else: 224 | data = numpy.ndarray(shape=(1,IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS + 2), dtype=numpy.float32) 225 | 226 | 
for i in range(IMAGE_SIZE): 227 | data[0,i,:,-2] = i 228 | 229 | for i in range(IMAGE_SIZE): 230 | data[0,:,i,-1] = i 231 | 232 | img = Image.open(path_list) 233 | img_resize = img.resize((IMAGE_SIZE,IMAGE_SIZE)) 234 | data[0,:,:,:-2] = numpy.array(img_resize).astype(numpy.float32).reshape(1,IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS) 235 | return data 236 | 237 | def iou(box1,box2): 238 | tb = min(box1[0]+0.5*box1[2],box2[0]+0.5*box2[2])-max(box1[0]-0.5*box1[2],box2[0]-0.5*box2[2]) 239 | lr = min(box1[1]+0.5*box1[3],box2[1]+0.5*box2[3])-max(box1[1]-0.5*box1[3],box2[1]-0.5*box2[3]) 240 | if tb < 0 or lr < 0 : intersection = 0 241 | else : intersection = tb*lr 242 | return intersection / (box1[2]*box1[3] + box2[2]*box2[3] - intersection) 243 | 244 | 245 | def extract_labels_yolo(path_list, train=True): 246 | if train: 247 | root = TRAIN_LABEL_DIR 248 | else: 249 | root = TEST_LABEL_DIR 250 | labels = numpy.ndarray(shape=(len(path_list),S*S*(B*5+CLASSES)), dtype=numpy.float32) 251 | for i in range(labels.shape[0]): 252 | for j in range(labels.shape[1]): 253 | if j%(B*5+CLASSES) == 0 or j%(B*5+CLASSES) == 5: 254 | labels[i][j] = 1.00001 255 | else: 256 | labels[i][j] = 0 257 | for i in range(len(path_list)): 258 | with open(root + path_list[i] + '.txt',"r") as f: 259 | lines = f.readlines() 260 | for j in range(len(lines)): 261 | data = lines[j].split() 262 | col_no = int(float(data[1])*IMAGE_SIZE/(IMAGE_SIZE/S)+1) 263 | row_no = int(float(data[2])*IMAGE_SIZE/(IMAGE_SIZE/S)+1) 264 | grid_no = (row_no-1)*S+col_no 265 | # labels[i,(B*5+CLASSES)*grid_no-1] = float(data[0]) 266 | labels[i,(B*5+CLASSES)*grid_no-CLASSES + int(data[0])] = 1 267 | for k in range(B): 268 | labels[i,(B*5+CLASSES)*(grid_no-1) + 5*k] = float(data[1]) 269 | labels[i,(B*5+CLASSES)*(grid_no-1) + 5*k + 1] = float(data[2]) 270 | labels[i,(B*5+CLASSES)*(grid_no-1) + 5*k + 2] = float(data[3]) 271 | labels[i,(B*5+CLASSES)*(grid_no-1) + 5*k + 3] = float(data[4]) 272 | labels[i,(B*5+CLASSES)*(grid_no-1) + 5*k + 4] 
= 1 273 | 274 | return labels 275 | 276 | def loss_func_yolo(output, label): 277 | res = 0 278 | 279 | for i in range(BATCH_SIZE): 280 | for j in range(0, S*S*(B*5+CLASSES), B*5+CLASSES): 281 | highest_bbox = output[i][j+4]-output[i][j+9] 282 | """here we only compute the loss of bbox which have the highest confidence""" 283 | """we use tf.sign(tf.maximum(highest_bbox,0)) to do that""" 284 | 285 | res += COORD_W * tf.sign(tf.maximum(highest_bbox,0)) * tf.sign(label[i][j+2]) * ( 286 | tf.square(output[i][j] - label[i][j]) + 287 | tf.square(output[i][j+1]-label[i][j+1]) + 288 | tf.square(output[i][j+2]/(label[i][j+2]+1e-7) - 1) + 289 | tf.square(output[i][j+3]/(label[i][j+3]+1e-7) - 1)) 290 | 291 | res += tf.sign(tf.maximum(highest_bbox,0)) * tf.sign(label[i][j+2]) * (tf.square(output[i][j+4] - label[i][j+4])) 292 | 293 | res += NOOBJ_W * tf.sign(tf.maximum(highest_bbox,0)) * tf.sign(tf.floor(label[i][j])) * (tf.square(output[i][j+4] - label[i][j+4])) 294 | 295 | res += COORD_W * tf.sign(tf.maximum(-highest_bbox,0)) * tf.sign(label[i][j+7]) * ( 296 | tf.square(output[i][j+5] - label[i][j+5]) + 297 | tf.square(output[i][j+6]-label[i][j+6]) + 298 | tf.square(output[i][j+7]/(label[i][j+7]+1e-7) - 1) + 299 | tf.square(output[i][j+8]/(label[i][j+8]+1e-7) - 1)) 300 | 301 | res += tf.sign(tf.maximum(-highest_bbox,0)) * tf.sign(label[i][j+7]) * (tf.square(output[i][j+9] - label[i][j+9])) 302 | 303 | res += NOOBJ_W * tf.sign(tf.maximum(-highest_bbox,0)) * tf.sign(tf.floor(label[i][j+5])) * (tf.square(output[i][j+9] - label[i][j+9])) 304 | 305 | res += tf.sign(label[i][j+7]) * (tf.square(output[i][j+10] - label[i][j+10]) + tf.square(output[i][j+11] - label[i][j+11])) 306 | 307 | return res/BATCH_SIZE 308 | 309 | # def loss_func_yolo(output, label): 310 | # res = 0 311 | 312 | # for i in range(BATCH_SIZE): 313 | # for j in range(0, S*S*(B*5+CLASSES), B*5+CLASSES): 314 | # res += COORD_W * tf.sign(label[i][j+2]) * (tf.square(output[i][j] - label[i][j]) + 
tf.square(output[i][j+1]-label[i][j+1]) + 315 | # tf.square(output[i][j+2]/(label[i][j+2]+1e-7) - 1) + 316 | # tf.square(output[i][j+3]/(label[i][j+3]+1e-7) - 1)) 317 | 318 | # res += tf.sign(label[i][j+2]) * (tf.square(output[i][j+4] - label[i][j+4])) 319 | 320 | # res += NOOBJ_W * tf.sign(tf.floor(label[i][j])) * (tf.square(output[i][j+4] - label[i][j+4])) 321 | 322 | # res += COORD_W * tf.sign(label[i][j+7]) * (tf.square(output[i][j+5] - label[i][j+5]) + tf.square(output[i][j+6]-label[i][j+6]) + 323 | # tf.square(output[i][j+7]/(label[i][j+7]+1e-7) - 1) + 324 | # tf.square(output[i][j+8]/(label[i][j+8]+1e-7) - 1)) 325 | 326 | # res += tf.sign(label[i][j+7]) * (tf.square(output[i][j+9] - label[i][j+9])) 327 | 328 | # res += NOOBJ_W * tf.sign(tf.floor(label[i][j+5])) * (tf.square(output[i][j+9] - label[i][j+9])) 329 | 330 | # res += tf.sign(label[i][j+7]) * (tf.square(output[i][j+10] - label[i][j+10]) + tf.square(output[i][j+11] - label[i][j+11])) 331 | 332 | # return res 333 | 334 | def test_from_img(img, test_model, display_loss=False): 335 | with tf.Session() as sess: 336 | tf.global_variables_initializer().run() 337 | saver = tf.train.Saver() 338 | saver.restore(sess, test_model) 339 | data = extract_data_yolo(img, train=False) 340 | out = sess.run(model(data)) 341 | if display_loss: 342 | label = extract_labels_yolo([img], train=False) 343 | print('loss: %.6f' % loss_func_yolo(out, label)) 344 | results,classes = get_results(out) 345 | show_results(img, results, classes) 346 | 347 | def test_from_dir(imgdir, test_model, display_loss=False): 348 | with tf.Session() as sess: 349 | tf.global_variables_initializer().run() 350 | saver = tf.train.Saver() 351 | saver.restore(sess, test_model) 352 | if display_loss: 353 | loss = 0 354 | for root, dirs, files in os.walk(imgdir[:-1]): 355 | for file in files: 356 | img = os.path.join(root, file) 357 | label = extract_labels_yolo([img], train=False) 358 | data = extract_data_yolo(img, train=False) 359 | out = 
sess.run(model(data)) 360 | loss += loss_func_yolo(out, label) 361 | results,classes = get_results(out) 362 | show_results(img, results, classes) 363 | print('loss: %.6f' % loss) 364 | else: 365 | for root, dirs, files in os.walk(imgdir[:-1]): 366 | for file in files: 367 | img = os.path.join(root, file) 368 | data = extract_data_yolo(img, train=False) 369 | out = sess.run(model(data)) 370 | results,classes = get_results(out) 371 | show_results(img, results, classes) 372 | 373 | def preprocessing(imgs): 374 | res = [] 375 | for i in range(BATCH_SIZE): 376 | res.append(tf.image.per_image_standardization(imgs[i])) 377 | return tf.stack(res) 378 | 379 | def main(argv=None): 380 | num_epochs = NUM_EPOCHS 381 | train_img_list = [] 382 | for rt,dirs,filenames in os.walk(TRAIN_IMG_DIR): 383 | for filename in filenames: 384 | train_img_list.append(filename[:-4]) 385 | 386 | numpy.random.shuffle(train_img_list) 387 | train_data_node = tf.placeholder( 388 | tf.float32, 389 | shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS+2)) 390 | train_labels_node = tf.placeholder(tf.float32, shape=(BATCH_SIZE, S*S*(B*5+CLASSES))) 391 | 392 | train_data_node = preprocessing(train_data_node) 393 | logits = model(train_data_node) 394 | loss = loss_func_yolo(logits, train_labels_node) 395 | 396 | regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) + 397 | tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases) + 398 | tf.nn.l2_loss(fc3_weights) + tf.nn.l2_loss(fc3_biases) + 399 | tf.nn.l2_loss(fc4_weights) + tf.nn.l2_loss(fc4_biases)) 400 | 401 | loss += 5e-4 * regularizers 402 | 403 | batch = tf.Variable(0, dtype=tf.float32) 404 | 405 | learning_rate = tf.train.exponential_decay( 406 | 0.001, 407 | batch * BATCH_SIZE, 408 | 10000, 409 | 0.95, 410 | staircase=True) 411 | 412 | optimizer = tf.train.MomentumOptimizer(learning_rate,0.9).minimize(loss, global_step=batch) 413 | 414 | tf.summary.scalar("loss", loss) 415 | tf.summary.scalar("lr", learning_rate) 416 | 
merged_summary = tf.summary.merge_all() 417 | with tf.Session() as sess: 418 | 419 | tf.global_variables_initializer().run() 420 | saver = tf.train.Saver() 421 | print('Initialized!') 422 | writer = tf.summary.FileWriter(SAVE_TENSORBOARD, sess.graph) 423 | 424 | for step in xrange(int(num_epochs * TRAIN_SIZE) // BATCH_SIZE): 425 | offset = (step * BATCH_SIZE) % (TRAIN_SIZE - BATCH_SIZE) 426 | batch_data = extract_data_yolo(get_next_minibatch(offset, train_img_list)) 427 | batch_labels = extract_labels_yolo(get_next_minibatch(offset, train_img_list)) 428 | 429 | feed_dict = {train_data_node: batch_data, 430 | train_labels_node: batch_labels} 431 | 432 | _,los,summary = sess.run([optimizer, loss, merged_summary], feed_dict=feed_dict) 433 | 434 | if step % EVAL_FREQUENCY == 0: 435 | print('loss: %.6f' % los) 436 | #print('weight: %.5f' % sess.run(conv1_weights)[0,0,0,0]) 437 | writer.add_summary(summary, step) 438 | save_path = saver.save(sess, SAVE_MODEL) 439 | 440 | def parse_args(): 441 | parser = argparse.ArgumentParser(description='YOLO demo') 442 | parser.add_argument('--train', help='train the model', action='store_true') 443 | parser.add_argument('--test', help='test the model', action='store_true') 444 | parser.add_argument('--test_img_path', help='img path to test', type=str) 445 | parser.add_argument('--display_loss', default=False, help='whether display the loss', action='store_true') 446 | parser.add_argument('--test_model', help='model to test', type=str) 447 | args = parser.parse_args() 448 | 449 | return args 450 | if __name__ == '__main__': 451 | args = parse_args() 452 | if args.train and args.test: 453 | print('Error: cannot train and test at the same time') 454 | elif args.train: 455 | tf.app.run() 456 | elif args.test_img_path[-1] == '/': 457 | test_from_dir(args.test_img_path, args.test_model, args.display_loss) 458 | else: 459 | test_from_img(args.test_img_path, args.test_model, args.display_loss) 460 | 
-------------------------------------------------------------------------------- /old_versions/tiny_yolo.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import gzip 7 | import os 8 | import sys 9 | import time 10 | import cv2 11 | import numpy 12 | from PIL import Image 13 | from six.moves import urllib 14 | from six.moves import xrange 15 | import tensorflow as tf 16 | 17 | IMAGE_SIZE = 224 18 | NUM_CHANNELS = 3 19 | PIXEL_DEPTH = 255 20 | SEED = 66478 21 | BATCH_SIZE = 1 22 | NUM_EPOCHS = 100 23 | S = 7 24 | B = 2 25 | CLASSES = 2 26 | COORD_W = 5 27 | NOOBJ_W = 0.5 28 | PROB_THRESHOLD = 0.25 29 | NMS_THRESHOLD = 0.5 30 | TRAIN_SIZE = 122 31 | alpha = 0.1 32 | EVAL_FREQUENCY = 100 33 | TRAIN_IMG_DIR = '/home/yy/train/' 34 | TRAIN_LABEL_DIR = '/home/yy/labels/' 35 | CLASSES_NAME = ["DaLai","NonDaLai"] 36 | TEST_IMG_PATH = '/home/yy/109.jpg' 37 | RES_DIR = '/home/yy/pred_decay1W/' 38 | SAVE_MODEL = '/home/yy/tf_saver_models/model_conv5_epoch100_decay10000.ckpt' 39 | SAVE_TENSORBOARD = '/home/yy/tensorboard' 40 | TEST_MODEL = '/home/yy/tf_saver_models/model_conv5_epoch100_decay10000.ckpt' 41 | 42 | 43 | conv1_weights = tf.Variable(tf.truncated_normal([3, 3, NUM_CHANNELS, 16], stddev=0.1, seed=SEED, dtype=tf.float32)) 44 | conv1_biases = tf.Variable(tf.zeros([16], dtype=tf.float32)) 45 | conv2_weights = tf.Variable(tf.truncated_normal([3, 3, 16, 32], stddev=0.1, seed=SEED, dtype=tf.float32)) 46 | conv2_biases = tf.Variable(tf.constant(0.1, shape=[32], dtype=tf.float32)) 47 | conv3_weights = tf.Variable(tf.truncated_normal([3, 3, 32, 64], stddev=0.1,seed=SEED, dtype=tf.float32)) 48 | conv3_biases = tf.Variable(tf.zeros([64], dtype=tf.float32)) 49 | conv4_weights = tf.Variable(tf.truncated_normal([3, 3, 64, 128], stddev=0.1, seed=SEED, dtype=tf.float32)) 50 | conv4_biases = 
tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32)) 51 | conv5_weights = tf.Variable(tf.truncated_normal([3, 3, 128, 256], stddev=0.1, seed=SEED, dtype=tf.float32)) 52 | conv5_biases = tf.Variable(tf.zeros([256], dtype=tf.float32)) 53 | conv6_weights = tf.Variable(tf.truncated_normal([3, 3, 256, 512], stddev=0.1, seed=SEED, dtype=tf.float32)) 54 | conv6_biases = tf.Variable(tf.constant(0.1, shape=[512], dtype=tf.float32)) 55 | conv7_weights = tf.Variable(tf.truncated_normal([3, 3, 512, 1024], stddev=0.1, seed=SEED, dtype=tf.float32)) 56 | conv7_biases = tf.Variable(tf.zeros([1024], dtype=tf.float32)) 57 | conv8_weights = tf.Variable(tf.truncated_normal([3, 3, 1024, 1024], stddev=0.1, seed=SEED, dtype=tf.float32)) 58 | conv8_biases = tf.Variable(tf.constant(0.1, shape=[1024], dtype=tf.float32)) 59 | conv9_weights = tf.Variable(tf.truncated_normal([3, 3, 1024, 1024], stddev=0.1, seed=SEED, dtype=tf.float32)) 60 | conv9_biases = tf.Variable(tf.constant(0.1, shape=[1024], dtype=tf.float32)) 61 | conv10_weights = tf.Variable(tf.truncated_normal([3, 3, 1024, 1024], stddev=0.1, seed=SEED, dtype=tf.float32)) 62 | conv10_biases = tf.Variable(tf.constant(0.1, shape=[1024], dtype=tf.float32)) 63 | 64 | fc1_weights = tf.Variable(tf.truncated_normal([IMAGE_SIZE * IMAGE_SIZE //4096 * 1024, 4096], stddev=0.1, seed=SEED, dtype=tf.float32)) 65 | fc1_biases = tf.Variable(tf.constant(0.1, shape=[4096], dtype=tf.float32)) 66 | fc2_weights = tf.Variable(tf.truncated_normal([4096, 4096], stddev=0.1, seed=SEED, dtype=tf.float32)) 67 | fc2_biases = tf.Variable(tf.constant(0.1, shape=[4096], dtype=tf.float32)) 68 | fc3_weights = tf.Variable(tf.truncated_normal([4096, S*S*(B*5+CLASSES)], stddev=0.1, seed=SEED, dtype=tf.float32)) 69 | fc3_biases = tf.Variable(tf.constant(0.1, shape=[S*S*(B*5+CLASSES)], dtype=tf.float32)) 70 | 71 | def model(data, train=False): 72 | conv = tf.nn.conv2d(data, conv1_weights, strides=[1, 1, 1, 1], padding='SAME') 73 | conv_bias = tf.nn.bias_add(conv, 
conv1_biases) 74 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 75 | 76 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 77 | 78 | conv = tf.nn.conv2d(pool, conv2_weights, strides=[1, 1, 1, 1], padding='SAME') 79 | conv_bias = tf.nn.bias_add(conv, conv2_biases) 80 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 81 | 82 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 83 | 84 | conv = tf.nn.conv2d(pool, conv3_weights, strides=[1, 1, 1, 1], padding='SAME') 85 | conv_bias = tf.nn.bias_add(conv, conv3_biases) 86 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 87 | 88 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 89 | 90 | conv = tf.nn.conv2d(pool, conv4_weights, strides=[1, 1, 1, 1], padding='SAME') 91 | conv_bias = tf.nn.bias_add(conv, conv4_biases) 92 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 93 | 94 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 95 | 96 | conv = tf.nn.conv2d(pool, conv5_weights, strides=[1, 1, 1, 1], padding='SAME') 97 | conv_bias = tf.nn.bias_add(conv, conv5_biases) 98 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 99 | 100 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 101 | 102 | conv = tf.nn.conv2d(pool, conv6_weights, strides=[1, 1, 1, 1], padding='SAME') 103 | conv_bias = tf.nn.bias_add(conv, conv6_biases) 104 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 105 | 106 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 107 | 108 | conv = tf.nn.conv2d(pool, conv7_weights, strides=[1, 1, 1, 1], padding='SAME') 109 | conv_bias = tf.nn.bias_add(conv, conv7_biases) 110 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 111 | 112 | 113 | conv = tf.nn.conv2d(lrelu, conv8_weights, strides=[1, 1, 1, 1], padding='SAME') 114 | conv_bias = tf.nn.bias_add(conv, conv8_biases) 115 | lrelu = 
tf.maximum(alpha*conv_bias, conv_bias) 116 | 117 | conv = tf.nn.conv2d(lrelu, conv9_weights, strides=[1, 1, 1, 1], padding='SAME') 118 | conv_bias = tf.nn.bias_add(conv, conv9_biases) 119 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 120 | 121 | conv = tf.nn.conv2d(lrelu, conv10_weights, strides=[1, 1, 1, 1], padding='SAME') 122 | conv_bias = tf.nn.bias_add(conv, conv10_biases) 123 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 124 | 125 | fc1_shape = lrelu.get_shape().as_list() 126 | reshape = tf.reshape(lrelu, [fc1_shape[0], fc1_shape[1] * fc1_shape[2] * fc1_shape[3]]) 127 | 128 | fc1_hidden = tf.matmul(reshape, fc1_weights) + fc1_biases 129 | fc1_out = tf.maximum(alpha*fc1_hidden, fc1_hidden) 130 | 131 | fc2_hidden = tf.matmul(fc1_out, fc2_weights) + fc2_biases 132 | fc2_out = tf.maximum(alpha*fc2_hidden, fc2_hidden) 133 | 134 | return tf.matmul(fc2_out, fc3_weights) + fc3_biases 135 | 136 | def nms(dets, thresh): 137 | """Non maximum suppression""" 138 | x1 = dets[:, 0] 139 | y1 = dets[:, 1] 140 | x2 = dets[:, 2] 141 | y2 = dets[:, 3] 142 | scores = dets[:, 4] 143 | 144 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 145 | order = scores.argsort()[::-1] 146 | 147 | keep = [] 148 | while order.size > 0: 149 | i = order[0] 150 | keep.append(i) 151 | xx1 = numpy.maximum(x1[i], x1[order[1:]]) 152 | yy1 = numpy.maximum(y1[i], y1[order[1:]]) 153 | xx2 = numpy.minimum(x2[i], x2[order[1:]]) 154 | yy2 = numpy.minimum(y2[i], y2[order[1:]]) 155 | 156 | w = numpy.maximum(0.0, xx2 - xx1 + 1) 157 | h = numpy.maximum(0.0, yy2 - yy1 + 1) 158 | inter = w * h 159 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 160 | 161 | inds = numpy.where(ovr <= thresh)[0] 162 | order = order[inds + 1] 163 | 164 | return keep 165 | 166 | def get_results(output): 167 | results = [] 168 | classes = [] 169 | probs = numpy.ndarray(shape=[CLASSES,]) 170 | for p in range(B): 171 | for j in range(4 + p*5, S*S*(B*5+CLASSES), B*5+CLASSES): 172 | for i in range(CLASSES): 173 | probs[i] = output[0][j] 
* output[0][j + 1+ (B-1-p)*5 + i] 174 | 175 | cls_ind = probs.argsort()[::-1][0] 176 | if probs[cls_ind] > PROB_THRESHOLD: 177 | results.append([output[0][j-4] - output[0][j-2]/2, output[0][j-3] - output[0][j-3]/2, output[0][j-4] + output[0][j-2]/2, output[0][j-3] + output[0][j-3]/2, probs[cls_ind]]) 178 | classes.append(cls_ind) 179 | 180 | res = numpy.array(results).astype(numpy.float32) 181 | if len(res) != 0: 182 | keep = nms(res, NMS_THRESHOLD) 183 | results_ = [] 184 | classes_ = [] 185 | for i in keep: 186 | results_.append(results[i]) 187 | classes_.append(classes[i]) 188 | 189 | return results_,classes_ 190 | else: 191 | return [],[] 192 | 193 | def show_results(img_path, results, classes): 194 | img = cv2.imread(img_path).copy() 195 | if len(results) != 0: 196 | for i in range(len(results)): 197 | x1 = int(results[i][0]*img.shape[1]) 198 | y1 = int(results[i][1]*img.shape[0]) 199 | x2 = int(results[i][2]*img.shape[1]) 200 | y2 = int(results[i][3]*img.shape[0]) 201 | score = results[i][4] 202 | cv2.rectangle(img, (x1,y1), (x2,y2), (0,255,0), 2) 203 | cv2.putText(img, CLASSES_NAME[classes[i]] + ' : %.2f' % results[i][4], (x1+5,y1-7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 2) 204 | 205 | cv2.imwrite(RES_DIR + img_path.split('/')[-1], img) 206 | 207 | def get_next_minibatch(offset, path_list): 208 | if offset+BATCH_SIZE > len(path_list): 209 | # random.shuffle(path_list) 210 | return path_list[:BATCH_SIZE] 211 | else: 212 | return path_list[offset:offset+BATCH_SIZE] 213 | 214 | def extract_data_yolo(path_list, train=True): 215 | if train: 216 | data = numpy.ndarray(shape=(len(path_list),IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS + 2),dtype=numpy.float32) 217 | 218 | for i in range(len(path_list)): 219 | for j in range(IMAGE_SIZE): 220 | data[i,j,:,-2] = j/IMAGE_SIZE 221 | 222 | for i in range(len(path_list)): 223 | for j in range(IMAGE_SIZE): 224 | data[i,:,j,-1] = j/IMAGE_SIZE 225 | 226 | for i in range(len(path_list)): 227 | img = 
Image.open(TRAIN_IMG_DIR+path_list[i]+'.jpg') 228 | img_resize = img.resize((IMAGE_SIZE,IMAGE_SIZE)) 229 | data[i,:,:,:-2] = numpy.array(img_resize).astype(numpy.float32).reshape(IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS) 230 | data[i,:,:,:-2] = (data[i,:,:,:-2] - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH 231 | 232 | return data 233 | else: 234 | data = numpy.ndarray(shape=(1,IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS + 2), dtype=numpy.float32) 235 | 236 | for i in range(IMAGE_SIZE): 237 | data[0,i,:,-2] = i/IMAGE_SIZE 238 | 239 | for i in range(IMAGE_SIZE): 240 | data[0,:,i,-1] = i/IMAGE_SIZE 241 | 242 | img = Image.open(path_list) 243 | img_resize = img.resize((IMAGE_SIZE,IMAGE_SIZE)) 244 | data[0,:,:,:-2] = numpy.array(img_resize).astype(numpy.float32).reshape(1,IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS) 245 | data[0,:,:,:-2] = (data[0,:,:,:-2] - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH 246 | return data 247 | 248 | def iou(box1,box2): 249 | tb = min(box1[0]+0.5*box1[2],box2[0]+0.5*box2[2])-max(box1[0]-0.5*box1[2],box2[0]-0.5*box2[2]) 250 | lr = min(box1[1]+0.5*box1[3],box2[1]+0.5*box2[3])-max(box1[1]-0.5*box1[3],box2[1]-0.5*box2[3]) 251 | if tb < 0 or lr < 0 : intersection = 0 252 | else : intersection = tb*lr 253 | return intersection / (box1[2]*box1[3] + box2[2]*box2[3] - intersection) 254 | 255 | 256 | def extract_labels_yolo(path_list): 257 | labels = numpy.ndarray(shape=(len(path_list),S*S*(B*5+CLASSES)), dtype=numpy.float32) 258 | for i in range(labels.shape[0]): 259 | for j in range(labels.shape[1]): 260 | if j%(B*5+CLASSES) == 0 or j%(B*5+CLASSES) == 5: 261 | labels[i][j] = 1.00001 262 | else: 263 | labels[i][j] = 0 264 | for i in range(len(path_list)): 265 | with open(TRAIN_LABEL_DIR + path_list[i] + '.txt',"r") as f: 266 | lines = f.readlines() 267 | for j in range(len(lines)): 268 | data = lines[j].split() 269 | col_no = int(float(data[1])*IMAGE_SIZE/(IMAGE_SIZE/S)+1) 270 | row_no = int(float(data[2])*IMAGE_SIZE/(IMAGE_SIZE/S)+1) 271 | grid_no = (row_no-1)*S+col_no 272 | # 
labels[i,(B*5+CLASSES)*grid_no-1] = float(data[0]) 273 | labels[i,(B*5+CLASSES)*grid_no-CLASSES + int(data[0])] = 1 274 | for k in range(B): 275 | labels[i,(B*5+CLASSES)*(grid_no-1) + 5*k] = float(data[1]) 276 | labels[i,(B*5+CLASSES)*(grid_no-1) + 5*k + 1] = float(data[2]) 277 | labels[i,(B*5+CLASSES)*(grid_no-1) + 5*k + 2] = float(data[3]) 278 | labels[i,(B*5+CLASSES)*(grid_no-1) + 5*k + 3] = float(data[4]) 279 | labels[i,(B*5+CLASSES)*(grid_no-1) + 5*k + 4] = 1 280 | 281 | return labels 282 | 283 | def loss_func_yolo(output, label): 284 | res = 0 285 | 286 | for i in range(BATCH_SIZE): 287 | for j in range(0, S*S*(B*5+CLASSES), B*5+CLASSES): 288 | res += COORD_W * tf.sign(label[i][j+2]) * (tf.square(output[i][j] - label[i][j]) + tf.square(output[i][j+1]-label[i][j+1]) + 289 | tf.square(output[i][j+2]/(label[i][j+2]+1e-7) - 1) + 290 | tf.square(output[i][j+3]/(label[i][j+3]+1e-7) - 1)) 291 | 292 | res += tf.sign(label[i][j+2]) * (tf.square(output[i][j+4] - label[i][j+4])) 293 | 294 | res += NOOBJ_W * tf.sign(tf.floor(label[i][j])) * (tf.square(output[i][j+4] - label[i][j+4])) 295 | 296 | res += COORD_W * tf.sign(label[i][j+7]) * (tf.square(output[i][j+5] - label[i][j+5]) + tf.square(output[i][j+6]-label[i][j+6]) + 297 | tf.square(output[i][j+7]/(label[i][j+7]+1e-7) - 1) + 298 | tf.square(output[i][j+8]/(label[i][j+8]+1e-7) - 1)) 299 | 300 | res += tf.sign(label[i][j+7]) * (tf.square(output[i][j+9] - label[i][j+9])) 301 | 302 | res += NOOBJ_W * tf.sign(tf.floor(label[i][j+5])) * (tf.square(output[i][j+9] - label[i][j+9])) 303 | 304 | res += tf.sign(label[i][j+7]) * (tf.square(output[i][j+10] - label[i][j+10]) + tf.square(output[i][j+11] - label[i][j+11])) 305 | 306 | return res 307 | 308 | def test(img): 309 | with tf.Session() as sess: 310 | tf.global_variables_initializer().run() 311 | saver = tf.train.Saver() 312 | saver.restore(sess, TEST_MODEL) 313 | data = extract_data_yolo(img, False) 314 | out = sess.run(model(data)) 315 | results,classes = 
get_results(out) 316 | show_results(img, results, classes) 317 | 318 | def main(argv=None): 319 | num_epochs = NUM_EPOCHS 320 | train_img_list = [] 321 | for rt,dirs,filenames in os.walk(TRAIN_IMG_DIR): 322 | for filename in filenames: 323 | train_img_list.append(filename[:-4]) 324 | 325 | train_data_node = tf.placeholder( 326 | tf.float32, 327 | shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS+2)) 328 | train_labels_node = tf.placeholder(tf.float32, shape=(BATCH_SIZE, S*S*(B*5+CLASSES))) 329 | 330 | logits = model(train_data_node, True) 331 | loss = loss_func_yolo(logits, train_labels_node) 332 | 333 | regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) + 334 | tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases)) 335 | 336 | loss += 5e-4 * regularizers 337 | 338 | batch = tf.Variable(0, dtype=tf.float32) 339 | 340 | learning_rate = tf.train.exponential_decay( 341 | 0.01, 342 | batch * BATCH_SIZE, 343 | 10000, 344 | 0.95, 345 | staircase=True) 346 | 347 | op_func = tf.train.MomentumOptimizer(learning_rate,0.9) 348 | 349 | tvars = tf.trainable_variables() 350 | grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), 0.5) 351 | optimizer = op_func.apply_gradients(zip(grads, tvars), global_step=batch) 352 | 353 | tf.summary.scalar("loss", loss) 354 | tf.summary.scalar("lr", learning_rate) 355 | merged_summary = tf.summary.merge_all() 356 | with tf.Session() as sess: 357 | 358 | tf.global_variables_initializer().run() 359 | saver = tf.train.Saver() 360 | print('Initialized!') 361 | writer = tf.summary.FileWriter(SAVE_TENSORBOARD, sess.graph) 362 | 363 | for step in xrange(int(num_epochs * TRAIN_SIZE) // BATCH_SIZE): 364 | offset = (step * BATCH_SIZE) % (TRAIN_SIZE - BATCH_SIZE) 365 | batch_data = extract_data_yolo(get_next_minibatch(offset, train_img_list)) 366 | batch_labels = extract_labels_yolo(get_next_minibatch(offset, train_img_list)) 367 | 368 | feed_dict = {train_data_node: batch_data, 369 | train_labels_node: batch_labels} 370 
def parse_args():
    """Parse the command-line flags of the YOLO demo script.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``, with the
        boolean switches ``train`` and ``test`` and the optional string
        ``test_img_path``.
    """
    cli = argparse.ArgumentParser(description='YOLO demo')

    # The two mode switches are plain on/off flags.
    for flag, help_text in (('--train', 'train the model'),
                            ('--test', 'test the model')):
        cli.add_argument(flag, help=help_text, action='store_true')

    cli.add_argument('--test_img_path', help='img path to test', type=str)

    return cli.parse_args()
'/home/yy/109.jpg' 37 | RES_DIR = '/home/yy/pred_shuffle/' 38 | SAVE_MODEL = '/home/yy/tf_saver_models/model_newls.ckpt' 39 | SAVE_TENSORBOARD = '/home/yy/tensorboard' 40 | TEST_MODEL = '/home/yy/tf_saver_models/model_newls.ckpt' 41 | 42 | 43 | conv1_weights = tf.Variable(tf.truncated_normal([3, 3, NUM_CHANNELS, 64], stddev=0.01, seed=SEED, dtype=tf.float32)) 44 | conv1_biases = tf.Variable(tf.constant(0.1, shape=[64], dtype=tf.float32)) 45 | conv2_weights = tf.Variable(tf.truncated_normal([3, 3, 64, 128], stddev=0.01, seed=SEED, dtype=tf.float32)) 46 | conv2_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32)) 47 | 48 | 49 | fc1_weights = tf.Variable(tf.truncated_normal([IMAGE_SIZE * IMAGE_SIZE //16 * 128, 128], stddev=0.01, seed=SEED, dtype=tf.float32)) 50 | fc1_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32)) 51 | fc2_weights = tf.Variable(tf.truncated_normal([128, S*S*(B*5+CLASSES)], stddev=0.01, seed=SEED, dtype=tf.float32)) 52 | fc2_biases = tf.Variable(tf.constant(0.1, shape=[S*S*(B*5+CLASSES)], dtype=tf.float32)) 53 | 54 | def model(data, train=False): 55 | conv = tf.nn.conv2d(data, conv1_weights, strides=[1, 1, 1, 1], padding='SAME') 56 | conv_bias = tf.nn.bias_add(conv, conv1_biases) 57 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 58 | 59 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 60 | 61 | conv = tf.nn.conv2d(pool, conv2_weights, strides=[1, 1, 1, 1], padding='SAME') 62 | conv_bias = tf.nn.bias_add(conv, conv2_biases) 63 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 64 | 65 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 66 | 67 | fc1_shape = pool.get_shape().as_list() 68 | reshape = tf.reshape(pool, [fc1_shape[0], fc1_shape[1] * fc1_shape[2] * fc1_shape[3]]) 69 | 70 | fc1_hidden = tf.matmul(reshape, fc1_weights) + fc1_biases 71 | fc1_out = tf.maximum(alpha*fc1_hidden, fc1_hidden) 72 | 73 | return tf.matmul(fc1_out, 
def get_results(output):
    """Decode the raw network output into NMS-filtered detections.

    For every grid cell and each of its ``B`` box predictors, the box
    confidence is multiplied by every class score.  The best class is kept
    when that product exceeds ``PROB_THRESHOLD``.  Surviving boxes are
    converted from centre/size form to corner form and filtered with ``nms``.

    Args:
        output: indexable as ``output[0][k]``.  Only row 0 is read; it holds
            ``S*S*(B*5+CLASSES)`` values, per cell ``B`` groups of five box
            values followed by ``CLASSES`` scores.

    Returns:
        ``(results, classes)``.  ``results`` is a list of
        ``[x1, y1, x2, y2, score]`` lists and ``classes`` the matching list of
        class indices.  Both are empty when nothing passes the threshold.
    """
    cell_len = B * 5 + CLASSES
    row = output[0]

    results = []
    classes = []
    probs = numpy.ndarray(shape=[CLASSES,])
    for p in range(B):
        # j walks over the confidence slot of predictor p in every cell.
        for j in range(4 + p * 5, S * S * cell_len, cell_len):
            # Class scores start right after the last box group of the cell.
            class_start = j + 1 + (B - 1 - p) * 5
            for i in range(CLASSES):
                probs[i] = row[j] * row[class_start + i]

            cls_ind = probs.argsort()[::-1][0]
            if probs[cls_ind] > PROB_THRESHOLD:
                # The four slots before the confidence are centre x, centre y,
                # width and height.  The original code reused the y centre
                # (slot j-3) where the height (slot j-1) belongs, which
                # produced boxes spanning y/2 .. 1.5*y.
                x, y, w, h = row[j - 4], row[j - 3], row[j - 2], row[j - 1]
                results.append([x - w / 2, y - h / 2,
                                x + w / 2, y + h / 2,
                                probs[cls_ind]])
                classes.append(cls_ind)

    if not results:
        return [], []

    # NOTE(review): these boxes look normalised to [0, 1] (show_results scales
    # them by the image size), while nms adds 1 to every extent as for pixel
    # coordinates -- confirm that this is intended.
    res = numpy.array(results).astype(numpy.float32)
    keep = nms(res, NMS_THRESHOLD)
    return [results[i] for i in keep], [classes[i] for i in keep]
def _load_image_array(path):
    """Read one image file as a float32 array of shape
    ``(IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)``."""
    # The context manager closes the file handle that Image.open keeps open.
    with Image.open(path) as img:
        # Grayscale, palette or RGBA files would not reshape to three
        # channels; convert them so every input has the expected layout.
        if NUM_CHANNELS == 3 and img.mode != 'RGB':
            img = img.convert('RGB')
        resized = img.resize((IMAGE_SIZE, IMAGE_SIZE))
        pixels = numpy.array(resized).astype(numpy.float32)
    return pixels.reshape(IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)


def extract_data_yolo(path_list, train=True):
    """Load images into a normalised float32 batch for the network.

    Args:
        path_list: when ``train`` is true, a sequence of image base names that
            are resolved as ``TRAIN_IMG_DIR + name + '.jpg'``; otherwise the
            path of a single image file.
        train: selects between the two meanings of ``path_list``.

    Returns:
        A float32 array of shape ``(N, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)``
        (``N == 1`` in test mode) whose values are
        ``(pixel - PIXEL_DEPTH / 2) / PIXEL_DEPTH``.
    """
    if train:
        data = numpy.ndarray(
            shape=(len(path_list), IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
            dtype=numpy.float32)
        for i, name in enumerate(path_list):
            data[i] = _load_image_array(TRAIN_IMG_DIR + name + '.jpg')
    else:
        # The original allocated an uninitialised buffer here and then
        # immediately replaced it; build the batch directly instead.
        data = _load_image_array(path_list).reshape(
            1, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)

    return (data - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH
def loss_func_yolo(output, exp):
    """Accumulate the detection loss of one batch into a single TF expression.

    Both arguments are indexed as ``[i][k]`` with ``i`` in
    ``range(BATCH_SIZE)`` and ``k`` in ``range(S*S*(B*5+CLASSES))``.  Each
    grid cell owns ``B*5+CLASSES`` consecutive values.  The offsets used below
    (0-4, 5-9, 10-11) are hard-coded, so the function only matches the label
    layout when ``B == 2`` and ``CLASSES == 2``.

    Args:
        output: network prediction tensor.
        exp: expected (target) tensor in the layout written by
            ``extract_labels_yolo``.

    Returns:
        The summed loss (a TF tensor once at least one term has been added).
    """
    res = 0

    for i in range(BATCH_SIZE):
        # j is the offset of the first value of each grid cell's record.
        for j in range(0, S*S*(B*5+CLASSES), B*5+CLASSES):
            # Box 0 localisation term.  tf.sign(exp[j+2]) is 1 only where the
            # target's third field is positive, which extract_labels_yolo
            # leaves at 0 for cells without an object.  The last two fields
            # are compared through their square roots; tf.abs keeps the
            # prediction's sqrt argument non-negative.
            res += COORD_W * tf.sign(exp[i][j+2]) * (tf.square(output[i][j] - exp[i][j]) + tf.square(output[i][j+1]-exp[i][j+1]) +
                                                      tf.square(tf.sqrt(tf.abs(output[i][j+2])) - tf.sqrt(exp[i][j+2])) +
                                                      tf.square(tf.sqrt(tf.abs(output[i][j+3])) - tf.sqrt(exp[i][j+3])))

            # Box 0 confidence term for cells that contain an object.
            res += tf.sign(exp[i][j+2]) * (tf.square(output[i][j+4] - exp[i][j+4]))

            # Box 0 confidence term for empty cells.  extract_labels_yolo
            # initialises slot j to 1.00001 (floor -> 1) and overwrites it with
            # the object's first coordinate when one is present; this mask
            # assumes that coordinate is below 1 -- TODO confirm.
            res += NOOBJ_W * tf.sign(tf.floor(exp[i][j])) * (tf.square(output[i][j+4] - exp[i][j+4]))

            # Box 1: the same three terms, shifted by five slots.
            res += COORD_W * tf.sign(exp[i][j+7]) * (tf.square(output[i][j+5] - exp[i][j+5]) + tf.square(output[i][j+6]-exp[i][j+6]) +
                                                      tf.square(tf.sqrt(tf.abs(output[i][j+7])) - tf.sqrt(exp[i][j+7])) +
                                                      tf.square(tf.sqrt(tf.abs(output[i][j+8])) - tf.sqrt(exp[i][j+8])))

            res += tf.sign(exp[i][j+7]) * (tf.square(output[i][j+9] - exp[i][j+9]))

            res += NOOBJ_W * tf.sign(tf.floor(exp[i][j+5])) * (tf.square(output[i][j+9] - exp[i][j+9]))

            # Class-score term (slots 10 and 11), masked by box 1's object flag.
            res += tf.sign(exp[i][j+7]) * (tf.square(output[i][j+10] - exp[i][j+10]) + tf.square(output[i][j+11] - exp[i][j+11]))

    return res
tf.summary.scalar("lr", learning_rate) 279 | merged_summary = tf.summary.merge_all() 280 | with tf.Session() as sess: 281 | 282 | tf.global_variables_initializer().run() 283 | saver = tf.train.Saver() 284 | print('Initialized!') 285 | writer = tf.summary.FileWriter(SAVE_TENSORBOARD, sess.graph) 286 | 287 | for step in xrange(int(num_epochs * TRAIN_SIZE) // BATCH_SIZE): 288 | offset = (step * BATCH_SIZE) % (TRAIN_SIZE - BATCH_SIZE) 289 | batch_data = extract_data_yolo(get_next_minibatch(offset, train_img_list)) 290 | batch_labels = extract_labels_yolo(get_next_minibatch(offset, train_img_list)) 291 | 292 | feed_dict = {train_data_node: batch_data, 293 | train_labels_node: batch_labels} 294 | 295 | _,los,summary = sess.run([optimizer, loss, merged_summary], feed_dict=feed_dict) 296 | 297 | if step % EVAL_FREQUENCY == 0: 298 | print('loss: %.6f' % los) 299 | writer.add_summary(summary, step) 300 | save_path = saver.save(sess, SAVE_MODEL) 301 | 302 | def parse_args(): 303 | parser = argparse.ArgumentParser(description='YOLO demo') 304 | parser.add_argument('--train', help='train the model', action='store_true') 305 | parser.add_argument('--test', help='test the model', action='store_true') 306 | parser.add_argument('--test_img_path', help='img path to test', type=str) 307 | 308 | args = parser.parse_args() 309 | 310 | return args 311 | if __name__ == '__main__': 312 | args = parse_args() 313 | if args.train and args.test: 314 | print('Error: cannot train and test at the same time') 315 | elif args.train: 316 | tf.app.run() 317 | else: 318 | test(args.test_img_path) 319 | -------------------------------------------------------------------------------- /old_versions/very_tiny_yolov2.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import gzip 7 | import os 8 | import sys 9 | import time 10 
| import cv2 11 | import numpy 12 | from PIL import Image 13 | from six.moves import urllib 14 | from six.moves import xrange 15 | import tensorflow as tf 16 | 17 | IMAGE_SIZE = 224 18 | NUM_CHANNELS = 3 19 | PIXEL_DEPTH = 255 20 | SEED = 66478 21 | BATCH_SIZE = 1 22 | NUM_EPOCHS = 100 23 | S = 7 24 | B = 2 25 | CLASSES = 2 26 | COORD_W = 5 27 | NOOBJ_W = 0.5 28 | PROB_THRESHOLD = 0.25 29 | NMS_THRESHOLD = 0.5 30 | TRAIN_SIZE = 122 31 | alpha = 0.1 32 | EVAL_FREQUENCY = 100 33 | TRAIN_IMG_DIR = '/home/yy/train/' 34 | TRAIN_LABEL_DIR = '/home/yy/labels/' 35 | CLASSES_NAME = ["DaLai","NonDaLai"] 36 | TEST_IMG_PATH = '/home/yy/109.jpg' 37 | RES_DIR = '/home/yy/pred_decay1W/' 38 | SAVE_MODEL = '/home/yy/tf_saver_models/model_conv5_epoch100_decay10000.ckpt' 39 | SAVE_TENSORBOARD = '/home/yy/tensorboard' 40 | TEST_MODEL = '/home/yy/tf_saver_models/model_conv5_epoch100_decay10000.ckpt' 41 | 42 | 43 | conv1_weights = tf.Variable(tf.truncated_normal([3, 3, NUM_CHANNELS+2, 64], stddev=0.01, seed=SEED, dtype=tf.float32)) 44 | conv1_biases = tf.Variable(tf.constant(0.1, shape=[64], dtype=tf.float32)) 45 | conv2_weights = tf.Variable(tf.truncated_normal([3, 3, 64, 128], stddev=0.01, seed=SEED, dtype=tf.float32)) 46 | conv2_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32)) 47 | conv3_weights = tf.Variable(tf.truncated_normal([3, 3, 128, 256], stddev=0.01, seed=SEED, dtype=tf.float32)) 48 | conv3_biases = tf.Variable(tf.constant(0.1, shape=[256], dtype=tf.float32)) 49 | conv4_weights = tf.Variable(tf.truncated_normal([3, 3, 256, 512], stddev=0.01, seed=SEED, dtype=tf.float32)) 50 | conv4_biases = tf.Variable(tf.constant(0.1, shape=[512], dtype=tf.float32)) 51 | conv5_weights = tf.Variable(tf.truncated_normal([3, 3, 512, 1024], stddev=0.01, seed=SEED, dtype=tf.float32)) 52 | conv5_biases = tf.Variable(tf.constant(0.1, shape=[1024], dtype=tf.float32)) 53 | 54 | fc1_weights = tf.Variable(tf.truncated_normal([IMAGE_SIZE * IMAGE_SIZE //1024 * 1024, 128], 
stddev=0.01, seed=SEED, dtype=tf.float32)) 55 | fc1_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32)) 56 | fc2_weights = tf.Variable(tf.truncated_normal([128, S*S*(B*5+CLASSES)], stddev=0.01, seed=SEED, dtype=tf.float32)) 57 | fc2_biases = tf.Variable(tf.constant(0.1, shape=[S*S*(B*5+CLASSES)], dtype=tf.float32)) 58 | 59 | def model(data, train=False): 60 | conv = tf.nn.conv2d(data, conv1_weights, strides=[1, 1, 1, 1], padding='SAME') 61 | conv_bias = tf.nn.bias_add(conv, conv1_biases) 62 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 63 | 64 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 65 | 66 | conv = tf.nn.conv2d(pool, conv2_weights, strides=[1, 1, 1, 1], padding='SAME') 67 | conv_bias = tf.nn.bias_add(conv, conv2_biases) 68 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 69 | 70 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 71 | 72 | conv = tf.nn.conv2d(pool, conv3_weights, strides=[1, 1, 1, 1], padding='SAME') 73 | conv_bias = tf.nn.bias_add(conv, conv3_biases) 74 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 75 | 76 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 77 | 78 | conv = tf.nn.conv2d(pool, conv4_weights, strides=[1, 1, 1, 1], padding='SAME') 79 | conv_bias = tf.nn.bias_add(conv, conv4_biases) 80 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 81 | 82 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 83 | 84 | conv = tf.nn.conv2d(pool, conv5_weights, strides=[1, 1, 1, 1], padding='SAME') 85 | conv_bias = tf.nn.bias_add(conv, conv5_biases) 86 | lrelu = tf.maximum(alpha*conv_bias, conv_bias) 87 | 88 | pool = tf.nn.max_pool(lrelu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 89 | 90 | fc1_shape = pool.get_shape().as_list() 91 | reshape = tf.reshape(pool, [fc1_shape[0], fc1_shape[1] * fc1_shape[2] * fc1_shape[3]]) 92 | 93 | fc1_hidden = 
def nms(dets, thresh, offset=1.0):
    """Greedy non-maximum suppression.

    Boxes are visited from highest to lowest score.  A box is kept when its
    overlap ratio with every previously kept box is at most ``thresh``.

    Args:
        dets: array-like of shape ``(N, 5)`` with rows
            ``[x1, y1, x2, y2, score]``.  An input without any rows is
            accepted and yields an empty result (the original raised an
            IndexError).
        thresh: largest intersection-over-union a box may have with an
            already kept box.
        offset: value added to every ``x2 - x1`` / ``y2 - y1`` extent.  The
            default ``1.0`` is the inclusive-pixel convention of the original
            code.  Pass ``0.0`` for continuous or normalised coordinates,
            where the default makes nearby but disjoint boxes look
            overlapping.

    Returns:
        A list of integer row indices into ``dets``, best score first.
    """
    dets = numpy.asarray(dets)
    if dets.size == 0:
        return []

    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + offset) * (y2 - y1 + offset)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(int(best))

        # Intersection rectangle of the best box with every remaining box.
        xx1 = numpy.maximum(x1[best], x1[rest])
        yy1 = numpy.maximum(y1[best], y1[rest])
        xx2 = numpy.minimum(x2[best], x2[rest])
        yy2 = numpy.minimum(y2[best], y2[rest])

        w = numpy.maximum(0.0, xx2 - xx1 + offset)
        h = numpy.maximum(0.0, yy2 - yy1 + offset)
        inter = w * h
        union = areas[best] + areas[rest] - inter

        # A zero union only occurs for degenerate boxes with offset == 0;
        # treat that as "no overlap" rather than dividing by zero.
        ovr = numpy.zeros_like(inter)
        numpy.divide(inter, union, out=ovr, where=union > 0)

        # Continue with the boxes that do not overlap the kept one too much.
        order = rest[ovr <= thresh]

    return keep
def get_next_minibatch(offset, path_list):
    """Return the slice of ``path_list`` that forms the next mini-batch.

    The batch is the ``BATCH_SIZE`` entries starting at ``offset``.  When that
    window would run past the end of the list, the first ``BATCH_SIZE``
    entries are returned instead.
    """
    window_end = offset + BATCH_SIZE
    # Fall back to the head of the list when the window would overrun it.
    window_start = offset if window_end <= len(path_list) else 0
    return path_list[window_start:window_start + BATCH_SIZE]
def extract_labels_yolo(path_list):
    """Build the training target matrix for a batch of label files.

    Each name in ``path_list`` is resolved as
    ``TRAIN_LABEL_DIR + name + '.txt'``.  Every non-blank line of that file is
    split on whitespace into a class index followed by four box values.  The
    first two box values select the grid cell (column from the first, row from
    the second).  The four values are then copied into each of the cell's
    ``B`` box slots with the fifth field set to 1, and the class is one-hot
    encoded in the cell's last ``CLASSES`` slots.

    Cells without an object keep 1.00001 in the first field of every box slot
    and 0 everywhere else.

    Args:
        path_list: sequence of label-file base names.

    Returns:
        float32 array of shape ``(len(path_list), S*S*(B*5+CLASSES))``.
    """
    cell_len = B * 5 + CLASSES
    labels = numpy.zeros(shape=(len(path_list), S * S * cell_len),
                         dtype=numpy.float32)
    # First field of every box slot starts at 1.00001.  For B == 2 these are
    # offsets 0 and 5 of each cell, exactly what the original element-wise
    # loop wrote.
    for k in range(B):
        labels[:, 5 * k::cell_len] = 1.00001

    for i, name in enumerate(path_list):
        with open(TRAIN_LABEL_DIR + name + '.txt', "r") as f:
            lines = f.readlines()

        for line in lines:
            data = line.split()
            if not data:
                # Tolerate blank lines such as a trailing newline.
                continue

            col_no = int(float(data[1]) * IMAGE_SIZE / (IMAGE_SIZE / S) + 1)
            row_no = int(float(data[2]) * IMAGE_SIZE / (IMAGE_SIZE / S) + 1)
            # A coordinate of exactly 1.0 used to give S + 1, and a negative
            # one gave 0; both addressed the wrong cell or ran off the array.
            # Clamp to the valid 1..S range.
            col_no = min(max(col_no, 1), S)
            row_no = min(max(row_no, 1), S)
            grid_no = (row_no - 1) * S + col_no  # 1-based cell number

            labels[i, cell_len * grid_no - CLASSES + int(data[0])] = 1

            base = cell_len * (grid_no - 1)
            box = [float(data[1]), float(data[2]),
                   float(data[3]), float(data[4])]
            for k in range(B):
                slot = base + 5 * k
                labels[i, slot] = box[0]
                labels[i, slot + 1] = box[1]
                labels[i, slot + 2] = box[2]
                labels[i, slot + 3] = box[3]
                labels[i, slot + 4] = 1

    return labels
def test(img):
    """Run the saved model on one image and write the annotated result.

    Variables are initialised and then restored from ``TEST_MODEL``.  The
    image at path ``img`` is loaded with ``extract_data_yolo`` and pushed
    through ``model``.  The output is decoded with ``get_results`` and drawn
    by ``show_results``.
    """
    with tf.Session() as session:
        tf.global_variables_initializer().run()
        tf.train.Saver().restore(session, TEST_MODEL)

        network_input = extract_data_yolo(img, False)
        prediction = session.run(model(network_input))

        boxes, labels = get_results(prediction)
        show_results(img, boxes, labels)
tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases)) 297 | 298 | loss += 5e-4 * regularizers 299 | 300 | batch = tf.Variable(0, dtype=tf.float32) 301 | 302 | learning_rate = tf.train.exponential_decay( 303 | 0.01, 304 | batch * BATCH_SIZE, 305 | 10000, 306 | 0.95, 307 | staircase=True) 308 | 309 | op_func = tf.train.MomentumOptimizer(learning_rate,0.9) 310 | 311 | tvars = tf.trainable_variables() 312 | grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), 0.5) 313 | optimizer = op_func.apply_gradients(zip(grads, tvars), global_step=batch) 314 | 315 | tf.summary.scalar("loss", loss) 316 | tf.summary.scalar("lr", learning_rate) 317 | merged_summary = tf.summary.merge_all() 318 | with tf.Session() as sess: 319 | 320 | tf.global_variables_initializer().run() 321 | saver = tf.train.Saver() 322 | print('Initialized!') 323 | writer = tf.summary.FileWriter(SAVE_TENSORBOARD, sess.graph) 324 | 325 | for step in xrange(int(num_epochs * TRAIN_SIZE) // BATCH_SIZE): 326 | offset = (step * BATCH_SIZE) % (TRAIN_SIZE - BATCH_SIZE) 327 | batch_data = extract_data_yolo(get_next_minibatch(offset, train_img_list)) 328 | batch_labels = extract_labels_yolo(get_next_minibatch(offset, train_img_list)) 329 | 330 | feed_dict = {train_data_node: batch_data, 331 | train_labels_node: batch_labels} 332 | 333 | _,los,summary = sess.run([optimizer, loss, merged_summary], feed_dict=feed_dict) 334 | 335 | if step % EVAL_FREQUENCY == 0: 336 | print('loss: %.6f' % los) 337 | #print('weight: %.5f' % sess.run(conv1_weights)[0,0,0,0]) 338 | writer.add_summary(summary, step) 339 | save_path = saver.save(sess, SAVE_MODEL) 340 | 341 | def parse_args(): 342 | parser = argparse.ArgumentParser(description='YOLO demo') 343 | parser.add_argument('--train', help='train the model', action='store_true') 344 | parser.add_argument('--test', help='test the model', action='store_true') 345 | parser.add_argument('--test_img_path', help='img path to test', type=str) 346 | 347 | args = parser.parse_args() 348 | 
349 | return args 350 | if __name__ == '__main__': 351 | args = parse_args() 352 | if args.train and args.test: 353 | print('Error: cannot train and test at the same time') 354 | elif args.train: 355 | tf.app.run() 356 | else: 357 | test(args.test_img_path) 358 | -------------------------------------------------------------------------------- /old_versions/very_tiny_yolov3.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import gzip 7 | import os 8 | import sys 9 | import time 10 | import cv2 11 | import numpy 12 | from PIL import Image 13 | from six.moves import urllib 14 | from six.moves import xrange 15 | import tensorflow as tf 16 | 17 | IMAGE_SIZE = 224 18 | NUM_CHANNELS = 3 19 | PIXEL_DEPTH = 255 20 | SEED = 66478 21 | BATCH_SIZE = 1 22 | NUM_EPOCHS = 100 23 | S = 7 24 | B = 2 25 | CLASSES = 2 26 | COORD_W = 5 27 | NOOBJ_W = 0.5 28 | PROB_THRESHOLD = 0.25 29 | NMS_THRESHOLD = 0.5 30 | TRAIN_SIZE = 122 31 | alpha = 0.1 32 | EVAL_FREQUENCY = 100 33 | TRAIN_IMG_DIR = '/home/yy/train/' 34 | TRAIN_LABEL_DIR = '/home/yy/labels/' 35 | CLASSES_NAME = ["DaLai","NonDaLai"] 36 | TEST_IMG_DIR = '/home/yy/train/' 37 | TEST_LABEL_DIR = 'home/yy/labels/' 38 | RES_DIR = '/home/yy/subnets/' 39 | SAVE_MODEL = '/home/yy/tf_saver_models/model_subnets.ckpt' 40 | SAVE_TENSORBOARD = '/home/yy/tensorboard' 41 | 42 | 43 | conv1_weights = tf.Variable(tf.truncated_normal([3, 3, NUM_CHANNELS+2, 64], stddev=0.01, seed=SEED, dtype=tf.float32)) 44 | conv1_biases = tf.Variable(tf.constant(0.1, shape=[64], dtype=tf.float32)) 45 | conv2_weights = tf.Variable(tf.truncated_normal([3, 3, 64, 128], stddev=0.01, seed=SEED, dtype=tf.float32)) 46 | conv2_biases = tf.Variable(tf.constant(0.1, shape=[128], dtype=tf.float32)) 47 | conv3_weights = tf.Variable(tf.truncated_normal([3, 3, 128, 256], stddev=0.01, seed=SEED, 
def _conv_lrelu(inputs, weights, biases):
    """Stride-1 SAME convolution, bias add, then leaky ReLU with slope ``alpha``."""
    conv = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='SAME')
    conv_bias = tf.nn.bias_add(conv, biases)
    return tf.maximum(alpha * conv_bias, conv_bias)


def _max_pool_2x2(inputs):
    """2x2 max pooling with stride 2 and SAME padding."""
    return tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


def model(data, train=False):
    """Build the two-headed detection network and return its flat output.

    Five conv / leaky-ReLU / max-pool stages feed two fully connected heads:

    * the coordinate head (``fc1``/``fc2``) reads the final pooled map and
      produces ``B*5`` values per grid cell;
    * the class head (``fc3``/``fc4``) reads an average-pooled copy of the
      last conv activation and produces ``CLASSES`` values per grid cell.

    Both heads are interleaved per cell into the layout the loss expects.

    Args:
        data: input image tensor with a static batch dimension.
        train: accepted for interface compatibility; not used.

    Returns:
        Tensor of shape ``[BATCH_SIZE, S*S*(B*5+CLASSES)]``.  The result is
        assembled for ``BATCH_SIZE`` rows regardless of the batch size of
        ``data``.
    """
    stages = [
        (conv1_weights, conv1_biases),
        (conv2_weights, conv2_biases),
        (conv3_weights, conv3_biases),
        (conv4_weights, conv4_biases),
        (conv5_weights, conv5_biases),
    ]
    pool = data
    for weights, biases in stages:
        lrelu = _conv_lrelu(pool, weights, biases)
        pool = _max_pool_2x2(lrelu)
    # After the loop `lrelu` is the conv5 activation (before its max pool) and
    # `pool` is the final pooled feature map.

    # Coordinate head.
    fc1_shape = pool.get_shape().as_list()
    reshape = tf.reshape(pool, [fc1_shape[0], fc1_shape[1] * fc1_shape[2] * fc1_shape[3]])

    fc1_hidden = tf.matmul(reshape, fc1_weights) + fc1_biases
    fc1_out = tf.maximum(alpha * fc1_hidden, fc1_hidden)

    coors = tf.matmul(fc1_out, fc2_weights) + fc2_biases

    # Class head.  Integer division keeps the window sizes as ints; the
    # original `/` yields a float under the file's true-division import.
    # NOTE(review): the window looks intended to cover the whole conv5 map so
    # that fc3 receives one value per channel -- confirm against fc3_weights.
    window = IMAGE_SIZE * IMAGE_SIZE // 1024
    pool = tf.nn.avg_pool(lrelu, ksize=[1, window, window, 1], strides=[1, window, window, 1], padding='SAME')

    fc3_shape = pool.get_shape().as_list()
    reshape = tf.reshape(pool, [fc3_shape[0], fc3_shape[1] * fc3_shape[2] * fc3_shape[3]])

    fc3_hidden = tf.matmul(reshape, fc3_weights) + fc3_biases
    fc3_out = tf.maximum(alpha * fc3_hidden, fc3_hidden)

    probs = tf.matmul(fc3_out, fc4_weights) + fc4_biases

    # Interleave per cell: B*5 box values followed by CLASSES scores.  The
    # original hard-coded 10 for the box values, which equals B*5 only for
    # B == 2.
    output = []
    for i in range(BATCH_SIZE):
        for j in range(S * S):
            for k in range(B * 5):
                output.append(coors[i, j * B * 5 + k])
            for k in range(CLASSES):
                output.append(probs[i, j * CLASSES + k])
    output = tf.reshape(output, [BATCH_SIZE, S * S * (B * 5 + CLASSES)])

    return output
nms(dets, thresh): 126 | """Non maximum suppression""" 127 | x1 = dets[:, 0] 128 | y1 = dets[:, 1] 129 | x2 = dets[:, 2] 130 | y2 = dets[:, 3] 131 | scores = dets[:, 4] 132 | 133 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 134 | order = scores.argsort()[::-1] 135 | 136 | keep = [] 137 | while order.size > 0: 138 | i = order[0] 139 | keep.append(i) 140 | xx1 = numpy.maximum(x1[i], x1[order[1:]]) 141 | yy1 = numpy.maximum(y1[i], y1[order[1:]]) 142 | xx2 = numpy.minimum(x2[i], x2[order[1:]]) 143 | yy2 = numpy.minimum(y2[i], y2[order[1:]]) 144 | 145 | w = numpy.maximum(0.0, xx2 - xx1 + 1) 146 | h = numpy.maximum(0.0, yy2 - yy1 + 1) 147 | inter = w * h 148 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 149 | 150 | inds = numpy.where(ovr <= thresh)[0] 151 | order = order[inds + 1] 152 | 153 | return keep 154 | 155 | def get_results(output): 156 | results = [] 157 | classes = [] 158 | probs = numpy.ndarray(shape=[CLASSES,]) 159 | for p in range(B): 160 | for j in range(4 + p*5, S*S*(B*5+CLASSES), B*5+CLASSES): 161 | for i in range(CLASSES): 162 | probs[i] = output[0][j] * output[0][j + 1+ (B-1-p)*5 + i] 163 | 164 | cls_ind = probs.argsort()[::-1][0] 165 | if probs[cls_ind] > PROB_THRESHOLD: 166 | results.append([output[0][j-4] - output[0][j-2]/2, output[0][j-3] - output[0][j-3]/2, output[0][j-4] + output[0][j-2]/2, output[0][j-3] + output[0][j-3]/2, probs[cls_ind]]) 167 | classes.append(cls_ind) 168 | 169 | res = numpy.array(results).astype(numpy.float32) 170 | if len(res) != 0: 171 | keep = nms(res, NMS_THRESHOLD) 172 | results_ = [] 173 | classes_ = [] 174 | for i in keep: 175 | results_.append(results[i]) 176 | classes_.append(classes[i]) 177 | 178 | return results_,classes_ 179 | else: 180 | return [],[] 181 | 182 | def show_results(img_path, results, classes): 183 | img = cv2.imread(img_path).copy() 184 | if len(results) != 0: 185 | for i in range(len(results)): 186 | x1 = int(results[i][0]*img.shape[1]) 187 | y1 = int(results[i][1]*img.shape[0]) 188 | x2 = 
int(results[i][2]*img.shape[1]) 189 | y2 = int(results[i][3]*img.shape[0]) 190 | score = results[i][4] 191 | cv2.rectangle(img, (x1,y1), (x2,y2), (0,255,0), 2) 192 | cv2.putText(img, CLASSES_NAME[classes[i]] + ' : %.2f' % results[i][4], (x1+5,y1-7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 2) 193 | 194 | cv2.imwrite(RES_DIR + img_path.split('/')[-1], img) 195 | 196 | def get_next_minibatch(offset, path_list): 197 | if offset+BATCH_SIZE > len(path_list): 198 | # random.shuffle(path_list) 199 | return path_list[:BATCH_SIZE] 200 | else: 201 | return path_list[offset:offset+BATCH_SIZE] 202 | 203 | def extract_data_yolo(path_list, train=True): 204 | if train: 205 | data = numpy.ndarray(shape=(len(path_list),IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS + 2),dtype=numpy.float32) 206 | 207 | for i in range(len(path_list)): 208 | for j in range(IMAGE_SIZE): 209 | data[i,j,:,-2] = j/IMAGE_SIZE 210 | 211 | for i in range(len(path_list)): 212 | for j in range(IMAGE_SIZE): 213 | data[i,:,j,-1] = j/IMAGE_SIZE 214 | 215 | for i in range(len(path_list)): 216 | img = Image.open(TRAIN_IMG_DIR+path_list[i]+'.jpg') 217 | img_resize = img.resize((IMAGE_SIZE,IMAGE_SIZE)) 218 | data[i,:,:,:-2] = numpy.array(img_resize).astype(numpy.float32).reshape(IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS) 219 | data[i,:,:,:-2] = (data[i,:,:,:-2] - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH 220 | 221 | return data 222 | else: 223 | data = numpy.ndarray(shape=(1,IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS + 2), dtype=numpy.float32) 224 | 225 | for i in range(IMAGE_SIZE): 226 | data[0,i,:,-2] = i/IMAGE_SIZE 227 | 228 | for i in range(IMAGE_SIZE): 229 | data[0,:,i,-1] = i/IMAGE_SIZE 230 | 231 | img = Image.open(path_list) 232 | img_resize = img.resize((IMAGE_SIZE,IMAGE_SIZE)) 233 | data[0,:,:,:-2] = numpy.array(img_resize).astype(numpy.float32).reshape(1,IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS) 234 | data[0,:,:,:-2] = (data[0,:,:,:-2] - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH 235 | return data 236 | 237 | def iou(box1,box2): 238 | tb = 
min(box1[0]+0.5*box1[2],box2[0]+0.5*box2[2])-max(box1[0]-0.5*box1[2],box2[0]-0.5*box2[2]) 239 | lr = min(box1[1]+0.5*box1[3],box2[1]+0.5*box2[3])-max(box1[1]-0.5*box1[3],box2[1]-0.5*box2[3]) 240 | if tb < 0 or lr < 0 : intersection = 0 241 | else : intersection = tb*lr 242 | return intersection / (box1[2]*box1[3] + box2[2]*box2[3] - intersection) 243 | 244 | 245 | def extract_labels_yolo(path_list, train=True): 246 | if train: 247 | root = TRAIN_LABEL_DIR 248 | else: 249 | root = TEST_LABEL_DIR 250 | labels = numpy.ndarray(shape=(len(path_list),S*S*(B*5+CLASSES)), dtype=numpy.float32) 251 | for i in range(labels.shape[0]): 252 | for j in range(labels.shape[1]): 253 | if j%(B*5+CLASSES) == 0 or j%(B*5+CLASSES) == 5: 254 | labels[i][j] = 1.00001 255 | else: 256 | labels[i][j] = 0 257 | for i in range(len(path_list)): 258 | with open(root + path_list[i] + '.txt',"r") as f: 259 | lines = f.readlines() 260 | for j in range(len(lines)): 261 | data = lines[j].split() 262 | col_no = int(float(data[1])*IMAGE_SIZE/(IMAGE_SIZE/S)+1) 263 | row_no = int(float(data[2])*IMAGE_SIZE/(IMAGE_SIZE/S)+1) 264 | grid_no = (row_no-1)*S+col_no 265 | # labels[i,(B*5+CLASSES)*grid_no-1] = float(data[0]) 266 | labels[i,(B*5+CLASSES)*grid_no-CLASSES + int(data[0])] = 1 267 | for k in range(B): 268 | labels[i,(B*5+CLASSES)*(grid_no-1) + 5*k] = float(data[1]) 269 | labels[i,(B*5+CLASSES)*(grid_no-1) + 5*k + 1] = float(data[2]) 270 | labels[i,(B*5+CLASSES)*(grid_no-1) + 5*k + 2] = float(data[3]) 271 | labels[i,(B*5+CLASSES)*(grid_no-1) + 5*k + 3] = float(data[4]) 272 | labels[i,(B*5+CLASSES)*(grid_no-1) + 5*k + 4] = 1 273 | 274 | return labels 275 | 276 | def loss_func_yolo(output, label): 277 | res = 0 278 | 279 | for i in range(BATCH_SIZE): 280 | for j in range(0, S*S*(B*5+CLASSES), B*5+CLASSES): 281 | res += COORD_W * tf.sign(label[i][j+2]) * (tf.square(output[i][j] - label[i][j]) + tf.square(output[i][j+1]-label[i][j+1]) + 282 | tf.square(output[i][j+2]/(label[i][j+2]+1e-7) - 1) + 283 | 
tf.square(output[i][j+3]/(label[i][j+3]+1e-7) - 1)) 284 | 285 | res += tf.sign(label[i][j+2]) * (tf.square(output[i][j+4] - label[i][j+4])) 286 | 287 | res += NOOBJ_W * tf.sign(tf.floor(label[i][j])) * (tf.square(output[i][j+4] - label[i][j+4])) 288 | 289 | res += COORD_W * tf.sign(label[i][j+7]) * (tf.square(output[i][j+5] - label[i][j+5]) + tf.square(output[i][j+6]-label[i][j+6]) + 290 | tf.square(output[i][j+7]/(label[i][j+7]+1e-7) - 1) + 291 | tf.square(output[i][j+8]/(label[i][j+8]+1e-7) - 1)) 292 | 293 | res += tf.sign(label[i][j+7]) * (tf.square(output[i][j+9] - label[i][j+9])) 294 | 295 | res += NOOBJ_W * tf.sign(tf.floor(label[i][j+5])) * (tf.square(output[i][j+9] - label[i][j+9])) 296 | 297 | res += tf.sign(label[i][j+7]) * (tf.square(output[i][j+10] - label[i][j+10]) + tf.square(output[i][j+11] - label[i][j+11])) 298 | 299 | return res 300 | 301 | def test_from_img(img, test_model, display_loss=False): 302 | with tf.Session() as sess: 303 | tf.global_variables_initializer().run() 304 | saver = tf.train.Saver() 305 | saver.restore(sess, test_model) 306 | data = extract_data_yolo(img, train=False) 307 | out = sess.run(model(data)) 308 | if display_loss: 309 | label = extract_labels_yolo([img], train=False) 310 | print('loss: %.6f' % loss_func_yolo(out, label)) 311 | results,classes = get_results(out) 312 | show_results(img, results, classes) 313 | 314 | def test_from_dir(imgdir, test_model, display_loss=False): 315 | with tf.Session() as sess: 316 | tf.global_variables_initializer().run() 317 | saver = tf.train.Saver() 318 | saver.restore(sess, test_model) 319 | if display_loss: 320 | loss = 0 321 | for root, dirs, files in os.walk(imgdir[:-1]): 322 | for file in files: 323 | img = os.path.join(root, file) 324 | label = extract_labels_yolo([img], train=False) 325 | data = extract_data_yolo(img, train=False) 326 | out = sess.run(model(data)) 327 | loss += loss_func_yolo(out, label) 328 | results,classes = get_results(out) 329 | show_results(img, results, 
classes) 330 | print('loss: %.6f' % loss) 331 | else: 332 | for root, dirs, files in os.walk(imgdir[:-1]): 333 | for file in files: 334 | img = os.path.join(root, file) 335 | data = extract_data_yolo(img, train=False) 336 | out = sess.run(model(data)) 337 | results,classes = get_results(out) 338 | show_results(img, results, classes) 339 | 340 | def main(argv=None): 341 | num_epochs = NUM_EPOCHS 342 | train_img_list = [] 343 | for rt,dirs,filenames in os.walk(TRAIN_IMG_DIR): 344 | for filename in filenames: 345 | train_img_list.append(filename[:-4]) 346 | 347 | train_data_node = tf.placeholder( 348 | tf.float32, 349 | shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS+2)) 350 | train_labels_node = tf.placeholder(tf.float32, shape=(BATCH_SIZE, S*S*(B*5+CLASSES))) 351 | 352 | logits = model(train_data_node, True) 353 | loss = loss_func_yolo(logits, train_labels_node) 354 | 355 | regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) + 356 | tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases)) 357 | 358 | loss += 5e-4 * regularizers 359 | 360 | batch = tf.Variable(0, dtype=tf.float32) 361 | 362 | learning_rate = tf.train.exponential_decay( 363 | 0.01, 364 | batch * BATCH_SIZE, 365 | 10000, 366 | 0.95, 367 | staircase=True) 368 | 369 | op_func = tf.train.MomentumOptimizer(learning_rate,0.9) 370 | 371 | tvars = tf.trainable_variables() 372 | grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), 0.5) 373 | optimizer = op_func.apply_gradients(zip(grads, tvars), global_step=batch) 374 | 375 | tf.summary.scalar("loss", loss) 376 | tf.summary.scalar("lr", learning_rate) 377 | merged_summary = tf.summary.merge_all() 378 | with tf.Session() as sess: 379 | 380 | tf.global_variables_initializer().run() 381 | saver = tf.train.Saver() 382 | print('Initialized!') 383 | writer = tf.summary.FileWriter(SAVE_TENSORBOARD, sess.graph) 384 | 385 | for step in xrange(int(num_epochs * TRAIN_SIZE) // BATCH_SIZE): 386 | offset = (step * BATCH_SIZE) % (TRAIN_SIZE 
- BATCH_SIZE) 387 | batch_data = extract_data_yolo(get_next_minibatch(offset, train_img_list)) 388 | batch_labels = extract_labels_yolo(get_next_minibatch(offset, train_img_list)) 389 | 390 | feed_dict = {train_data_node: batch_data, 391 | train_labels_node: batch_labels} 392 | 393 | _,los,summary = sess.run([optimizer, loss, merged_summary], feed_dict=feed_dict) 394 | 395 | if step % EVAL_FREQUENCY == 0: 396 | print('loss: %.6f' % los) 397 | #print('weight: %.5f' % sess.run(conv1_weights)[0,0,0,0]) 398 | writer.add_summary(summary, step) 399 | save_path = saver.save(sess, SAVE_MODEL) 400 | test_from_dir(TEST_IMG_DIR, display_loss=True) 401 | 402 | def parse_args(): 403 | parser = argparse.ArgumentParser(description='YOLO demo') 404 | parser.add_argument('--train', help='train the model', action='store_true') 405 | parser.add_argument('--test', help='test the model', action='store_true') 406 | parser.add_argument('--test_img_path', help='img path to test', type=str) 407 | parser.add_argument('--display_loss', default=False, help='whether display the loss', action='store_true') 408 | parser.add_argument('--test_model', help='model to test', type=str) 409 | args = parser.parse_args() 410 | 411 | return args 412 | if __name__ == '__main__': 413 | args = parse_args() 414 | if args.train and args.test: 415 | print('Error: cannot train and test at the same time') 416 | elif args.train: 417 | tf.app.run() 418 | elif args.test_img_path[-1] == '/': 419 | test_from_dir(args.test_img_path, args.test_model, args.display_loss) 420 | else: 421 | test_from_img(args.test_img_path, args.test_model, args.display_loss) 422 | -------------------------------------------------------------------------------- /some_tests/108.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lesliejackson/YOLOv1_tensorflow/2a64ab376607babcebb7c54e92142c5d9c28f537/some_tests/108.jpg 
-------------------------------------------------------------------------------- /some_tests/109.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lesliejackson/YOLOv1_tensorflow/2a64ab376607babcebb7c54e92142c5d9c28f537/some_tests/109.jpg -------------------------------------------------------------------------------- /some_tests/126.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lesliejackson/YOLOv1_tensorflow/2a64ab376607babcebb7c54e92142c5d9c28f537/some_tests/126.jpg -------------------------------------------------------------------------------- /some_tests/152.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lesliejackson/YOLOv1_tensorflow/2a64ab376607babcebb7c54e92142c5d9c28f537/some_tests/152.jpg -------------------------------------------------------------------------------- /some_tests/171.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lesliejackson/YOLOv1_tensorflow/2a64ab376607babcebb7c54e92142c5d9c28f537/some_tests/171.jpg -------------------------------------------------------------------------------- /some_tests/173.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lesliejackson/YOLOv1_tensorflow/2a64ab376607babcebb7c54e92142c5d9c28f537/some_tests/173.jpg -------------------------------------------------------------------------------- /yolo_v1.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gzip 3 | import os 4 | import sys 5 | import time 6 | import cv2 7 | import numpy 8 | import tensorflow as tf 9 | from PIL import Image 10 | from six.moves import urllib 11 | from six.moves import xrange 12 | from nets import nets_factory 13 | 14 | 
# Command line configuration.  The value in square brackets at the end of
# every help text is the default.
flags = tf.app.flags
flags.DEFINE_integer("epoch", 25, "Epoch to train [25]")
flags.DEFINE_integer("S", 7, "cut the img to S*S grids[7]")
flags.DEFINE_integer("num_classes", 2, "number of classes [2]")
flags.DEFINE_integer("B", 2, "number of bboxs for one grid to predict [2]")
flags.DEFINE_float("learning_rate", 0.001, "Learning rate of the optimizer [0.001]")
flags.DEFINE_float("alpha", 0.1, "alpha of leaky relu [0.1]")
flags.DEFINE_float("nms_threshold", 0.5, "threshold of nms [0.5]")
flags.DEFINE_float("prob_threshold", 0.25, "probability threshold of test [0.25]")
flags.DEFINE_float("coordinate_weight", 5, "weight of coordinate regression in loss function [5]")
flags.DEFINE_float("noobj_weight", 0.5, "weight of confidence regression in loss function when there is no obj in grid [0.5]")
flags.DEFINE_integer("batch_size", 1, "The size of batch images [1]")
flags.DEFINE_integer("img_size", 224, "image size [224]")
flags.DEFINE_integer("channel_dim", 3, "Dimension of image color [3]")
flags.DEFINE_string("model_name", 'inception_v4', "which model to use")
flags.DEFINE_string("img_pattern", 'jpg', "jpg or png")
flags.DEFINE_integer("save_summary_step", 100, "save summary per [] steps [100]")
flags.DEFINE_integer("save_model_step", 100, "save model per [] steps [100]")
flags.DEFINE_integer("log_loss_step", 100, "log loss information per [] steps [100]")
flags.DEFINE_string("checkpoint_dir", '/home/yy/yolo_/ckpt', "Directory name to save the checkpoints")
flags.DEFINE_string("tensorboard_dir", '/home/yy/yolo_/tb', "Directory name to save the tensorboard")
flags.DEFINE_string("train_dir", '/home/yy/yolo_/train', "Directory name to train images")
flags.DEFINE_string("train_label", '/home/yy/yolo_/label', "Directory name to train labels")
flags.DEFINE_string("test_res_dir", None, "Directory name to save test images")
flags.DEFINE_string("test_data", None, "Directory name to test images")
flags.DEFINE_string("test_label", None, "Directory name to test labels")
flags.DEFINE_boolean("is_test", False, "True for testing, False for training [False]")
FLAGS = flags.FLAGS

slim = tf.contrib.slim
CLASSES_NAME = ["DaLai","NonDaLai"]


def nms(dets, thresh):
    """Greedy non-maximum suppression (adapted from rbg/py-faster-rcnn).

    Args:
        dets: float array of shape (N, 5) with rows (x1, y1, x2, y2, score).
        thresh: boxes whose overlap with an already kept box exceeds this
            value are discarded.

    Returns:
        List of kept row indices, ordered by decreasing score.
    """
    # NOTE(review): the "+ 1" terms treat coordinates as inclusive pixel
    # indices, but get_results() passes coordinates normalised to [0, 1].
    # Behaviour is left unchanged here; confirm whether this is intended.
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        xx1 = numpy.maximum(x1[i], x1[rest])
        yy1 = numpy.maximum(y1[i], y1[rest])
        xx2 = numpy.minimum(x2[i], x2[rest])
        yy2 = numpy.minimum(y2[i], y2[rest])

        w = numpy.maximum(0.0, xx2 - xx1 + 1)
        h = numpy.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[rest] - inter)

        # `inds` indexes into `rest`, hence the +1 to index into `order`.
        inds = numpy.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep


def get_results(output):
    """Turn the raw prediction of one image into detections.

    Args:
        output: array whose first row is the flat prediction vector of shape
            (S*S*(B*5+num_classes),); per cell the B boxes
            (x, y, w, h, confidence) come first, then the class scores.

    Returns:
        (results, classes): `results` is a list of
        [x1, y1, x2, y2, score] boxes that survived thresholding and NMS,
        `classes` the matching class indices.  Both are empty when nothing
        exceeds FLAGS.prob_threshold.
    """
    cell_len = FLAGS.B * 5 + FLAGS.num_classes
    results = []
    classes = []
    probs = numpy.ndarray(shape=[FLAGS.num_classes,])
    for p in range(FLAGS.B):
        # j walks over the confidence entry of box p in every grid cell.
        for j in range(4 + p * 5, FLAGS.S * FLAGS.S * cell_len, cell_len):
            class_start = j + 1 + (FLAGS.B - 1 - p) * 5
            for i in range(FLAGS.num_classes):
                probs[i] = output[0][j] * output[0][class_start + i]

            cls_ind = int(numpy.argmax(probs))
            if probs[cls_ind] > FLAGS.prob_threshold:
                x = output[0][j - 4]
                y = output[0][j - 3]
                w = output[0][j - 2]
                # The original used the y value (j-3) as the height here.
                h = output[0][j - 1]
                results.append([x - w / 2, y - h / 2, x + w / 2, y + h / 2, probs[cls_ind]])
                classes.append(cls_ind)

    if not results:
        return [], []

    res = numpy.array(results).astype(numpy.float32)
    keep = nms(res, FLAGS.nms_threshold)
    return [results[i] for i in keep], [classes[i] for i in keep]


def show_results(img_path, results, classes):
    """Draw the detections on the image and write it into FLAGS.test_res_dir.

    Args:
        img_path: path of the image that was fed to the network.
        results: boxes as returned by get_results() (normalised coordinates).
        classes: class index for every box.

    Raises:
        IOError: if OpenCV cannot read `img_path`.
    """
    img = cv2.imread(img_path)
    if img is None:
        raise IOError('cannot read image: %s' % img_path)
    img = img.copy()
    height, width = img.shape[0], img.shape[1]
    for box, cls_ind in zip(results, classes):
        x1 = int(box[0] * width)
        y1 = int(box[1] * height)
        x2 = int(box[2] * width)
        y2 = int(box[3] * height)
        cv2.rectangle(img, (x1,y1), (x2,y2), (0,255,0), 2)
        cv2.putText(img, CLASSES_NAME[cls_ind] + ' : %.2f' % box[4], (x1+5,y1-7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 2)

    cv2.imwrite(FLAGS.test_res_dir + '/' + img_path.split('/')[-1], img)


def get_next_minibatch(offset, path_list):
    """Return FLAGS.batch_size names starting at `offset`.

    When fewer than a full batch remain, `path_list` is shuffled in place and
    its first batch is returned.
    """
    if offset + FLAGS.batch_size > len(path_list):
        # The original called random.shuffle, but `random` is never imported;
        # numpy is, and main() already shuffles with it.
        numpy.random.shuffle(path_list)
        return path_list[:FLAGS.batch_size]
    return path_list[offset:offset + FLAGS.batch_size]


def extract_data_yolo(path_list, train=True):
    """Load images into a float32 batch scaled to [-1, 1].

    Args:
        path_list: when `train` is true, a list of image names (no directory,
            no extension) below FLAGS.train_dir; otherwise the path of a
            single image file.
        train: selects how `path_list` is interpreted, see above.

    Returns:
        float32 array of shape
        (n, FLAGS.img_size, FLAGS.img_size, FLAGS.channel_dim).
    """
    size = FLAGS.img_size
    if train:
        paths = [FLAGS.train_dir + '/' + name + '.' + FLAGS.img_pattern for name in path_list]
    else:
        paths = [path_list]

    data = numpy.ndarray(shape=(len(paths), size, size, FLAGS.channel_dim), dtype=numpy.float32)
    for i, path in enumerate(paths):
        img = Image.open(path)
        img_resize = img.resize((size, size))
        data[i] = numpy.array(img_resize).astype(numpy.float32).reshape(size, size, FLAGS.channel_dim)

    data = (data - 127.5) / 127.5
    return data
145 | 146 | def iou(box1,box2): 147 | tb = min(box1[0]+0.5*box1[2],box2[0]+0.5*box2[2])-max(box1[0]-0.5*box1[2],box2[0]-0.5*box2[2]) 148 | lr = min(box1[1]+0.5*box1[3],box2[1]+0.5*box2[3])-max(box1[1]-0.5*box1[3],box2[1]-0.5*box2[3]) 149 | if tb < 0 or lr < 0 : intersection = 0 150 | else : intersection = tb*lr 151 | return intersection / (box1[2]*box1[3] + box2[2]*box2[3] - intersection) 152 | 153 | 154 | def extract_labels_yolo(path_list, train=True): 155 | if train: 156 | root = FLAGS.train_label 157 | else: 158 | root = FLAGS.test_labels 159 | labels = numpy.ndarray(shape=(len(path_list),FLAGS.S*FLAGS.S*(FLAGS.B*5+FLAGS.num_classes)), dtype=numpy.float32) 160 | for i in range(labels.shape[0]): 161 | for j in range(labels.shape[1]): 162 | if j%(FLAGS.B*5+FLAGS.num_classes) == 0 or j%(FLAGS.B*5+FLAGS.num_classes) == 5: 163 | labels[i][j] = 1.00001 164 | else: 165 | labels[i][j] = 0 166 | for i in range(len(path_list)): 167 | with open(root + '/' + path_list[i] + '.txt',"r") as f: 168 | lines = f.readlines() 169 | for j in range(len(lines)): 170 | data = lines[j].split() 171 | col_no = int(float(data[1])*FLAGS.img_size/(FLAGS.img_size/FLAGS.S)+1) 172 | row_no = int(float(data[2])*FLAGS.img_size/(FLAGS.img_size/FLAGS.S)+1) 173 | grid_no = (row_no-1)*FLAGS.S+col_no 174 | # labels[i,(B*5+CLASSES)*grid_no-1] = float(data[0]) 175 | labels[i,(FLAGS.B*5+FLAGS.num_classes)*grid_no-FLAGS.num_classes + int(data[0])] = 1 176 | for k in range(FLAGS.B): 177 | labels[i,(FLAGS.B*5+FLAGS.num_classes)*(grid_no-1) + 5*k] = float(data[1]) 178 | labels[i,(FLAGS.B*5+FLAGS.num_classes)*(grid_no-1) + 5*k + 1] = float(data[2]) 179 | labels[i,(FLAGS.B*5+FLAGS.num_classes)*(grid_no-1) + 5*k + 2] = float(data[3]) 180 | labels[i,(FLAGS.B*5+FLAGS.num_classes)*(grid_no-1) + 5*k + 3] = float(data[4]) 181 | labels[i,(FLAGS.B*5+FLAGS.num_classes)*(grid_no-1) + 5*k + 4] = 1 182 | 183 | return labels 184 | 185 | def loss_func_yolo(output, label): 186 | res = 0 187 | 188 | for i in 
range(FLAGS.batch_size): 189 | for j in range(0, FLAGS.S*FLAGS.S*(FLAGS.B*5+FLAGS.num_classes), FLAGS.B*5+FLAGS.num_classes): 190 | highest_bbox = output[i][j+4]-output[i][j+9] 191 | """here we only compute the loss of bbox which have the highest confidence""" 192 | """we use tf.sign(tf.maximum(highest_bbox,0)) to do that""" 193 | 194 | res += FLAGS.coordinate_weight * tf.sign(tf.maximum(highest_bbox,0)) * tf.sign(label[i][j+2]) * ( 195 | tf.square(output[i][j] - label[i][j]) + 196 | tf.square(output[i][j+1]-label[i][j+1]) + 197 | tf.square(tf.sqrt(output[i][j+2])-tf.sqrt(label[i][j+2])) + 198 | tf.square(tf.sqrt(output[i][j+3])-tf.sqrt(label[i][j+3]))) 199 | 200 | res += tf.sign(tf.maximum(highest_bbox,0)) * tf.sign(label[i][j+2]) * (tf.square(output[i][j+4] - label[i][j+4])) 201 | 202 | res += FLAGS.noobj_weight * tf.sign(tf.maximum(highest_bbox,0)) * tf.sign(tf.floor(label[i][j])) * (tf.square(output[i][j+4] - label[i][j+4])) 203 | 204 | res += FLAGS.coordinate_weight * tf.sign(tf.maximum(-highest_bbox,0)) * tf.sign(label[i][j+7]) * ( 205 | tf.square(output[i][j+5] - label[i][j+5]) + 206 | tf.square(output[i][j+6]-label[i][j+6]) + 207 | tf.square(tf.sqrt(output[i][j+7])-tf.sqrt(label[i][j+7])) + 208 | tf.square(tf.sqrt(output[i][j+8])-tf.sqrt(label[i][j+8]))) 209 | 210 | res += tf.sign(tf.maximum(-highest_bbox,0)) * tf.sign(label[i][j+7]) * (tf.square(output[i][j+9] - label[i][j+9])) 211 | 212 | res += FLAGS.noobj_weight * tf.sign(tf.maximum(-highest_bbox,0)) * tf.sign(tf.floor(label[i][j+5])) * (tf.square(output[i][j+9] - label[i][j+9])) 213 | 214 | res += tf.sign(label[i][j+7]) * (tf.square(output[i][j+10] - label[i][j+10]) + tf.square(output[i][j+11] - label[i][j+11])) 215 | 216 | return res/FLAGS.batch_size 217 | 218 | def test_from_dir(imgdir,display_loss=False): 219 | network_fn = nets_factory.get_network_fn(FLAGS.model_name, 220 | FLAGS.S*FLAGS.S*(FLAGS.B*5+FLAGS.num_classes), 221 | is_training=False) 222 | with tf.Session() as sess: 223 | 
tf.global_variables_initializer().run() 224 | saver = tf.train.Saver() 225 | print("Reading checkpoints...") 226 | 227 | ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) 228 | if ckpt and ckpt.model_checkpoint_path: 229 | ckpt_name = os.path.basename(ckpt.model_checkpoint_path) 230 | saver.restore(sess, os.path.join(FLAGS.checkpoint_dir, ckpt_name)) 231 | print("Success to read {}".format(ckpt_name)) 232 | else: 233 | print("Failed to find a checkpoint") 234 | 235 | #saver.restore(sess, FLAGS.checkpoint_dir) 236 | 237 | for root, dirs, files in os.walk(imgdir): 238 | for file in files: 239 | img = os.path.join(root, file) 240 | data = extract_data_yolo(img, train=False) 241 | output,_ = network_fn(data) 242 | out = sess.run(output) 243 | results,classes = get_results(out) 244 | show_results(img, results, classes) 245 | 246 | def main(_): 247 | train_img_list = [] 248 | for rt,dirs,filenames in os.walk(FLAGS.train_dir): 249 | for filename in filenames: 250 | train_img_list.append(filename[:-4]) 251 | 252 | train_size = len(train_img_list) 253 | numpy.random.shuffle(train_img_list) 254 | train_data_node = tf.placeholder( 255 | tf.float32, 256 | shape=(FLAGS.batch_size, FLAGS.img_size, FLAGS.img_size, FLAGS.channel_dim)) 257 | train_labels_node = tf.placeholder(tf.float32, 258 | shape=(FLAGS.batch_size, FLAGS.S*FLAGS.S*(FLAGS.B*5+FLAGS.num_classes))) 259 | 260 | network_fn = nets_factory.get_network_fn(FLAGS.model_name, 261 | FLAGS.S*FLAGS.S*(FLAGS.B*5+FLAGS.num_classes), 262 | is_training=True) 263 | 264 | logits,_ = network_fn(train_data_node) 265 | logtis = tf.nn.sigmoid(logits) 266 | loss = loss_func_yolo(logits, train_labels_node) 267 | 268 | batch = slim.create_global_step() 269 | 270 | optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss, global_step=batch) 271 | 272 | tf.summary.scalar("loss", loss) 273 | merged_summary = tf.summary.merge_all() 274 | with tf.Session() as sess: 275 | 276 | tf.global_variables_initializer().run() 277 | 
saver = tf.train.Saver() 278 | print('Initialized!') 279 | writer = tf.summary.FileWriter(FLAGS.tensorboard_dir, sess.graph) 280 | 281 | print("loding models...") 282 | ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) 283 | if ckpt and ckpt.model_checkpoint_path: 284 | ckpt_name = os.path.basename(ckpt.model_checkpoint_path) 285 | saver.restore(sess, os.path.join(FLAGS.checkpoint_dir, ckpt_name)) 286 | print("Success to load {}".format(ckpt_name)) 287 | else: 288 | print("Failed to find a checkpoint") 289 | 290 | start_time = time.time() 291 | for step in xrange(int(FLAGS.epoch * train_size) // FLAGS.batch_size): 292 | offset = (step * FLAGS.batch_size) % (train_size - FLAGS.batch_size) 293 | batch_data = extract_data_yolo(get_next_minibatch(offset, train_img_list)) 294 | batch_labels = extract_labels_yolo(get_next_minibatch(offset, train_img_list)) 295 | 296 | feed_dict = {train_data_node: batch_data, 297 | train_labels_node: batch_labels} 298 | 299 | _, los, summary = sess.run([optimizer, loss, merged_summary], feed_dict=feed_dict) 300 | 301 | if step%FLAGS.log_loss_step == 0: 302 | end_time = time.time() 303 | print('loss: %.6f time: %.2f' % (los, end_time-start_time)) 304 | start_time = time.time() 305 | if step%FLAGS.save_summary_step == 0: 306 | writer.add_summary(summary, step) 307 | if step%FLAGS.save_model_step == 0: 308 | save_path = saver.save(sess, os.path.join(FLAGS.checkpoint_dir, "yolo.model"), global_step=step) 309 | 310 | if __name__ == '__main__': 311 | if not FLAGS.is_test: 312 | tf.app.run() 313 | else: 314 | test_from_dir(FLAGS.test_data) 315 | --------------------------------------------------------------------------------