├── LICENSE
├── README.md
├── demo_yolo_v1.py
├── demo_yolo_v2.py
├── models
│   └── yolov2-coco.meta
├── test
│   ├── cat.jpg
│   ├── person.jpg
│   ├── sample_computer.jpg
│   ├── sample_dog.jpg
│   ├── sample_eagle.jpg
│   ├── sample_giraffe.jpg
│   ├── sample_horses.jpg
│   ├── sample_office.jpg
│   ├── sample_person.jpg
│   ├── sample_scream.jpg
│   └── test.mp4
├── train.py
├── utils
│   ├── __init__.py
│   ├── box.py
│   ├── im_transform.py
│   ├── pascal_voc.py
│   ├── timer.py
│   └── tool.py
└── yolo
    ├── __init__.py
    ├── config.py
    └── yolo_net.py

/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2017 Peng Zhang
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## yolov2_tensorflow
2 | 
3 | ### Requirements
4 | 1. TensorFlow
5 | 2. OpenCV
6 | 
7 | A TensorFlow implementation of [YOLO](https://pjreddie.com/darknet/yolo/), including YOLOv1 and YOLOv2 demos.
8 | 
9 | 
10 | ### Installation
11 | 
12 | 1. Clone the yolov2-tensorflow repository
13 | ```Shell
14 | $ git clone https://github.com/shishichang/yolov2-tensorflow.git
15 | $ cd yolov2-tensorflow
16 | ```
17 | 
18 | 2. Download [YOLO_v1](http://pan.baidu.com/s/1cGV694), [YOLO_v2_pb](http://pan.baidu.com/s/1hrRszrA) and [YOLO_v2_meta](http://pan.baidu.com/s/1dEOaGPr)
19 | and put them in `models`
20 | 
21 | 3. Modify the configuration in `yolo/config.py` for YOLOv1
22 | 
23 | 4. Run the demos
24 | ```Shell
25 | $ python demo_yolo_v1.py
26 | $ python demo_yolo_v2.py
27 | ```
28 | 
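For use from another script, rather than editing `main()`, a minimal sketch along the lines of `demo_yolo_v1.py` (assuming the YOLO_v1 checkpoint from step 2 was saved as `models/YOLO_small.ckpt`):

```python
from yolo.yolo_net import YOLONet
from demo_yolo_v1 import Detector

net = YOLONet(False)  # build the network in inference mode (no loss layer)
detector = Detector(net, 'models/YOLO_small.ckpt')  # opens a session and restores the weights
detector.image_detector('test/person.jpg')  # draws detections and shows the image
```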
--------------------------------------------------------------------------------
/demo_yolo_v1.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import os
4 | import cv2
5 | import argparse
6 | import yolo.config as cfg
7 | from yolo.yolo_net import YOLONet
8 | from utils.timer import Timer
9 | 
10 | 
11 | class Detector(object):
12 | 
13 |     def __init__(self, net, weight_file):
14 |         self.net = net
15 |         self.weights_file = weight_file
16 | 
17 |         self.classes = cfg.CLASSES
18 |         self.num_class = len(self.classes)
19 |         self.image_size = cfg.IMAGE_SIZE
20 |         self.cell_size = cfg.CELL_SIZE
21 |         self.boxes_per_cell = cfg.BOXES_PER_CELL
22 |         self.threshold = cfg.THRESHOLD
23 |         self.iou_threshold = cfg.IOU_THRESHOLD
24 |         self.boundary1 = self.cell_size * self.cell_size * self.num_class  # end of the class probabilities in the flat output
25 |         self.boundary2 = self.boundary1 + self.cell_size * self.cell_size * self.boxes_per_cell  # end of the box confidences
26 | 
27 |         self.sess = tf.Session()
28 |         self.sess.run(tf.global_variables_initializer())
29 | 
30 |         print('Restoring weights from: ' + self.weights_file)
31 |         self.saver = tf.train.Saver()
32 |         self.saver.restore(self.sess, self.weights_file)
33 | 
34 |     def draw_result(self, img, result):
35 |         for i in range(len(result)):
36 |             x = int(result[i][1])
37 |             y = int(result[i][2])
38 |             w = int(result[i][3] / 2)
39 |             h = int(result[i][4] / 2)
40 |             cv2.rectangle(img, (x - w, y - h), (x + w, y + h), (0, 255, 0), 2)
41 |             cv2.rectangle(img, (x - w, y - h - 20),
42 |                           (x + w, y - h), (125, 125, 125), -1)
43 |             cv2.putText(img, result[i][0] + ' : %.2f' % result[i][5], (x - w + 5, y - h - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
44 | 
45 |     def detect(self, img):
46 |         img_h, img_w, _ = img.shape
47 |         inputs = cv2.resize(img, (self.image_size, self.image_size))
48 |         inputs = cv2.cvtColor(inputs, cv2.COLOR_BGR2RGB).astype(np.float32)
49 |         inputs = (inputs / 255.0) * 2.0 - 1.0  # scale pixels to [-1, 1]
50 |         inputs = np.reshape(inputs, (1, self.image_size, self.image_size, 3))
51 | 
52 |         result = self.detect_from_cvmat(inputs)[0]
53 | 
54 |         for i in range(len(result)):  # map boxes back to the original image scale
55 |             result[i][1] *= (1.0 * img_w / self.image_size)
56 |             result[i][2] *= (1.0 * img_h / self.image_size)
57 |             result[i][3] *= (1.0 * img_w / self.image_size)
58 |             result[i][4] *= (1.0 * img_h / self.image_size)
59 | 
60 |         return result
61 | 
62 |     def detect_from_cvmat(self, inputs):
63 |         net_output = self.sess.run(self.net.logits,
64 |                                    feed_dict={self.net.images: inputs})
65 |         results = []
66 |         for i in range(net_output.shape[0]):
67 |             results.append(self.interpret_output(net_output[i]))
68 | 
69 |         return results
70 | 
71 |     def interpret_output(self, output):
72 |         probs = np.zeros((self.cell_size, self.cell_size,
73 |                           self.boxes_per_cell, self.num_class))
74 |         class_probs = np.reshape(output[0:self.boundary1], (self.cell_size, self.cell_size, self.num_class))
75 |         scales = np.reshape(output[self.boundary1:self.boundary2], (self.cell_size, self.cell_size, self.boxes_per_cell))
76 |         boxes = np.reshape(output[self.boundary2:], (self.cell_size, self.cell_size, self.boxes_per_cell, 4))
77 |         offset = np.transpose(np.reshape(np.array([np.arange(self.cell_size)] * self.cell_size * self.boxes_per_cell),
78 |                                          [self.boxes_per_cell, self.cell_size, self.cell_size]), (1, 2, 0))
79 | 
80 |         boxes[:, :, :, 0] += offset  # add per-cell grid offsets to the x centres
81 |         boxes[:, :, :, 1] += np.transpose(offset, (1, 0, 2))  # and to the y centres
82 |         boxes[:, :, :, :2] = 1.0 * boxes[:, :, :, 0:2] / self.cell_size
83 |         boxes[:, :, :, 2:] = np.square(boxes[:, :, :, 2:])  # w and h are predicted as square roots
84 | 
85 |         boxes *= self.image_size
86 | 
87 |         for i in range(self.boxes_per_cell):
88 |             for j in range(self.num_class):
89 |                 probs[:, :, i, j] = np.multiply(
90 |                     class_probs[:, :, j], scales[:, :, i])
91 | 
92 |         filter_mat_probs = np.array(probs >= self.threshold, dtype='bool')
93 |         filter_mat_boxes = np.nonzero(filter_mat_probs)
94 |         boxes_filtered = boxes[filter_mat_boxes[0],
95 |                                filter_mat_boxes[1], filter_mat_boxes[2]]
96 |         probs_filtered = probs[filter_mat_probs]
97 |         classes_num_filtered = np.argmax(probs, axis=3)[filter_mat_boxes[
98 |             0], filter_mat_boxes[1], filter_mat_boxes[2]]  # highest-scoring class, not the first thresholded one
99 | 
100 |         argsort = np.array(np.argsort(probs_filtered))[::-1]
101 |         boxes_filtered = boxes_filtered[argsort]
102 |         probs_filtered = probs_filtered[argsort]
103 |         classes_num_filtered = classes_num_filtered[argsort]
104 | 
105 |         for i in range(len(boxes_filtered)):
106 |             if probs_filtered[i] == 0:
107 |                 continue
108 |             for j in range(i + 1, len(boxes_filtered)):
109 |                 if self.iou(boxes_filtered[i], boxes_filtered[j]) > self.iou_threshold:
110 |                     probs_filtered[j] = 0.0
111 | 
112 |         filter_iou = np.array(probs_filtered > 0.0, dtype='bool')
113 |         boxes_filtered = boxes_filtered[filter_iou]
114 |         probs_filtered = probs_filtered[filter_iou]
115 |         classes_num_filtered = classes_num_filtered[filter_iou]
116 | 
117 |         result = []
118 |         for i in range(len(boxes_filtered)):
119 |             result.append([self.classes[classes_num_filtered[i]], boxes_filtered[i][0], boxes_filtered[
120 |                 i][1], boxes_filtered[i][2], boxes_filtered[i][3], probs_filtered[i]])
121 | 
122 |         return result
123 | 
124 |     def iou(self, box1, box2):
125 |         tb = min(box1[0] + 0.5 * box1[2], box2[0] + 0.5 * box2[2]) - \
126 |             max(box1[0] - 0.5 * box1[2], box2[0] - 0.5 * box2[2])
127 |         lr = min(box1[1] + 0.5 * box1[3], box2[1] + 0.5 * box2[3]) - \
128 |             max(box1[1] - 0.5 * box1[3], box2[1] - 0.5 * box2[3])
129 |         if tb < 0 or lr < 0:
130 |             intersection = 0
131 |         else:
132 |             intersection = tb * lr
133 |         return intersection / (box1[2] * box1[3] + box2[2] * box2[3] - intersection)
134 | 
135 |     def camera_detector(self, cap, wait=10):
136 |         detect_timer = Timer()
137 |         ret, frame = cap.read()  # prime the first frame
138 | 
139 |         while ret:
140 |             # (the next frame is read once, at the bottom of the loop)
141 |             detect_timer.tic()
142 |             result = self.detect(frame)
143 |             detect_timer.toc()
144 |             print('Average detecting time: {:.3f}s'.format(detect_timer.average_time))
145 | 
146 |             self.draw_result(frame, result)
147 |             cv2.imshow('Camera', frame)
148 |             cv2.waitKey(wait)
149 | 
150 |             ret, frame = cap.read()
151 | 
152 |     def image_detector(self, imname, wait=0):
153 |         detect_timer = Timer()
154 |         image = cv2.imread(imname)
155 | 
156 |         detect_timer.tic()
157 |         result = self.detect(image)
158 |         detect_timer.toc()
159 |         print('Average detecting time: {:.3f}s'.format(detect_timer.average_time))
160 | 
161 |         self.draw_result(image, result)
162 |         cv2.imshow('Image', image)
163 |         cv2.waitKey(wait)
164 | 
165 | 
166 | def main():
167 |     parser = argparse.ArgumentParser()
168 |     parser.add_argument('--weights', default="YOLO_small.ckpt", type=str)
169 |     parser.add_argument('--weight_dir', default='models', type=str)
170 |     parser.add_argument('--data_dir', default="data", type=str)
171 |     parser.add_argument('--gpu', default='2', type=str)
172 |     args = parser.parse_args()
173 | 
174 |     os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
175 | 
176 |     yolo = YOLONet(False)
177 |     weight_file = os.path.join(args.weight_dir, args.weights)
178 |     detector = Detector(yolo, weight_file)
179 | 
180 |     select = 2  # 1: video demo, 2: single-image demo
181 |     if select == 1:
182 |         # detect from a video stream (here a file, but a camera index also works)
183 |         cap = cv2.VideoCapture('test/test.mp4')
184 |         detector.camera_detector(cap)
185 |     if select == 2:
186 |         # detect from an image file
187 |         imname = 'test/person.jpg'
188 |         detector.image_detector(imname)
189 | 
190 | 
191 | if __name__ == '__main__':
192 |     main()
193 | 
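The score computation in `interpret_output` above is the heart of the v1 decoder: per-cell class-conditional probabilities are multiplied by per-box confidences before thresholding and NMS. A standalone NumPy sketch of just that step, using the default sizes from `yolo/config.py` (7x7 grid, 2 boxes per cell, 20 classes); the array values here are random stand-ins, not real network output:

```python
import numpy as np

cell_size, boxes_per_cell, num_class = 7, 2, 20  # cfg.CELL_SIZE, cfg.BOXES_PER_CELL, len(cfg.CLASSES)
class_probs = np.random.rand(cell_size, cell_size, num_class)   # P(class | object), per cell
scales = np.random.rand(cell_size, cell_size, boxes_per_cell)   # per-box confidence

# class-specific score for every (cell, box, class), as in the nested loops of interpret_output
probs = class_probs[:, :, None, :] * scales[:, :, :, None]      # shape (7, 7, 2, 20)
print(probs.shape, (probs >= 0.3).sum())                        # 0.3 = cfg.THRESHOLD
```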
--------------------------------------------------------------------------------
/demo_yolo_v2.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append("./")
3 | from utils.im_transform import imcv2_recolor, imcv2_affine_trans
4 | from utils import box
5 | import math
6 | import random
7 | import time
8 | import os
9 | 
10 | import numpy as np
11 | import tensorflow as tf
12 | import cv2
13 | slim = tf.contrib.slim
14 | import matplotlib.pyplot as plt
15 | from multiprocessing.pool import ThreadPool
16 | from utils import tool
17 | from collections import Counter
18 | import json
19 | 
20 | pool = ThreadPool()
21 | os.environ["CUDA_VISIBLE_DEVICES"] = '3'  # pin the process to one physical GPU
22 | 
23 | class YOLO_detector(object):
24 | 
25 |     def __init__(self):
26 |         model_name = 'yolov2-coco'
27 |         model_dir = './models'
28 |         gpu_id = 4
29 |         self.gpu_utility = 0.9
30 | 
31 |         self.pb_file = '{}/{}.pb'.format(model_dir, model_name)
32 |         self.meta_file = '{}/{}.meta'.format(model_dir, model_name)
33 |         self.batch = 4
34 | 
35 |         self.graph = tf.Graph()
36 |         with tf.device('/gpu:1'):  # NOTE: only one GPU is visible after line 21, so '/gpu:0' may be what is meant
37 |             with self.graph.as_default() as g:
38 |                 self.build_from_pb()
39 |                 gpu_options = tf.GPUOptions(allow_growth=True)
40 |                 sess_config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)
41 |                 self.sess = tf.Session(config=sess_config)  # session over the imported graph
42 |                 self.sess.run(tf.global_variables_initializer())
43 |         return
44 | 
45 |     def build_from_pb(self):
46 |         with tf.gfile.FastGFile(self.pb_file, "rb") as f:
47 |             graph_def = tf.GraphDef()
48 |             graph_def.ParseFromString(f.read())
49 |             tf.import_graph_def(graph_def, name="")
50 | 
51 |         with open(self.meta_file, "r") as fp:
52 |             self.meta = json.load(fp)
53 |         # placeholders: input/output tensors of the imported graph
54 |         self.inp = tf.get_default_graph().get_tensor_by_name('input:0')
55 |         self.out = tf.get_default_graph().get_tensor_by_name('output:0')
56 | 
57 |         #self.setup_meta_ops()
58 | 
59 |     def setup_meta_ops(self):
60 |         cfg = dict({
61 |             'allow_soft_placement': False,
62 |             'log_device_placement': False
63 |         })
64 |         utility = min(self.gpu_utility, 1.0)
65 |         if utility > 0.0:
66 |             print('GPU model with {} usage'.format(utility))
67 |             cfg['gpu_options'] = tf.GPUOptions(per_process_gpu_memory_fraction=utility)
68 |             cfg['allow_soft_placement'] = True
69 |         else:
70 |             print('Run totally on CPU')
71 |             cfg['device_count'] = {'GPU': 0}
72 | 
73 |         self.sess = tf.Session(config=tf.ConfigProto(**cfg))
74 |         self.sess.run(tf.global_variables_initializer())
75 | 
76 |     def resize_input(self, im):
77 |         h, w, c = self.meta['inp_size']
78 |         imsz = cv2.resize(im, (w, h))
79 |         imsz = imsz / 255.       # scale to [0, 1]
80 |         imsz = imsz[:, :, ::-1]  # BGR -> RGB
81 |         return imsz
82 | 
83 |     def process_box(self, b, h, w, threshold):
84 |         max_indx = np.argmax(b.probs)
85 |         max_prob = b.probs[max_indx]
86 |         label = self.meta['labels'][max_indx]
87 |         if max_prob > threshold:
88 |             left = int((b.x - b.w / 2.) * w)
89 |             right = int((b.x + b.w / 2.) * w)
90 |             top = int((b.y - b.h / 2.) * h)
91 |             bot = int((b.y + b.h / 2.) * h)
92 |             if left < 0: left = 0
93 |             if right > w - 1: right = w - 1
94 |             if top < 0: top = 0
95 |             if bot > h - 1: bot = h - 1
96 |             mess = '{}'.format(label)
97 |             return (left, right, top, bot, mess, max_indx, max_prob)
98 |         return None
99 | 
100 |     def preprocess(self, im, allobj=None):
101 |         """Resize an image for the network; in training mode
102 |         (allobj given), augment and recolor it first."""
103 |         if type(im) is not np.ndarray:
104 |             im = cv2.imread(im)
105 | 
106 |         if allobj is not None:  # in training mode
107 |             result = imcv2_affine_trans(im)
108 |             im, dims, trans_param = result
109 |             scale, offs, flip = trans_param
110 |             for obj in allobj:
111 |                 _fix(obj, dims, scale, offs)  # NOTE: _fix (a darkflow box-rescaling helper) is not defined in this repo
112 |                 if not flip: continue
113 |                 obj_1_ = obj[1]
114 |                 obj[1] = dims[0] - obj[3]
115 |                 obj[3] = dims[0] - obj_1_
116 |             im = imcv2_recolor(im)
117 | 
118 |         im = self.resize_input(im)
119 |         if allobj is None: return im
120 |         return im  #, np.array(im) # for unit testing
121 | 
122 |     def postprocess(self, net_out):
123 |         meta = self.meta
124 |         result = box.box_constructor(meta, net_out)
125 |         return result
126 | 
127 | 
128 |     def detect_object(self, im):
129 |         this_inp = self.preprocess(im)
130 |         expanded = np.expand_dims(this_inp, 0)  # add the batch dimension
131 |         # feed the single-image batch to the imported graph
132 |         feed_dict = {self.inp: expanded}
133 | 
134 | 
135 | 
136 |         print("Forwarding the image input.")
137 |         start = time.time()
138 |         out = self.sess.run(self.out, feed_dict)
139 | 
140 |         time_value = time.time()
141 |         last = time_value - start
142 |         print('Cost time of run = {}s.'.format(last))
143 |         result = self.postprocess(out[0])
144 |         last = time.time() - time_value
145 | 
146 |         print('Cost time of postprocess = {}s.'.format(last))
147 |         return result
148 | 
149 | def demo_image():
150 |     yolo = YOLO_detector()
151 |     colors = yolo.meta['colors']
152 |     img_dir = "./test"
153 |     image_names = tool.find_files(img_dir)
154 |     for filename in image_names:
155 |         im = cv2.imread(filename)
156 |         h, w, _ = im.shape
157 |         results = yolo.detect_object(im)
158 |         thick = int((h + w) // 300)  # line thickness scaled to the image size
159 |         draw = im.copy()
160 |         h, w, _ = draw.shape
161 |         for i in range(len(results)):
162 |             cv2.putText(draw, str(results[i]['category']), (int(w*results[i]['x1']), int(h*results[i]['y1'])-12), 0, 1e-3*h, colors[results[i]['label']], thick//3)
163 |             cv2.rectangle(draw, (int(w*results[i]['x1']), int(h*results[i]['y1'])), (int(w*results[i]['x2']), int(h*results[i]['y2'])), colors[results[i]['label']], thick)
164 |         cv2.imshow("result", draw)
165 |         cv2.waitKey()
166 | 
167 | def demo_video():
168 |     yolo = YOLO_detector()
169 |     colors = yolo.meta['colors']
170 |     video_name = 'test.mp4'
171 |     data_dir = "./test"
172 |     video_file = os.path.join(data_dir, video_name)
173 | 
174 |     print(video_file)
175 |     vcap = cv2.VideoCapture(video_file)
176 |     if not vcap.isOpened():
177 |         print("video cannot be opened!")
178 |         return -1
179 |     idx = 0
180 |     while True:
181 |         idx += 1
182 |         ret, img = vcap.read()
183 |         if not ret:
184 |             break
185 |         print('video is read')
186 |         im = img
187 |         h, w, _ = im.shape
188 |         start = time.time()
189 |         results = yolo.detect_object(im)
190 |         last = (time.time() - start)
191 |         thick = int((h + w) // 300)
192 |         draw = im.copy()
193 |         h, w, _ = draw.shape
194 |         for i in range(len(results)):
195 |             cv2.putText(draw, "fps:{}".format(1/last), (1, 18), 0, 1e-3*h, colors[results[i]['label']], thick//3)  # detection-only fps
196 |             cv2.putText(draw, "{},{}".format(str(results[i]['category']), results[i]['score']), (int(w*results[i]['x1']), int(h*results[i]['y1'])-12), 0, 1e-3*h, colors[results[i]['label']], thick//3)
197 | 
cv2.rectangle(draw,(int(w*results[i]['x1']),int(h*results[i]['y1'])),(int(w*results[i]['x2']),int(h*results[i]['y2'])), colors[results[i]['label']], thick) 198 | cv2.imshow("result", draw) 199 | cv2.waitKey() 200 | 201 | if __name__ == '__main__': 202 | print("run demo_video...") 203 | demo_image() 204 | 205 | -------------------------------------------------------------------------------- /models/yolov2-coco.meta: -------------------------------------------------------------------------------- 1 | {"jitter": 0.3, "object_scale": 5, "model": "../cfg/yolo-coco.cfg", "bias_match": 1, "absolute": 1, "thresh": 0.6, "random": 1, "net": {"exposure": 1.5, "momentum": 0.9, "saturation": 1.5, "batch": 1, "hue": 0.1, "width": 608, "channels": 3, "scales": ".1,.1", "type": "[net]", "learning_rate": 0.001, "max_batches": 500200, "policy": "steps", "steps": "400000,450000", "burn_in": 1000, "height": 608, "subdivisions": 1, "decay": 0.0005, "angle": 0}, "colors": [[254.0, 254.0, 254], [248.92, 228.6, 127], [243.84, 203.20000000000002, 0], [238.76, 177.79999999999998, -127], [233.68, 152.4, -254], [228.6, 127.0, 254], [223.52, 101.60000000000001, 127], [218.44, 76.20000000000002, 0], [213.35999999999999, 50.79999999999999, -127], [208.28000000000003, 25.399999999999995, -254], [203.20000000000002, 0.0, 254], [198.12, -25.400000000000023, 127], [193.04, -50.79999999999999, 0], [187.96, -76.20000000000002, -127], [182.88, -101.59999999999998, -254], [177.79999999999998, -127.0, 254], [172.71999999999997, -152.40000000000003, 127], [167.64, -177.79999999999998, 0], [162.56, -203.20000000000002, -127], [157.48, -228.59999999999997, -254], [152.4, -254.0, 254], [147.32000000000002, -279.40000000000003, 127], [142.24, -304.80000000000007, 0], [137.16, -330.19999999999993, -127], [132.08, -355.59999999999997, -254], [127.0, 254.0, 254], [121.92, 228.6, 127], [116.83999999999999, 203.20000000000002, 0], [111.75999999999999, 177.79999999999998, -127], [106.68, 152.4, -254], [101.60000000000001, 127.0, 254], [96.52, 101.60000000000001, 127], [91.44, 76.20000000000002, 0], [86.35999999999999, 50.79999999999999, -127], [81.27999999999999, 25.399999999999995, -254], [76.20000000000002, 0.0, 254], [71.12, -25.400000000000023, 127], [66.04, -50.79999999999999, 0], [60.96, -76.20000000000002, -127], [55.879999999999995, -101.59999999999998, -254], [50.79999999999999, -127.0, 254], [45.72000000000001, -152.40000000000003, 127], [40.64000000000001, -177.79999999999998, 0], [35.56, -203.20000000000002, -127], [30.48, -228.59999999999997, -254], [25.399999999999995, -254.0, 254], [20.31999999999999, -279.40000000000003, 127], [15.240000000000013, -304.80000000000007, 0], [10.160000000000009, -330.19999999999993, -127], [5.0800000000000045, -355.59999999999997, -254], [0.0, 254.0, 254], [-5.0800000000000045, 228.6, 127], [-10.160000000000009, 203.20000000000002, 0], [-15.240000000000013, 177.79999999999998, -127], [-20.320000000000018, 152.4, -254], [-25.400000000000023, 127.0, 254], [-30.480000000000025, 101.60000000000001, 127], [-35.559999999999974, 76.20000000000002, 0], [-40.63999999999998, 50.79999999999999, -127], [-45.719999999999985, 25.399999999999995, -254], [-50.79999999999999, 0.0, 254], [-55.879999999999995, -25.400000000000023, 127], [-60.96, -50.79999999999999, 0], [-66.04, -76.20000000000002, -127], [-71.12, -101.59999999999998, -254], [-76.20000000000002, -127.0, 254], [-81.28000000000002, -152.40000000000003, 127], [-86.36000000000001, -177.79999999999998, 0], [-91.44000000000003, -203.20000000000002, 
-127], [-96.51999999999997, -228.59999999999997, -254], [-101.59999999999998, -254.0, 254], [-106.67999999999998, -279.40000000000003, 127], [-111.75999999999999, -304.80000000000007, 0], [-116.83999999999999, -330.19999999999993, -127], [-121.92, -355.59999999999997, -254], [-127.0, 254.0, 254], [-132.08, 228.6, 127], [-137.16, 203.20000000000002, 0], [-142.24, 177.79999999999998, -127], [-147.32000000000002, 152.4, -254]], "class_scale": 1, "type": "[region]", "rescore": 1, "anchors": [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828], "coords": 4, "classes": 80, "noobject_scale": 1, "inp_size": [608, 608, 3], "num": 5, "coord_scale": 1, "out_size": [19, 19, 425], "labels": ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"], "softmax": 1, "name": "yolo-coco"} -------------------------------------------------------------------------------- /test/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shishichang/yolov2-tensorflow/a71a57465422c4806e1dcbcf8b40a36463a10272/test/cat.jpg -------------------------------------------------------------------------------- /test/person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shishichang/yolov2-tensorflow/a71a57465422c4806e1dcbcf8b40a36463a10272/test/person.jpg -------------------------------------------------------------------------------- /test/sample_computer.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shishichang/yolov2-tensorflow/a71a57465422c4806e1dcbcf8b40a36463a10272/test/sample_computer.jpg -------------------------------------------------------------------------------- /test/sample_dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shishichang/yolov2-tensorflow/a71a57465422c4806e1dcbcf8b40a36463a10272/test/sample_dog.jpg -------------------------------------------------------------------------------- /test/sample_eagle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shishichang/yolov2-tensorflow/a71a57465422c4806e1dcbcf8b40a36463a10272/test/sample_eagle.jpg -------------------------------------------------------------------------------- /test/sample_giraffe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shishichang/yolov2-tensorflow/a71a57465422c4806e1dcbcf8b40a36463a10272/test/sample_giraffe.jpg 
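A note on the `anchors` field of `models/yolov2-coco.meta` above: they are prior box sizes in grid-cell units, read pairwise as (width, height) by `utils/box.py` further below, which multiplies them with `exp` of the raw width/height outputs. A minimal sketch of that decoding, with made-up raw outputs `tw`, `th`:

```python
import numpy as np

# anchor pairs and grid size taken from yolov2-coco.meta ("out_size": [19, 19, 425])
anchors = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843,
           5.47434, 7.88282, 3.52778, 9.77052, 9.16828]
W = H = 19

tw, th = 0.2, -0.5  # toy raw network outputs for one box
for b in range(5):  # "num": 5 anchor boxes per cell
    w = np.exp(tw) * anchors[2 * b] / W      # box width as a fraction of image width
    h = np.exp(th) * anchors[2 * b + 1] / H  # box height as a fraction of image height
    print('anchor %d: w=%.3f, h=%.3f' % (b, w, h))
```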
-------------------------------------------------------------------------------- /test/sample_horses.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shishichang/yolov2-tensorflow/a71a57465422c4806e1dcbcf8b40a36463a10272/test/sample_horses.jpg -------------------------------------------------------------------------------- /test/sample_office.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shishichang/yolov2-tensorflow/a71a57465422c4806e1dcbcf8b40a36463a10272/test/sample_office.jpg -------------------------------------------------------------------------------- /test/sample_person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shishichang/yolov2-tensorflow/a71a57465422c4806e1dcbcf8b40a36463a10272/test/sample_person.jpg -------------------------------------------------------------------------------- /test/sample_scream.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shishichang/yolov2-tensorflow/a71a57465422c4806e1dcbcf8b40a36463a10272/test/sample_scream.jpg -------------------------------------------------------------------------------- /test/test.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shishichang/yolov2-tensorflow/a71a57465422c4806e1dcbcf8b40a36463a10272/test/test.mp4 -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import datetime 3 | import os 4 | import argparse 5 | import yolo.config as cfg 6 | from yolo.yolo_net import YOLONet 7 | from utils.timer import Timer 8 | from utils.pascal_voc import pascal_voc 9 | 10 | 11 | class Solver(object): 12 | 13 | def __init__(self, net, data): 14 | self.net = net 15 | self.data = data 16 | self.weights_file = cfg.WEIGHTS_FILE 17 | self.max_iter = cfg.MAX_ITER 18 | self.initial_learning_rate = cfg.LEARNING_RATE 19 | self.decay_steps = cfg.DECAY_STEPS 20 | self.decay_rate = cfg.DECAY_RATE 21 | self.staircase = cfg.STAIRCASE 22 | self.summary_iter = cfg.SUMMARY_ITER 23 | self.save_iter = cfg.SAVE_ITER 24 | self.output_dir = os.path.join( 25 | cfg.OUTPUT_DIR, datetime.datetime.now().strftime('%Y_%m_%d_%H_%M')) 26 | if not os.path.exists(self.output_dir): 27 | os.makedirs(self.output_dir) 28 | self.save_cfg() 29 | 30 | self.variable_to_restore = tf.global_variables() 31 | self.restorer = tf.train.Saver(self.variable_to_restore, max_to_keep=None) 32 | self.saver = tf.train.Saver(self.variable_to_restore, max_to_keep=None) 33 | self.ckpt_file = os.path.join(self.output_dir, 'save.ckpt') 34 | self.summary_op = tf.summary.merge_all() 35 | self.writer = tf.summary.FileWriter(self.output_dir, flush_secs=60) 36 | 37 | self.global_step = tf.get_variable( 38 | 'global_step', [], initializer=tf.constant_initializer(0), trainable=False) 39 | self.learning_rate = tf.train.exponential_decay( 40 | self.initial_learning_rate, self.global_step, self.decay_steps, 41 | self.decay_rate, self.staircase, name='learning_rate') 42 | self.optimizer = tf.train.GradientDescentOptimizer( 43 | learning_rate=self.learning_rate).minimize( 44 | self.net.total_loss, global_step=self.global_step) 45 | self.ema = tf.train.ExponentialMovingAverage(decay=0.9999) 46 | 
self.averages_op = self.ema.apply(tf.trainable_variables()) 47 | with tf.control_dependencies([self.optimizer]): 48 | self.train_op = tf.group(self.averages_op) 49 | 50 | gpu_options = tf.GPUOptions() 51 | config = tf.ConfigProto(gpu_options=gpu_options) 52 | self.sess = tf.Session(config=config) 53 | self.sess.run(tf.global_variables_initializer()) 54 | 55 | if self.weights_file is not None: 56 | print('Restoring weights from: ' + self.weights_file) 57 | self.restorer.restore(self.sess, self.weights_file) 58 | 59 | self.writer.add_graph(self.sess.graph) 60 | 61 | def train(self): 62 | 63 | train_timer = Timer() 64 | load_timer = Timer() 65 | 66 | for step in xrange(1, self.max_iter + 1): 67 | 68 | load_timer.tic() 69 | images, labels = self.data.get() 70 | load_timer.toc() 71 | feed_dict = {self.net.images: images, self.net.labels: labels} 72 | 73 | if step % self.summary_iter == 0: 74 | if step % (self.summary_iter * 10) == 0: 75 | 76 | train_timer.tic() 77 | summary_str, loss, _ = self.sess.run( 78 | [self.summary_op, self.net.total_loss, self.train_op], 79 | feed_dict=feed_dict) 80 | train_timer.toc() 81 | 82 | log_str = ('{} Epoch: {}, Step: {}, Learning rate: {},' 83 | ' Loss: {:5.3f}\nSpeed: {:.3f}s/iter,' 84 | ' Load: {:.3f}s/iter, Remain: {}').format( 85 | datetime.datetime.now().strftime('%m/%d %H:%M:%S'), 86 | self.data.epoch, 87 | int(step), 88 | round(self.learning_rate.eval(session=self.sess), 6), 89 | loss, 90 | train_timer.average_time, 91 | load_timer.average_time, 92 | train_timer.remain(step, self.max_iter)) 93 | print(log_str) 94 | 95 | else: 96 | train_timer.tic() 97 | summary_str, _ = self.sess.run( 98 | [self.summary_op, self.train_op], 99 | feed_dict=feed_dict) 100 | train_timer.toc() 101 | 102 | self.writer.add_summary(summary_str, step) 103 | 104 | else: 105 | train_timer.tic() 106 | self.sess.run(self.train_op, feed_dict=feed_dict) 107 | train_timer.toc() 108 | 109 | if step % self.save_iter == 0: 110 | print('{} Saving checkpoint file to: {}'.format( 111 | datetime.datetime.now().strftime('%m/%d %H:%M:%S'), 112 | self.output_dir)) 113 | self.saver.save(self.sess, self.ckpt_file, 114 | global_step=self.global_step) 115 | 116 | def save_cfg(self): 117 | 118 | with open(os.path.join(self.output_dir, 'config.txt'), 'w') as f: 119 | cfg_dict = cfg.__dict__ 120 | for key in sorted(cfg_dict.keys()): 121 | if key[0].isupper(): 122 | cfg_str = '{}: {}\n'.format(key, cfg_dict[key]) 123 | f.write(cfg_str) 124 | 125 | 126 | def update_config_paths(data_dir, weights_file): 127 | cfg.DATA_PATH = data_dir 128 | cfg.PASCAL_PATH = os.path.join(data_dir, 'pascal_voc') 129 | cfg.CACHE_PATH = os.path.join(cfg.PASCAL_PATH, 'cache') 130 | cfg.OUTPUT_DIR = os.path.join(cfg.PASCAL_PATH, 'output') 131 | cfg.WEIGHTS_DIR = os.path.join(cfg.PASCAL_PATH, 'weights') 132 | 133 | cfg.WEIGHTS_FILE = os.path.join(cfg.WEIGHTS_DIR, weights_file) 134 | 135 | 136 | def main(): 137 | parser = argparse.ArgumentParser() 138 | parser.add_argument('--weights', default="YOLO_small.ckpt", type=str) 139 | parser.add_argument('--data_dir', default="data", type=str) 140 | parser.add_argument('--threshold', default=0.2, type=float) 141 | parser.add_argument('--iou_threshold', default=0.5, type=float) 142 | parser.add_argument('--gpu', default='', type=str) 143 | args = parser.parse_args() 144 | 145 | if args.gpu is not None: 146 | cfg.GPU = args.gpu 147 | 148 | if args.data_dir != cfg.DATA_PATH: 149 | update_config_paths(args.data_dir, args.weights) 150 | 151 | os.environ['CUDA_VISIBLE_DEVICES'] = cfg.GPU 152 
|
153 |     yolo = YOLONet()
154 |     pascal = pascal_voc('train')
155 | 
156 |     solver = Solver(yolo, pascal)
157 | 
158 |     print('Start training ...')
159 |     solver.train()
160 |     print('Done training.')
161 | 
162 | if __name__ == '__main__':
163 | 
164 |     # python train.py --weights YOLO_small.ckpt --gpu 0
165 |     main()
166 | 
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shishichang/yolov2-tensorflow/a71a57465422c4806e1dcbcf8b40a36463a10272/utils/__init__.py
--------------------------------------------------------------------------------
/utils/box.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | class BoundBox:
4 |     def __init__(self, classes):
5 |         self.x, self.y = float(), float()
6 |         self.w, self.h = float(), float()
7 |         self.c = float()
8 |         self.class_num = classes
9 |         self.probs = np.zeros((classes,))
10 | 
11 | def overlap(x1, w1, x2, w2):
12 |     l1 = x1 - w1 / 2.
13 |     l2 = x2 - w2 / 2.
14 |     left = max(l1, l2)
15 |     r1 = x1 + w1 / 2.
16 |     r2 = x2 + w2 / 2.
17 |     right = min(r1, r2)
18 |     return right - left
19 | 
20 | def box_intersection(a, b):
21 |     w = overlap(a.x, a.w, b.x, b.w)
22 |     h = overlap(a.y, a.h, b.y, b.h)
23 |     if w < 0 or h < 0: return 0
24 |     area = w * h
25 |     return area
26 | 
27 | def box_union(a, b):
28 |     i = box_intersection(a, b)
29 |     u = a.w * a.h + b.w * b.h - i
30 |     return u
31 | 
32 | def box_iou(a, b):
33 |     return box_intersection(a, b) / box_union(a, b)
34 | 
35 | def iou(box1, box2):
36 |     tb = min(box1[0]+0.5*box1[2], box2[0]+0.5*box2[2]) - max(box1[0]-0.5*box1[2], box2[0]-0.5*box2[2])
37 |     lr = min(box1[1]+0.5*box1[3], box2[1]+0.5*box2[3]) - max(box1[1]-0.5*box1[3], box2[1]-0.5*box2[3])
38 |     if tb < 0 or lr < 0: intersection = 0
39 |     else: intersection = tb*lr
40 |     return intersection / (box1[2]*box1[3] + box2[2]*box2[3] - intersection)
41 | 
42 | def explit_c_mine(x):
43 |     y = 1.0 / (1.0 + np.exp(-x))  # logistic sigmoid
44 |     return y
45 | 
46 | def box_constructor(meta, net_out_in):
47 |     threshold = meta['thresh']
48 |     classes = meta['labels']
49 |     anchors = np.asarray(meta['anchors'])
50 |     H, W, _ = meta['out_size']
51 | 
52 |     C = int(meta['classes'])
53 |     B = int(meta['num'])
54 |     net_out = net_out_in.reshape([H, W, B, int(net_out_in.shape[2]/B)])
55 |     Classes = net_out[:, :, :, 5:]    # per-class logits
56 |     Bbox_pred = net_out[:, :, :, :5]  # tx, ty, tw, th, objectness
57 |     probs = np.zeros((H, W, B, C), dtype=np.float32)
58 |     probs_filtered = np.zeros((H, W, B, C), dtype=np.float32)
59 |     Bbox_pred[:, :, :, 4] = explit_c_mine(Bbox_pred[:, :, :, 4])
60 |     offset = np.transpose(np.reshape(np.array([np.arange(W)] * H * B), (B, H, W)), (1, 2, 0))  # per-cell grid x-offsets
61 |     Bbox_pred[:, :, :, 0] = (offset + explit_c_mine(Bbox_pred[:, :, :, 0])) / W
62 |     Bbox_pred[:, :, :, 1] = (np.transpose(offset, (1, 0, 2)) + explit_c_mine(Bbox_pred[:, :, :, 1])) / H
63 |     for box_loop in range(B):
64 |         Bbox_pred[:, :, box_loop, 2] = np.exp(Bbox_pred[:, :, box_loop, 2]) * anchors[2*box_loop + 0] / W
65 |         Bbox_pred[:, :, box_loop, 3] = np.exp(Bbox_pred[:, :, box_loop, 3]) * anchors[2*box_loop + 1] / H
66 | 
67 | 
68 |     class_probs = np.ascontiguousarray(Classes).reshape([H*W*B, C])
69 |     max_all = np.max(class_probs, 1)
70 |     max_all = np.expand_dims(max_all, 0)
71 |     max_all = np.tile(max_all.T, (1, class_probs.shape[1]))
72 | 
73 |     class_probs = np.exp(class_probs - max_all)  # numerically stable softmax numerator
74 |     sum_all = np.sum(class_probs, 1)
75 | 
76 | 
77 |     temp_pred = np.ascontiguousarray(Bbox_pred[:, :, :, 4]).reshape([H*W*B, 1])
78 |     temp_pred = 
np.tile(temp_pred, (1, class_probs.shape[1])) 79 | sum_all = np.expand_dims(sum_all, 0) 80 | sum_all = np.tile(sum_all.T, (1, class_probs.shape[1])) 81 | probs = class_probs * temp_pred /sum_all 82 | probs = np.ascontiguousarray(probs).reshape([H, W, B, C]) 83 | 84 | 85 | 86 | #apply score threshold 87 | bboxes = Bbox_pred[:,:,:,:4] 88 | filter_mat_probs = np.array(probs > threshold, dtype = 'bool') 89 | probs_filtered = probs[filter_mat_probs] 90 | filter_mat_bboxes = np.nonzero(filter_mat_probs) 91 | bboxes_filtered = bboxes[filter_mat_bboxes[0], filter_mat_bboxes[1], filter_mat_bboxes[2]] 92 | probs_filtered = probs[filter_mat_probs] 93 | classes_num_filtered = np.argmax(probs, axis=3)[filter_mat_bboxes[0], filter_mat_bboxes[1], filter_mat_bboxes[2]] 94 | 95 | #NMS 96 | argsort = np.array(np.argsort(probs_filtered))[::-1] 97 | bboxes_filtered = bboxes_filtered[argsort] 98 | probs_filtered = probs_filtered[argsort] 99 | classes_num_filtered = classes_num_filtered[argsort] 100 | 101 | for i in range(len(probs_filtered)): 102 | if probs_filtered[i] == 0: continue 103 | for j in range(i+1, len(bboxes_filtered)): 104 | a = BoundBox(0) 105 | b = BoundBox(0) 106 | a.x = bboxes_filtered[i, 0] 107 | a.y = bboxes_filtered[i, 1] 108 | a.w = bboxes_filtered[i, 2] 109 | a.h = bboxes_filtered[i, 3] 110 | b.x = bboxes_filtered[j, 0] 111 | b.y = bboxes_filtered[j, 1] 112 | b.w = bboxes_filtered[j, 2] 113 | b.h = bboxes_filtered[j, 3] 114 | 115 | if box_iou(a, b) > 0.4: 116 | probs_filtered[j] = 0 117 | filter_iou = np.array(probs_filtered>0.0,dtype='bool') 118 | bboxes_filtered = bboxes_filtered[filter_iou] 119 | probs_filtered = probs_filtered[filter_iou] 120 | classes_num_filtered = classes_num_filtered[filter_iou] 121 | 122 | results = [] 123 | numbox = len(bboxes_filtered) 124 | for i in range(len(bboxes_filtered)): 125 | result = dict() 126 | result['score'] = probs_filtered[i] 127 | result['x1'] = bboxes_filtered[i][0] - bboxes_filtered[i][2]/2.0 128 | result['y1'] = bboxes_filtered[i][1] - bboxes_filtered[i][3]/2.0 129 | result['x2'] = bboxes_filtered[i][0] + bboxes_filtered[i][2]/2.0 130 | result['y2'] = bboxes_filtered[i][1] + bboxes_filtered[i][3]/2.0 131 | result['x1'] = max(0.0, result['x1']) 132 | result['y1'] = max(0.0, result['y1']) 133 | result['x2'] = min(1.0, result['x2']) 134 | result['y2'] = min(1.0, result['y2']) 135 | result['label'] = classes_num_filtered[i] 136 | result['category'] = classes[classes_num_filtered[i]] 137 | results.append(result) 138 | #print(results) 139 | 140 | 141 | return results 142 | 143 | 144 | -------------------------------------------------------------------------------- /utils/im_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | def imcv2_recolor(im, a = .1): 5 | t = [np.random.uniform()] 6 | t += [np.random.uniform()] 7 | t += [np.random.uniform()] 8 | t = np.array(t) * 2. - 1. 9 | 10 | # random amplify each channel 11 | im = im * (1 + t * a) 12 | mx = 255. * (1 + a) 13 | up = np.random.uniform() * 2 - 1 14 | im = np.power(im/mx, 1. + up * .5) 15 | return np.array(im * 255., np.uint8) 16 | 17 | def imcv2_affine_trans(im): 18 | # Scale and translate 19 | h, w, c = im.shape 20 | scale = np.random.uniform() / 10. + 1. 21 | max_offx = (scale-1.) * w 22 | max_offy = (scale-1.) 
* h 23 | offx = int(np.random.uniform() * max_offx) 24 | offy = int(np.random.uniform() * max_offy) 25 | 26 | im = cv2.resize(im, (0,0), fx = scale, fy = scale) 27 | im = im[offy : (offy + h), offx : (offx + w)] 28 | flip = np.random.binomial(1, .5) 29 | if flip: im = cv2.flip(im, 1) 30 | return im, [w, h, c], [scale, [offx, offy], flip] -------------------------------------------------------------------------------- /utils/pascal_voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import xml.etree.ElementTree as ET 3 | import numpy as np 4 | import cv2 5 | import cPickle 6 | import copy 7 | import yolo.config as cfg 8 | 9 | 10 | class pascal_voc(object): 11 | def __init__(self, phase, rebuild=False): 12 | self.devkil_path = os.path.join(cfg.PASCAL_PATH, 'VOCdevkit') 13 | self.data_path = os.path.join(self.devkil_path, 'VOC2007') 14 | self.cache_path = cfg.CACHE_PATH 15 | self.batch_size = cfg.BATCH_SIZE 16 | self.image_size = cfg.IMAGE_SIZE 17 | self.cell_size = cfg.CELL_SIZE 18 | self.classes = cfg.CLASSES 19 | self.class_to_ind = dict(zip(self.classes, xrange(len(self.classes)))) 20 | self.flipped = cfg.FLIPPED 21 | self.phase = phase 22 | self.rebuild = rebuild 23 | self.cursor = 0 24 | self.epoch = 1 25 | self.gt_labels = None 26 | self.prepare() 27 | 28 | def get(self): 29 | images = np.zeros((self.batch_size, self.image_size, self.image_size, 3)) 30 | labels = np.zeros((self.batch_size, self.cell_size, self.cell_size, 25)) 31 | count = 0 32 | while count < self.batch_size: 33 | imname = self.gt_labels[self.cursor]['imname'] 34 | flipped = self.gt_labels[self.cursor]['flipped'] 35 | images[count, :, :, :] = self.image_read(imname, flipped) 36 | labels[count, :, :, :] = self.gt_labels[self.cursor]['label'] 37 | count += 1 38 | self.cursor += 1 39 | if self.cursor >= len(self.gt_labels): 40 | np.random.shuffle(self.gt_labels) 41 | self.cursor = 0 42 | self.epoch += 1 43 | return images, labels 44 | 45 | def image_read(self, imname, flipped=False): 46 | image = cv2.imread(imname) 47 | image = cv2.resize(image, (self.image_size, self.image_size)) 48 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32) 49 | image = (image / 255.0) * 2.0 - 1.0 50 | if flipped: 51 | image = image[:, ::-1, :] 52 | return image 53 | 54 | def prepare(self): 55 | gt_labels = self.load_labels() 56 | if self.flipped: 57 | print('Appending horizontally-flipped training examples ...') 58 | gt_labels_cp = copy.deepcopy(gt_labels) 59 | for idx in range(len(gt_labels_cp)): 60 | gt_labels_cp[idx]['flipped'] = True 61 | gt_labels_cp[idx]['label'] = gt_labels_cp[idx]['label'][:, ::-1, :] 62 | for i in xrange(self.cell_size): 63 | for j in xrange(self.cell_size): 64 | if gt_labels_cp[idx]['label'][i, j, 0] == 1: 65 | gt_labels_cp[idx]['label'][i, j, 1] = self.image_size - 1 - gt_labels_cp[idx]['label'][i, j, 1] 66 | gt_labels += gt_labels_cp 67 | np.random.shuffle(gt_labels) 68 | self.gt_labels = gt_labels 69 | return gt_labels 70 | 71 | def load_labels(self): 72 | cache_file = os.path.join(self.cache_path, 'pascal_' + self.phase + '_gt_labels.pkl') 73 | 74 | if os.path.isfile(cache_file) and not self.rebuild: 75 | print('Loading gt_labels from: ' + cache_file) 76 | with open(cache_file, 'rb') as f: 77 | gt_labels = cPickle.load(f) 78 | return gt_labels 79 | 80 | print('Processing gt_labels from: ' + self.data_path) 81 | 82 | if not os.path.exists(self.cache_path): 83 | os.makedirs(self.cache_path) 84 | 85 | if self.phase == 'train': 86 | txtname = 
os.path.join(self.data_path, 'ImageSets', 'Main', 87 | 'trainval.txt') 88 | else: 89 | txtname = os.path.join(self.data_path, 'ImageSets', 'Main', 90 | 'test.txt') 91 | with open(txtname, 'r') as f: 92 | self.image_index = [x.strip() for x in f.readlines()] 93 | 94 | gt_labels = [] 95 | for index in self.image_index: 96 | label, num = self.load_pascal_annotation(index) 97 | if num == 0: 98 | continue 99 | imname = os.path.join(self.data_path, 'JPEGImages', index + '.jpg') 100 | gt_labels.append({'imname': imname, 'label': label, 'flipped': False}) 101 | print('Saving gt_labels to: ' + cache_file) 102 | with open(cache_file, 'wb') as f: 103 | cPickle.dump(gt_labels, f) 104 | return gt_labels 105 | 106 | def load_pascal_annotation(self, index): 107 | """ 108 | Load image and bounding boxes info from XML file in the PASCAL VOC 109 | format. 110 | """ 111 | 112 | imname = os.path.join(self.data_path, 'JPEGImages', index + '.jpg') 113 | im = cv2.imread(imname) 114 | h_ratio = 1.0 * self.image_size / im.shape[0] 115 | w_ratio = 1.0 * self.image_size / im.shape[1] 116 | # im = cv2.resize(im, [self.image_size, self.image_size]) 117 | 118 | label = np.zeros((self.cell_size, self.cell_size, 25)) 119 | filename = os.path.join(self.data_path, 'Annotations', index + '.xml') 120 | tree = ET.parse(filename) 121 | objs = tree.findall('object') 122 | 123 | for obj in objs: 124 | bbox = obj.find('bndbox') 125 | # Make pixel indexes 0-based 126 | x1 = max(min((float(bbox.find('xmin').text) - 1) * w_ratio, self.image_size - 1), 0) 127 | y1 = max(min((float(bbox.find('ymin').text) - 1) * h_ratio, self.image_size - 1), 0) 128 | x2 = max(min((float(bbox.find('xmax').text) - 1) * w_ratio, self.image_size - 1), 0) 129 | y2 = max(min((float(bbox.find('ymax').text) - 1) * h_ratio, self.image_size - 1), 0) 130 | cls_ind = self.class_to_ind[obj.find('name').text.lower().strip()] 131 | boxes = [(x2 + x1) / 2.0, (y2 + y1) / 2.0, x2 - x1, y2 - y1] 132 | x_ind = int(boxes[0] * self.cell_size / self.image_size) 133 | y_ind = int(boxes[1] * self.cell_size / self.image_size) 134 | if label[y_ind, x_ind, 0] == 1: 135 | continue 136 | label[y_ind, x_ind, 0] = 1 137 | label[y_ind, x_ind, 1:5] = boxes 138 | label[y_ind, x_ind, 5 + cls_ind] = 1 139 | 140 | return label, len(objs) 141 | -------------------------------------------------------------------------------- /utils/timer.py: -------------------------------------------------------------------------------- 1 | import time, datetime 2 | 3 | class Timer(object): 4 | ''' 5 | A simple timer. 6 | ''' 7 | def __init__(self): 8 | self.init_time = time.time() 9 | self.total_time = 0. 10 | self.calls = 0 11 | self.start_time = 0. 12 | self.diff = 0. 13 | self.average_time = 0. 14 | self.remain_time = 0. 
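    # tic()/toc() below bracket one timed call: toc() keeps a running average
    # over all calls, and remain() extrapolates the wall time elapsed since
    # __init__ to estimate how long the remaining iterations will take.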
15 | 
16 |     def tic(self):
17 |         # using time.time instead of time.clock because time.clock
18 |         # does not normalize for multithreading
19 |         self.start_time = time.time()
20 | 
21 |     def toc(self, average=True):
22 |         self.diff = time.time() - self.start_time
23 |         self.total_time += self.diff
24 |         self.calls += 1
25 |         self.average_time = self.total_time / self.calls
26 |         if average:
27 |             return self.average_time
28 |         else:
29 |             return self.diff
30 | 
31 |     def remain(self, iters, max_iters):
32 |         if iters == 0:
33 |             self.remain_time = 0
34 |         else:
35 |             self.remain_time = (time.time() - self.init_time) * \
36 |                 (max_iters - iters) / iters
37 |         return str(datetime.timedelta(seconds=int(self.remain_time)))
38 | 
--------------------------------------------------------------------------------
/utils/tool.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | 
4 | 
5 | PATTERN = ('.jpg', '.jpeg')
6 | def find_files(directory, pattern=PATTERN):
7 |     files = []
8 |     for path, d, filelist in os.walk(directory):
9 |         for filename in filelist:
10 |             if filename.lower().endswith(pattern):
11 |                 files.append(os.path.join(path, filename))
12 |     return files
13 | 
14 | def map2classnames(labelmap_file):
15 |     classes = []
16 |     pat = 'display_name'
17 |     with open(labelmap_file, 'r') as f:
18 |         for line in f.readlines():
19 |             if re.search(pat, line):
20 |                 line_strs = line.split('"')
21 |                 class_name = line_strs[-2]
22 |                 classes.append(class_name)
23 | 
24 |     return classes
--------------------------------------------------------------------------------
/yolo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shishichang/yolov2-tensorflow/a71a57465422c4806e1dcbcf8b40a36463a10272/yolo/__init__.py
--------------------------------------------------------------------------------
/yolo/config.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | #
4 | # path and dataset parameter
5 | #
6 | 
7 | DATA_PATH = 'data'
8 | 
9 | PASCAL_PATH = os.path.join(DATA_PATH, 'pascal_voc')
10 | 
11 | CACHE_PATH = os.path.join(PASCAL_PATH, 'cache')
12 | 
13 | OUTPUT_DIR = os.path.join(PASCAL_PATH, 'output')
14 | 
15 | WEIGHTS_DIR = os.path.join(PASCAL_PATH, 'weights')
16 | 
17 | # WEIGHTS_FILE = None
18 | WEIGHTS_FILE = os.path.join(DATA_PATH, 'weights', 'YOLO_small.ckpt')
19 | 
20 | CLASSES = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
21 |            'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
22 |            'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
23 |            'train', 'tvmonitor']
24 | 
25 | FLIPPED = True
26 | 
27 | 
28 | #
29 | # model parameter
30 | #
31 | 
32 | IMAGE_SIZE = 448
33 | 
34 | CELL_SIZE = 7
35 | 
36 | BOXES_PER_CELL = 2
37 | 
38 | ALPHA = 0.1
39 | 
40 | DISP_CONSOLE = False
41 | 
42 | OBJECT_SCALE = 1.0
43 | NOOBJECT_SCALE = 1.0
44 | CLASS_SCALE = 2.0
45 | COORD_SCALE = 5.0
46 | 
47 | 
48 | #
49 | # solver parameter
50 | #
51 | 
52 | GPU = '2'
53 | 
54 | LEARNING_RATE = 0.0001
55 | 
56 | DECAY_STEPS = 30000
57 | 
58 | DECAY_RATE = 0.1
59 | 
60 | STAIRCASE = True
61 | 
62 | BATCH_SIZE = 45
63 | 
64 | MAX_ITER = 15000
65 | 
66 | SUMMARY_ITER = 10
67 | 
68 | SAVE_ITER = 1000
69 | 
70 | 
71 | #
72 | # test parameter
73 | #
74 | 
75 | 
76 | THRESHOLD = 0.3
77 | 
78 | IOU_THRESHOLD = 0.5
79 | 
--------------------------------------------------------------------------------
/yolo/yolo_net.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import yolo.config as cfg 4 | 5 | slim = tf.contrib.slim 6 | 7 | 8 | class YOLONet(object): 9 | 10 | def __init__(self, is_training=True): 11 | self.classes = cfg.CLASSES 12 | self.num_class = len(self.classes) 13 | self.image_size = cfg.IMAGE_SIZE 14 | self.cell_size = cfg.CELL_SIZE 15 | self.boxes_per_cell = cfg.BOXES_PER_CELL 16 | self.output_size = (self.cell_size * self.cell_size) * (self.num_class + self.boxes_per_cell * 5) 17 | self.scale = 1.0 * self.image_size / self.cell_size 18 | self.boundary1 = self.cell_size * self.cell_size * self.num_class 19 | self.boundary2 = self.boundary1 + self.cell_size * self.cell_size * self.boxes_per_cell 20 | 21 | self.object_scale = cfg.OBJECT_SCALE 22 | self.noobject_scale = cfg.NOOBJECT_SCALE 23 | self.class_scale = cfg.CLASS_SCALE 24 | self.coord_scale = cfg.COORD_SCALE 25 | 26 | self.learning_rate = cfg.LEARNING_RATE 27 | self.batch_size = cfg.BATCH_SIZE 28 | self.alpha = cfg.ALPHA 29 | 30 | self.offset = np.transpose(np.reshape(np.array( 31 | [np.arange(self.cell_size)] * self.cell_size * self.boxes_per_cell), 32 | (self.boxes_per_cell, self.cell_size, self.cell_size)), (1, 2, 0)) 33 | 34 | self.images = tf.placeholder(tf.float32, [None, self.image_size, self.image_size, 3], name='images') 35 | self.logits = self.build_network(self.images, num_outputs=self.output_size, alpha=self.alpha, is_training=is_training) 36 | 37 | if is_training: 38 | self.labels = tf.placeholder(tf.float32, [None, self.cell_size, self.cell_size, 5 + self.num_class]) 39 | self.loss_layer(self.logits, self.labels) 40 | self.total_loss = tf.losses.get_total_loss() 41 | tf.summary.scalar('total_loss', self.total_loss) 42 | 43 | def build_network(self, 44 | images, 45 | num_outputs, 46 | alpha, 47 | keep_prob=0.5, 48 | is_training=True, 49 | scope='yolo'): 50 | with tf.variable_scope(scope): 51 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 52 | activation_fn=leaky_relu(alpha), 53 | weights_initializer=tf.truncated_normal_initializer(0.0, 0.01), 54 | weights_regularizer=slim.l2_regularizer(0.0005)): 55 | net = tf.pad(images, np.array([[0, 0], [3, 3], [3, 3], [0, 0]]), name='pad_1') 56 | net = slim.conv2d(net, 64, 7, 2, padding='VALID', scope='conv_2') 57 | net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_3') 58 | net = slim.conv2d(net, 192, 3, scope='conv_4') 59 | net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_5') 60 | net = slim.conv2d(net, 128, 1, scope='conv_6') 61 | net = slim.conv2d(net, 256, 3, scope='conv_7') 62 | net = slim.conv2d(net, 256, 1, scope='conv_8') 63 | net = slim.conv2d(net, 512, 3, scope='conv_9') 64 | net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_10') 65 | net = slim.conv2d(net, 256, 1, scope='conv_11') 66 | net = slim.conv2d(net, 512, 3, scope='conv_12') 67 | net = slim.conv2d(net, 256, 1, scope='conv_13') 68 | net = slim.conv2d(net, 512, 3, scope='conv_14') 69 | net = slim.conv2d(net, 256, 1, scope='conv_15') 70 | net = slim.conv2d(net, 512, 3, scope='conv_16') 71 | net = slim.conv2d(net, 256, 1, scope='conv_17') 72 | net = slim.conv2d(net, 512, 3, scope='conv_18') 73 | net = slim.conv2d(net, 512, 1, scope='conv_19') 74 | net = slim.conv2d(net, 1024, 3, scope='conv_20') 75 | net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_21') 76 | net = slim.conv2d(net, 512, 1, scope='conv_22') 77 | net = slim.conv2d(net, 1024, 3, scope='conv_23') 78 | net = slim.conv2d(net, 
512, 1, scope='conv_24')
79 |                 net = slim.conv2d(net, 1024, 3, scope='conv_25')
80 |                 net = slim.conv2d(net, 1024, 3, scope='conv_26')
81 |                 net = tf.pad(net, np.array([[0, 0], [1, 1], [1, 1], [0, 0]]), name='pad_27')
82 |                 net = slim.conv2d(net, 1024, 3, 2, padding='VALID', scope='conv_28')
83 |                 net = slim.conv2d(net, 1024, 3, scope='conv_29')
84 |                 net = slim.conv2d(net, 1024, 3, scope='conv_30')
85 |                 net = tf.transpose(net, [0, 3, 1, 2], name='trans_31')
86 |                 net = slim.flatten(net, scope='flat_32')
87 |                 net = slim.fully_connected(net, 512, scope='fc_33')
88 |                 net = slim.fully_connected(net, 4096, scope='fc_34')
89 |                 net = slim.dropout(net, keep_prob=keep_prob,
90 |                                    is_training=is_training, scope='dropout_35')
91 |                 net = slim.fully_connected(net, num_outputs,
92 |                                            activation_fn=None, scope='fc_36')
93 |         return net
94 | 
95 |     def calc_iou(self, boxes1, boxes2, scope='iou'):
96 |         """calculate ious
97 |         Args:
98 |           boxes1: 5-D tensor [BATCH_SIZE, CELL_SIZE, CELL_SIZE, BOXES_PER_CELL, 4] ====> (x_center, y_center, w, h)
99 |           boxes2: 5-D tensor [BATCH_SIZE, CELL_SIZE, CELL_SIZE, BOXES_PER_CELL, 4] ===> (x_center, y_center, w, h)
100 |         Return:
101 |           iou: 4-D tensor [BATCH_SIZE, CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
102 |         """
103 |         with tf.variable_scope(scope):
104 |             boxes1 = tf.stack([boxes1[:, :, :, :, 0] - boxes1[:, :, :, :, 2] / 2.0,
105 |                                boxes1[:, :, :, :, 1] - boxes1[:, :, :, :, 3] / 2.0,
106 |                                boxes1[:, :, :, :, 0] + boxes1[:, :, :, :, 2] / 2.0,
107 |                                boxes1[:, :, :, :, 1] + boxes1[:, :, :, :, 3] / 2.0])
108 |             boxes1 = tf.transpose(boxes1, [1, 2, 3, 4, 0])
109 | 
110 |             boxes2 = tf.stack([boxes2[:, :, :, :, 0] - boxes2[:, :, :, :, 2] / 2.0,
111 |                                boxes2[:, :, :, :, 1] - boxes2[:, :, :, :, 3] / 2.0,
112 |                                boxes2[:, :, :, :, 0] + boxes2[:, :, :, :, 2] / 2.0,
113 |                                boxes2[:, :, :, :, 1] + boxes2[:, :, :, :, 3] / 2.0])
114 |             boxes2 = tf.transpose(boxes2, [1, 2, 3, 4, 0])
115 | 
116 |             # calculate the upper-left and lower-right corners of the intersection
117 |             lu = tf.maximum(boxes1[:, :, :, :, :2], boxes2[:, :, :, :, :2])
118 |             rd = tf.minimum(boxes1[:, :, :, :, 2:], boxes2[:, :, :, :, 2:])
119 | 
120 |             # intersection
121 |             intersection = tf.maximum(0.0, rd - lu)
122 |             inter_square = intersection[:, :, :, :, 0] * intersection[:, :, :, :, 1]
123 | 
124 |             # calculate the boxes1 and boxes2 areas
125 |             square1 = (boxes1[:, :, :, :, 2] - boxes1[:, :, :, :, 0]) * \
126 |                 (boxes1[:, :, :, :, 3] - boxes1[:, :, :, :, 1])
127 |             square2 = (boxes2[:, :, :, :, 2] - boxes2[:, :, :, :, 0]) * \
128 |                 (boxes2[:, :, :, :, 3] - boxes2[:, :, :, :, 1])
129 | 
130 |             union_square = tf.maximum(square1 + square2 - inter_square, 1e-10)
131 | 
132 |             return tf.clip_by_value(inter_square / union_square, 0.0, 1.0)
133 | 
134 |     def loss_layer(self, predicts, labels, scope='loss_layer'):
135 |         with tf.variable_scope(scope):
136 |             predict_classes = tf.reshape(predicts[:, :self.boundary1], [self.batch_size, self.cell_size, self.cell_size, self.num_class])
137 |             predict_scales = tf.reshape(predicts[:, self.boundary1:self.boundary2], [self.batch_size, self.cell_size, self.cell_size, self.boxes_per_cell])
138 |             predict_boxes = tf.reshape(predicts[:, self.boundary2:], [self.batch_size, self.cell_size, self.cell_size, self.boxes_per_cell, 4])
139 | 
140 |             response = tf.reshape(labels[:, :, :, 0], [self.batch_size, self.cell_size, self.cell_size, 1])
141 |             boxes = tf.reshape(labels[:, :, :, 1:5], [self.batch_size, self.cell_size, self.cell_size, 1, 4])
142 |             boxes = tf.tile(boxes, [1, 1, 1, self.boxes_per_cell, 1]) / self.image_size
143 |             classes = labels[:, :, :, 5:]
144 | 
145 |             offset = 
tf.constant(self.offset, dtype=tf.float32) 146 | offset = tf.reshape(offset, [1, self.cell_size, self.cell_size, self.boxes_per_cell]) 147 | offset = tf.tile(offset, [self.batch_size, 1, 1, 1]) 148 | predict_boxes_tran = tf.stack([(predict_boxes[:, :, :, :, 0] + offset) / self.cell_size, 149 | (predict_boxes[:, :, :, :, 1] + tf.transpose(offset, (0, 2, 1, 3))) / self.cell_size, 150 | tf.square(predict_boxes[:, :, :, :, 2]), 151 | tf.square(predict_boxes[:, :, :, :, 3])]) 152 | predict_boxes_tran = tf.transpose(predict_boxes_tran, [1, 2, 3, 4, 0]) 153 | 154 | iou_predict_truth = self.calc_iou(predict_boxes_tran, boxes) 155 | 156 | # calculate I tensor [BATCH_SIZE, CELL_SIZE, CELL_SIZE, BOXES_PER_CELL] 157 | object_mask = tf.reduce_max(iou_predict_truth, 3, keep_dims=True) 158 | object_mask = tf.cast((iou_predict_truth >= object_mask), tf.float32) * response 159 | 160 | # calculate no_I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL] 161 | noobject_mask = tf.ones_like(object_mask, dtype=tf.float32) - object_mask 162 | 163 | boxes_tran = tf.stack([boxes[:, :, :, :, 0] * self.cell_size - offset, 164 | boxes[:, :, :, :, 1] * self.cell_size - tf.transpose(offset, (0, 2, 1, 3)), 165 | tf.sqrt(boxes[:, :, :, :, 2]), 166 | tf.sqrt(boxes[:, :, :, :, 3])]) 167 | boxes_tran = tf.transpose(boxes_tran, [1, 2, 3, 4, 0]) 168 | 169 | # class_loss 170 | class_delta = response * (predict_classes - classes) 171 | class_loss = tf.reduce_mean(tf.reduce_sum(tf.square(class_delta), axis=[1, 2, 3]), name='class_loss') * self.class_scale 172 | 173 | # object_loss 174 | object_delta = object_mask * (predict_scales - iou_predict_truth) 175 | object_loss = tf.reduce_mean(tf.reduce_sum(tf.square(object_delta), axis=[1, 2, 3]), name='object_loss') * self.object_scale 176 | 177 | # noobject_loss 178 | noobject_delta = noobject_mask * predict_scales 179 | noobject_loss = tf.reduce_mean(tf.reduce_sum(tf.square(noobject_delta), axis=[1, 2, 3]), name='noobject_loss') * self.noobject_scale 180 | 181 | # coord_loss 182 | coord_mask = tf.expand_dims(object_mask, 4) 183 | boxes_delta = coord_mask * (predict_boxes - boxes_tran) 184 | coord_loss = tf.reduce_mean(tf.reduce_sum(tf.square(boxes_delta), axis=[1, 2, 3, 4]), name='coord_loss') * self.coord_scale 185 | 186 | tf.losses.add_loss(class_loss) 187 | tf.losses.add_loss(object_loss) 188 | tf.losses.add_loss(noobject_loss) 189 | tf.losses.add_loss(coord_loss) 190 | 191 | tf.summary.scalar('class_loss', class_loss) 192 | tf.summary.scalar('object_loss', object_loss) 193 | tf.summary.scalar('noobject_loss', noobject_loss) 194 | tf.summary.scalar('coord_loss', coord_loss) 195 | 196 | tf.summary.histogram('boxes_delta_x', boxes_delta[:, :, :, :, 0]) 197 | tf.summary.histogram('boxes_delta_y', boxes_delta[:, :, :, :, 1]) 198 | tf.summary.histogram('boxes_delta_w', boxes_delta[:, :, :, :, 2]) 199 | tf.summary.histogram('boxes_delta_h', boxes_delta[:, :, :, :, 3]) 200 | tf.summary.histogram('iou', iou_predict_truth) 201 | 202 | 203 | def leaky_relu(alpha): 204 | def op(inputs): 205 | return tf.maximum(alpha * inputs, inputs, name='leaky_relu') 206 | return op 207 | --------------------------------------------------------------------------------
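The corner-based IoU that `calc_iou` computes above (and that `iou` in `demo_yolo_v1.py` and `utils/box.py` mirror in plain Python) is easy to sanity-check by hand. A tiny sketch with toy boxes in (x_center, y_center, w, h) form; the same arithmetic, but not part of the repo's API:

```python
def iou_xywh(b1, b2):
    # centre/size -> corners, then intersection over union
    iw = max(0.0, min(b1[0] + b1[2] / 2, b2[0] + b2[2] / 2) - max(b1[0] - b1[2] / 2, b2[0] - b2[2] / 2))
    ih = max(0.0, min(b1[1] + b1[3] / 2, b2[1] + b2[3] / 2) - max(b1[1] - b1[3] / 2, b2[1] - b2[3] / 2))
    inter = iw * ih
    return inter / (b1[2] * b1[3] + b2[2] * b2[3] - inter)

print(iou_xywh([0.5, 0.5, 0.4, 0.4], [0.6, 0.6, 0.4, 0.4]))  # ~0.391
```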