├── README.md
├── core
│   ├── __pycache__
│   │   ├── backbone.cpython-35.pyc
│   │   ├── common.cpython-35.pyc
│   │   ├── config.cpython-35.pyc
│   │   ├── dataset.cpython-35.pyc
│   │   ├── utils.cpython-35.pyc
│   │   └── yolov3.cpython-35.pyc
│   ├── backbone.py
│   ├── common.py
│   ├── config.py
│   ├── dataset.py
│   ├── utils.py
│   └── yolov3.py
├── data
│   ├── anchors
│   │   └── basline_anchors.txt
│   ├── classes
│   │   ├── coco.names
│   │   └── yymnist.names
│   └── dataset
│       ├── coco_train_2014.txt
│       ├── coco_train_2017.txt
│       └── yymnist_test.txt
├── docs
│   ├── 01554.jpg
│   ├── 01567.jpg
│   ├── kite.jpg
│   ├── kite_result.jpg
│   ├── loss.png
│   ├── requirements.txt
│   └── road.mp4
├── image_demo.py
├── scripts
│   ├── coco_val_2014.txt
│   ├── coco_val_2017.txt
│   └── convert_coco_to_trainlist.py
├── test.py
├── train.py
└── video_demo.py

/README.md:
--------------------------------------------------------------------------------
1 | # TensorFlow 2.0 YoloV3
2 | 
3 | YOLOv3 training on COCO with TensorFlow 2.0. This is a minimal implementation of the YOLOv3 algorithm. What has been done so far:
4 | 
5 | - [x] A **det-txt** annotation format for training;
6 | - [x] Training on COCO, plus a script to generate the train-list file;
7 | - [ ] Exporting the model to tflite;
8 | - [ ] Exporting the model with fp16 and int8 inference;
9 | 
10 | 
11 | 
12 | ## Install
13 | 
14 | You should have tensorflow-2.0-rc installed.
15 | 
16 | 
17 | 
18 | ## Copyright
19 | 
20 | All beloved opensource authors.
--------------------------------------------------------------------------------
/core/__pycache__/backbone.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/core/__pycache__/backbone.cpython-35.pyc
--------------------------------------------------------------------------------
/core/__pycache__/common.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/core/__pycache__/common.cpython-35.pyc
--------------------------------------------------------------------------------
/core/__pycache__/config.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/core/__pycache__/config.cpython-35.pyc
--------------------------------------------------------------------------------
/core/__pycache__/dataset.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/core/__pycache__/dataset.cpython-35.pyc
--------------------------------------------------------------------------------
/core/__pycache__/utils.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/core/__pycache__/utils.cpython-35.pyc
--------------------------------------------------------------------------------
/core/__pycache__/yolov3.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/core/__pycache__/yolov3.cpython-35.pyc
--------------------------------------------------------------------------------
/core/backbone.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | # ================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : backbone.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-07-11 23:37:51
10 | #   Description :
11 | #
12 | # ================================================================
13 | 
14 | import tensorflow as tf
15 | import core.common as common
16 | 
17 | 
18 | def darknet53(input_data):
19 | 
20 |     input_data = common.convolutional(input_data, (3, 3, 3, 32))
21 |     input_data = common.convolutional(
22 |         input_data, (3, 3, 32, 64), downsample=True)
23 | 
24 |     for i in range(1):
25 |         input_data = common.residual_block(input_data, 64, 32, 64)
26 | 
27 |     input_data = common.convolutional(
28 |         input_data, (3, 3, 64, 128), downsample=True)
29 | 
30 |     for i in range(2):
31 |         input_data = common.residual_block(input_data, 128, 64, 128)
32 | 
33 |     input_data = common.convolutional(
34 |         input_data, (3, 3, 128, 256), downsample=True)
35 | 
36 |     for i in range(8):
37 |         input_data = common.residual_block(input_data, 256, 128, 256)
38 | 
39 |     route_1 = input_data
40 |     input_data = common.convolutional(
41 |         input_data, (3, 3, 256, 512), downsample=True)
42 | 
43 |     for i in range(8):
44 |         input_data = common.residual_block(input_data, 512, 256, 512)
45 | 
46 |     route_2 = input_data
47 |     input_data = common.convolutional(
48 |         input_data, (3, 3, 512, 1024), downsample=True)
49 | 
50 |     for i in range(4):
51 |         input_data = common.residual_block(input_data, 1024, 512, 1024)
52 | 
53 |     return route_1, route_2, input_data
54 | 
--------------------------------------------------------------------------------
/core/common.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | # ================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : common.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-07-11 23:12:53
10 | #   Description :
11 | #
12 | # ================================================================
13 | 
14 | import tensorflow as tf
15 | 
16 | 
17 | class BatchNormalization(tf.keras.layers.BatchNormalization):
18 |     """
19 |     "Frozen state" and "inference mode" are two separate concepts.
20 |     `layer.trainable = False` is to freeze the layer, so the layer will use
21 |     stored moving `var` and `mean` in the "inference mode", and both `gamma`
22 |     and `beta` will not be updated!
23 |     """
24 | 
25 |     def call(self, x, training=False):
26 |         if not training:
27 |             training = tf.constant(False)
28 |         training = tf.logical_and(training, self.trainable)
29 |         return super().call(x, training)
30 | 
31 | 
32 | def convolutional(input_layer, filters_shape, downsample=False, activate=True, bn=True):
33 |     if downsample:
34 |         input_layer = tf.keras.layers.ZeroPadding2D(
35 |             ((1, 0), (1, 0)))(input_layer)
36 |         padding = 'valid'
37 |         strides = 2
38 |     else:
39 |         strides = 1
40 |         padding = 'same'
41 | 
42 |     conv = tf.keras.layers.Conv2D(filters=filters_shape[-1], kernel_size=filters_shape[0], strides=strides, padding=padding,
43 |                                   use_bias=not bn, kernel_regularizer=tf.keras.regularizers.l2(0.0005),
44 |                                   kernel_initializer=tf.random_normal_initializer(
45 |                                       stddev=0.01),
46 |                                   bias_initializer=tf.constant_initializer(0.))(input_layer)
47 | 
48 |     if bn:
49 |         conv = BatchNormalization()(conv)
50 |     if activate:
51 |         conv = tf.nn.leaky_relu(conv, alpha=0.1)
52 | 
53 |     return conv
54 | 
55 | 
56 | def residual_block(input_layer, input_channel, filter_num1, filter_num2):
57 |     short_cut = input_layer
58 |     conv = convolutional(input_layer, filters_shape=(
59 |         1, 1, input_channel, filter_num1))
60 |     conv = convolutional(conv, filters_shape=(
61 |         3, 3, filter_num1, filter_num2))
62 | 
63 |     residual_output = short_cut + conv
64 |     return residual_output
65 | 
66 | 
67 | def upsample(input_layer):
68 |     return tf.image.resize(input_layer, (input_layer.shape[1] * 2, input_layer.shape[2] * 2), method='nearest')
69 | 
--------------------------------------------------------------------------------
/core/config.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | # ================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : config.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-02-28 13:06:54
10 | #   Description :
11 | #
12 | # ================================================================
13 | 
14 | from easydict import EasyDict as edict
15 | 
16 | 
17 | __C = edict()
18 | # Consumers can get config by: from config import cfg
19 | 
20 | cfg = __C
21 | 
22 | # YOLO options
23 | __C.YOLO = edict()
24 | 
25 | # Set the class name
26 | __C.YOLO.CLASSES = "./data/classes/coco.names"
27 | __C.YOLO.ANCHORS = "./data/anchors/basline_anchors.txt"
28 | __C.YOLO.STRIDES = [8, 16, 32]
29 | __C.YOLO.ANCHOR_PER_SCALE = 3
30 | __C.YOLO.IOU_LOSS_THRESH = 0.5
31 | 
32 | # Train options
33 | __C.TRAIN = edict()
34 | 
35 | # paths in the train list are relative; they are joined with IMG_ROOT before use
36 | __C.TRAIN.ANNOT_PATH = "./data/dataset/coco_train_2017.txt"
37 | __C.IMG_ROOT = '/autox-sz/departments/perception/public_data/coco'
38 | 
39 | __C.TRAIN.BATCH_SIZE = 4
40 | # __C.TRAIN.INPUT_SIZE = [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
41 | __C.TRAIN.INPUT_SIZE = [416]
42 | __C.TRAIN.DATA_AUG = True
43 | __C.TRAIN.LR_INIT = 1e-3
44 | __C.TRAIN.LR_END = 1e-6
45 | __C.TRAIN.WARMUP_EPOCHS = 2
46 | __C.TRAIN.EPOCHS = 30
47 | 
48 | 
49 | # TEST options
50 | __C.TEST = edict()
51 | 
52 | __C.TEST.ANNOT_PATH = "data/dataset/coco_train_2014.txt"
53 | __C.TEST.BATCH_SIZE = 2
54 | __C.TEST.INPUT_SIZE = 544
55 | __C.TEST.DATA_AUG = False
56 | __C.TEST.DECTECTED_IMAGE_PATH = "./data/detection/"
57 | __C.TEST.SCORE_THRESHOLD = 0.3
58 | __C.TEST.IOU_THRESHOLD = 0.45
59 | 
--------------------------------------------------------------------------------
/core/dataset.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | # ================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : dataset.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-03-15 18:05:03
10 | #   Description :
11 | #
12 | # ================================================================
13 | 
14 | import os
15 | import cv2
16 | import random
17 | import numpy as np
18 | import tensorflow as tf
19 | import core.utils as utils
20 | from core.config import cfg
21 | 
22 | 
23 | class Dataset(object):
24 |     """implement Dataset here"""
25 | 
26 |     def __init__(self, dataset_type):
27 |         self.annot_path = cfg.TRAIN.ANNOT_PATH if dataset_type == 'train' else cfg.TEST.ANNOT_PATH
28 |         self.input_sizes = cfg.TRAIN.INPUT_SIZE if dataset_type == 'train' else cfg.TEST.INPUT_SIZE
29 |         self.batch_size = cfg.TRAIN.BATCH_SIZE if dataset_type == 'train' else cfg.TEST.BATCH_SIZE
30 |         self.data_aug = cfg.TRAIN.DATA_AUG if dataset_type == 'train' else cfg.TEST.DATA_AUG
31 |         self.img_root = cfg.IMG_ROOT
32 | 
33 |         self.train_input_sizes = cfg.TRAIN.INPUT_SIZE
34 |         self.strides = np.array(cfg.YOLO.STRIDES)
35 |         self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
36 |         self.num_classes = len(self.classes)
37 |         self.anchors = np.array(utils.get_anchors(cfg.YOLO.ANCHORS))
38 |         self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
39 |         self.max_bbox_per_scale = 150
40 | 
41 |         self.annotations = self.load_annotations(dataset_type)
42 |         self.num_samples = len(self.annotations)
43 |         self.num_batchs = int(np.ceil(self.num_samples / self.batch_size))
44 |         self.batch_count = 0
45 | 
46 |     def load_annotations(self, dataset_type):
47 |         with open(self.annot_path, 'r') as f:
48 |             txt = f.readlines()
49 |             annotations = [line.strip()
50 |                            for line in txt if len(line.strip().split()[1:]) != 0]
51 |         np.random.shuffle(annotations)
52 |         return annotations
53 | 
54 |     def __iter__(self):
55 |         return self
56 | 
57 |     def __next__(self):
58 | 
59 |         with tf.device('/cpu:0'):
60 |             self.train_input_size = random.choice(self.train_input_sizes)
61 |             self.train_output_sizes = self.train_input_size // self.strides
62 | 
63 |             batch_image = np.zeros(
64 |                 (self.batch_size, self.train_input_size, self.train_input_size, 3), dtype=np.float32)
65 | 
66 |             batch_label_sbbox = np.zeros((self.batch_size, self.train_output_sizes[0], self.train_output_sizes[0],
67 |                                           self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
68 |             batch_label_mbbox = np.zeros((self.batch_size, self.train_output_sizes[1], self.train_output_sizes[1],
69 |                                           self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
70 |             batch_label_lbbox = np.zeros((self.batch_size, self.train_output_sizes[2], self.train_output_sizes[2],
71 |                                           self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
72 | 
73 |             batch_sbboxes = np.zeros(
74 |                 (self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
75 |             batch_mbboxes = np.zeros(
76 |                 (self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
77 |             batch_lbboxes = np.zeros(
78 |                 (self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
79 | 
80 |             num = 0
81 |             if self.batch_count < self.num_batchs:
82 |                 while num < self.batch_size:
83 |                     index = self.batch_count * self.batch_size + num
84 |                     if index >= self.num_samples:
85 |                         index -= self.num_samples
86 |                     annotation = self.annotations[index]
87 |                     image, bboxes = self.parse_annotation(annotation)
88 |                     label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes = self.preprocess_true_boxes(
89 |                         bboxes)
90 | 
91 |                     batch_image[num, :, :, :] = image
92 |                     batch_label_sbbox[num, :, :, :, :] = label_sbbox
93 |                     batch_label_mbbox[num, :, :, :, :] = label_mbbox
94 |                     batch_label_lbbox[num, :, :, :, :] = label_lbbox
95 |                     batch_sbboxes[num, :, :] = sbboxes
96 |                     batch_mbboxes[num, :, :] = mbboxes
97 |                     batch_lbboxes[num, :, :] = lbboxes
98 |                     num += 1
99 |                 self.batch_count += 1
100 |                 batch_smaller_target = batch_label_sbbox, batch_sbboxes
101 |                 batch_medium_target = batch_label_mbbox, batch_mbboxes
102 |                 batch_larger_target = batch_label_lbbox, batch_lbboxes
103 | 
104 |                 return batch_image, (batch_smaller_target, batch_medium_target, batch_larger_target)
105 |             else:
106 |                 self.batch_count = 0
107 |                 np.random.shuffle(self.annotations)
108 |                 raise StopIteration
109 | 
110 |     def random_horizontal_flip(self, image, bboxes):
111 | 
112 |         if random.random() < 0.5:
113 |             _, w, _ = image.shape
114 |             image = image[:, ::-1, :]
115 |             bboxes[:, [0, 2]] = w - bboxes[:, [2, 0]]
116 | 
117 |         return image, bboxes
118 | 
119 |     def random_crop(self, image, bboxes):
120 | 
121 |         if random.random() < 0.5:
122 |             h, w, _ = image.shape
123 |             max_bbox = np.concatenate(
124 |                 [np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1)
125 | 
126 |             max_l_trans = max_bbox[0]
127 |             max_u_trans = max_bbox[1]
128 |             max_r_trans = w - max_bbox[2]
129 |             max_d_trans = h - max_bbox[3]
130 | 
131 |             crop_xmin = max(
132 |                 0, int(max_bbox[0] - random.uniform(0, max_l_trans)))
133 |             crop_ymin = max(
134 |                 0, int(max_bbox[1] - random.uniform(0, max_u_trans)))
135 |             crop_xmax = min(  # clamp the crop window to the image width
136 |                 w, int(max_bbox[2] + random.uniform(0, max_r_trans)))
137 |             crop_ymax = min(  # clamp the crop window to the image height
138 |                 h, int(max_bbox[3] + random.uniform(0, max_d_trans)))
139 | 
140 |             image = image[crop_ymin: crop_ymax, crop_xmin: crop_xmax]
141 | 
142 |             bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop_xmin
143 |             bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop_ymin
144 | 
145 |         return image, bboxes
146 | 
147 |     def random_translate(self, image, bboxes):
148 | 
149 |         if random.random() < 0.5:
150 |             h, w, _ = image.shape
151 |             max_bbox = np.concatenate(
152 |                 [np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1)
153 | 
154 |             max_l_trans = max_bbox[0]
155 |             max_u_trans = max_bbox[1]
156 |             max_r_trans = w - max_bbox[2]
157 |             max_d_trans = h - max_bbox[3]
158 | 
159 |             tx = random.uniform(-(max_l_trans - 1), (max_r_trans - 1))
160 |             ty = random.uniform(-(max_u_trans - 1), (max_d_trans - 1))
161 | 
162 |             M = np.array([[1, 0, tx], [0, 1, ty]])
163 |             image = cv2.warpAffine(image, M, (w, h))
164 | 
165 |             bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx
166 |             bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty
167 | 
168 |         return image, bboxes
169 | 
170 |     def parse_annotation(self, annotation):
171 |         line = annotation.split()
172 |         image_path = os.path.join(self.img_root, line[0])
173 |         if not os.path.exists(image_path):
174 |             raise KeyError("%s does not exist ... " % image_path)
" % image_path) 175 | image = cv2.imread(image_path) 176 | bboxes = np.array([list(map(int, box.split(','))) for box in line[1:]]) 177 | 178 | if self.data_aug: 179 | image, bboxes = self.random_horizontal_flip( 180 | np.copy(image), np.copy(bboxes)) 181 | image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes)) 182 | image, bboxes = self.random_translate( 183 | np.copy(image), np.copy(bboxes)) 184 | 185 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 186 | image, bboxes = utils.image_preporcess( 187 | np.copy(image), [self.train_input_size, self.train_input_size], np.copy(bboxes)) 188 | return image, bboxes 189 | 190 | def bbox_iou(self, boxes1, boxes2): 191 | 192 | boxes1 = np.array(boxes1) 193 | boxes2 = np.array(boxes2) 194 | 195 | boxes1_area = boxes1[..., 2] * boxes1[..., 3] 196 | boxes2_area = boxes2[..., 2] * boxes2[..., 3] 197 | 198 | boxes1 = np.concatenate([boxes1[..., :2] - boxes1[..., 2:] * 0.5, 199 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) 200 | boxes2 = np.concatenate([boxes2[..., :2] - boxes2[..., 2:] * 0.5, 201 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) 202 | 203 | left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) 204 | right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) 205 | 206 | inter_section = np.maximum(right_down - left_up, 0.0) 207 | inter_area = inter_section[..., 0] * inter_section[..., 1] 208 | union_area = boxes1_area + boxes2_area - inter_area 209 | 210 | return inter_area / union_area 211 | 212 | def preprocess_true_boxes(self, bboxes): 213 | 214 | label = [np.zeros((self.train_output_sizes[i], self.train_output_sizes[i], self.anchor_per_scale, 215 | 5 + self.num_classes)) for i in range(3)] 216 | bboxes_xywh = [np.zeros((self.max_bbox_per_scale, 4)) 217 | for _ in range(3)] 218 | bbox_count = np.zeros((3,)) 219 | 220 | for bbox in bboxes: 221 | bbox_coor = bbox[:4] 222 | bbox_class_ind = bbox[4] 223 | 224 | onehot = np.zeros(self.num_classes, dtype=np.float) 225 | onehot[bbox_class_ind] = 1.0 226 | uniform_distribution = np.full( 227 | self.num_classes, 1.0 / self.num_classes) 228 | deta = 0.01 229 | smooth_onehot = onehot * (1 - deta) + deta * uniform_distribution 230 | 231 | bbox_xywh = np.concatenate( 232 | [(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]], axis=-1) 233 | bbox_xywh_scaled = 1.0 * \ 234 | bbox_xywh[np.newaxis, :] / self.strides[:, np.newaxis] 235 | 236 | iou = [] 237 | exist_positive = False 238 | for i in range(3): 239 | anchors_xywh = np.zeros((self.anchor_per_scale, 4)) 240 | anchors_xywh[:, 0:2] = np.floor( 241 | bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5 242 | anchors_xywh[:, 2:4] = self.anchors[i] 243 | 244 | iou_scale = self.bbox_iou( 245 | bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh) 246 | iou.append(iou_scale) 247 | iou_mask = iou_scale > 0.3 248 | 249 | if np.any(iou_mask): 250 | xind, yind = np.floor( 251 | bbox_xywh_scaled[i, 0:2]).astype(np.int32) 252 | 253 | label[i][yind, xind, iou_mask, :] = 0 254 | label[i][yind, xind, iou_mask, 0:4] = bbox_xywh 255 | label[i][yind, xind, iou_mask, 4:5] = 1.0 256 | label[i][yind, xind, iou_mask, 5:] = smooth_onehot 257 | 258 | bbox_ind = int(bbox_count[i] % self.max_bbox_per_scale) 259 | bboxes_xywh[i][bbox_ind, :4] = bbox_xywh 260 | bbox_count[i] += 1 261 | 262 | exist_positive = True 263 | 264 | if not exist_positive: 265 | best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1) 266 | best_detect = int(best_anchor_ind / self.anchor_per_scale) 267 | best_anchor = int(best_anchor_ind % self.anchor_per_scale) 
268 |                 xind, yind = np.floor(
269 |                     bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32)
270 | 
271 |                 label[best_detect][yind, xind, best_anchor, :] = 0
272 |                 label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh
273 |                 label[best_detect][yind, xind, best_anchor, 4:5] = 1.0
274 |                 label[best_detect][yind, xind, best_anchor, 5:] = smooth_onehot
275 | 
276 |                 bbox_ind = int(bbox_count[best_detect] %
277 |                                self.max_bbox_per_scale)
278 |                 bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
279 |                 bbox_count[best_detect] += 1
280 |         label_sbbox, label_mbbox, label_lbbox = label
281 |         sbboxes, mbboxes, lbboxes = bboxes_xywh
282 |         return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes
283 | 
284 |     def __len__(self):
285 |         return self.num_batchs
286 | 
--------------------------------------------------------------------------------
/core/utils.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | #================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : utils.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-07-12 01:33:38
10 | #   Description :
11 | #
12 | #================================================================
13 | 
14 | import cv2
15 | import random
16 | import colorsys
17 | import numpy as np
18 | import tensorflow as tf
19 | from core.config import cfg
20 | 
21 | def load_weights(model, weights_file):
22 |     """
23 |     I agree that this code is very ugly, but I don’t know any better way of doing it.
24 |     """
25 |     wf = open(weights_file, 'rb')
26 |     major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5)
27 | 
28 |     j = 0
29 |     for i in range(75):
30 |         conv_layer_name = 'conv2d_%d' %i if i > 0 else 'conv2d'
31 |         bn_layer_name = 'batch_normalization_%d' %j if j > 0 else 'batch_normalization'
32 | 
33 |         conv_layer = model.get_layer(conv_layer_name)
34 |         filters = conv_layer.filters
35 |         k_size = conv_layer.kernel_size[0]
36 |         in_dim = conv_layer.input_shape[-1]
37 | 
38 |         if i not in [58, 66, 74]:
39 |             # darknet weights: [beta, gamma, mean, variance]
40 |             bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters)
41 |             # tf weights: [gamma, beta, mean, variance]
42 |             bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
43 |             bn_layer = model.get_layer(bn_layer_name)
44 |             j += 1
45 |         else:
46 |             conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)
47 | 
48 |         # darknet shape (out_dim, in_dim, height, width)
49 |         conv_shape = (filters, in_dim, k_size, k_size)
50 |         conv_weights = np.fromfile(wf, dtype=np.float32, count=np.product(conv_shape))
51 |         # tf shape (height, width, in_dim, out_dim)
52 |         conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])
53 | 
54 |         if i not in [58, 66, 74]:
55 |             conv_layer.set_weights([conv_weights])
56 |             bn_layer.set_weights(bn_weights)
57 |         else:
58 |             conv_layer.set_weights([conv_weights, conv_bias])
59 | 
60 |     assert len(wf.read()) == 0, 'failed to read all data'
61 |     wf.close()
62 | 
63 | 
64 | def read_class_names(class_file_name):
65 |     '''loads class name from a file'''
66 |     names = {}
67 |     with open(class_file_name, 'r') as data:
68 |         for ID, name in enumerate(data):
69 |             names[ID] = name.strip('\n')
70 |     return names
71 | 
72 | 
73 | def get_anchors(anchors_path):
74 |     '''loads the anchors from a file'''
75 |     with open(anchors_path) as f:
76 |         anchors = f.readline()
77 |     anchors = np.array(anchors.split(','), dtype=np.float32)
78 |     return anchors.reshape(3, 3, 2)
79 | 
80 | 
81 | def image_preporcess(image, target_size, gt_boxes=None):
82 | 
83 |     ih, iw = target_size
84 |     h, w, _ = image.shape
85 | 
86 |     scale = min(iw/w, ih/h)
87 |     nw, nh = int(scale * w), int(scale * h)
88 |     image_resized = cv2.resize(image, (nw, nh))
89 | 
90 |     image_paded = np.full(shape=[ih, iw, 3], fill_value=128.0)
91 |     dw, dh = (iw - nw) // 2, (ih-nh) // 2
92 |     image_paded[dh:nh+dh, dw:nw+dw, :] = image_resized
93 |     image_paded = image_paded / 255.
94 | 
95 |     if gt_boxes is None:
96 |         return image_paded
97 | 
98 |     else:
99 |         gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw
100 |         gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
101 |         return image_paded, gt_boxes
102 | 
103 | 
104 | def draw_bbox(image, bboxes, classes=read_class_names(cfg.YOLO.CLASSES), show_label=True):
105 |     """
106 |     bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
107 |     """
108 | 
109 |     num_classes = len(classes)
110 |     image_h, image_w, _ = image.shape
111 |     hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
112 |     colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
113 |     colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
114 | 
115 |     random.seed(0)
116 |     random.shuffle(colors)
117 |     random.seed(None)
118 | 
119 |     for i, bbox in enumerate(bboxes):
120 |         coor = np.array(bbox[:4], dtype=np.int32)
121 |         fontScale = 0.5
122 |         score = bbox[4]
123 |         class_ind = int(bbox[5])
124 |         bbox_color = colors[class_ind]
125 |         bbox_thick = int(0.6 * (image_h + image_w) / 600)
126 |         c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
127 |         cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)
128 | 
129 |         if show_label:
130 |             bbox_mess = '%s: %.2f' % (classes[class_ind], score)
131 |             t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick//2)[0]
132 |             cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1)  # filled
133 | 
134 |             cv2.putText(image, bbox_mess, (c1[0], c1[1]-2), cv2.FONT_HERSHEY_SIMPLEX,
135 |                         fontScale, (0, 0, 0), bbox_thick//2, lineType=cv2.LINE_AA)
136 | 
137 |     return image
138 | 
139 | 
140 | 
141 | def bboxes_iou(boxes1, boxes2):
142 | 
143 |     boxes1 = np.array(boxes1)
144 |     boxes2 = np.array(boxes2)
145 | 
146 |     boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
147 |     boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
148 | 
149 |     left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
150 |     right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
151 | 
152 |     inter_section = np.maximum(right_down - left_up, 0.0)
153 |     inter_area = inter_section[..., 0] * inter_section[..., 1]
154 |     union_area = boxes1_area + boxes2_area - inter_area
155 |     ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)
156 | 
157 |     return ious
158 | 
159 | 
160 | def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
161 |     """
162 |     :param bboxes: (xmin, ymin, xmax, ymax, score, class)
163 | 
164 |     Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
165 |           https://github.com/bharatsingh430/soft-nms
166 |     """
167 |     classes_in_img = list(set(bboxes[:, 5]))
168 |     best_bboxes = []
169 | 
170 |     for cls in classes_in_img:
171 |         cls_mask = (bboxes[:, 5] == cls)
172 |         cls_bboxes = bboxes[cls_mask]
173 | 
174 |         while len(cls_bboxes) > 0:
175 |             max_ind = np.argmax(cls_bboxes[:, 4])
176 |             best_bbox = cls_bboxes[max_ind]
177 |             best_bboxes.append(best_bbox)
178 |             cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]])
179 |             iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
180 |             weight = np.ones((len(iou),), dtype=np.float32)
181 | 
182 |             assert method in ['nms', 'soft-nms']
183 | 
184 |             if method == 'nms':
185 |                 iou_mask = iou > iou_threshold
186 |                 weight[iou_mask] = 0.0
187 | 
188 |             if method == 'soft-nms':
189 |                 weight = np.exp(-(1.0 * iou ** 2 / sigma))
190 | 
191 |             cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
192 |             score_mask = cls_bboxes[:, 4] > 0.
193 |             cls_bboxes = cls_bboxes[score_mask]
194 | 
195 |     return best_bboxes
196 | 
197 | 
198 | def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):
199 | 
200 |     valid_scale = [0, np.inf]
201 |     pred_bbox = np.array(pred_bbox)
202 | 
203 |     pred_xywh = pred_bbox[:, 0:4]
204 |     pred_conf = pred_bbox[:, 4]
205 |     pred_prob = pred_bbox[:, 5:]
206 | 
207 |     # # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax)
208 |     pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5,
209 |                                 pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1)
210 |     # # (2) (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org)
211 |     org_h, org_w = org_img_shape
212 |     resize_ratio = min(input_size / org_w, input_size / org_h)
213 | 
214 |     dw = (input_size - resize_ratio * org_w) / 2
215 |     dh = (input_size - resize_ratio * org_h) / 2
216 | 
217 |     pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio
218 |     pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio
219 | 
220 |     # # (3) clip boxes that are out of range
221 |     pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]),
222 |                                 np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1)
223 |     invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3]))
224 |     pred_coor[invalid_mask] = 0
225 | 
226 |     # # (4) discard some invalid boxes
227 |     bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1))
228 |     scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1]))
229 | 
230 |     # # (5) discard some boxes with low scores
231 |     classes = np.argmax(pred_prob, axis=-1)
232 |     scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
233 |     score_mask = scores > score_threshold
234 |     mask = np.logical_and(scale_mask, score_mask)
235 |     coors, scores, classes = pred_coor[mask], scores[mask], classes[mask]
236 | 
237 |     return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)
238 | 
239 | 
240 | 
241 | 
--------------------------------------------------------------------------------
/core/yolov3.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | # ================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : yolov3.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-07-12 13:47:10
10 | #   Description :
11 | #
12 | # ================================================================
13 | 
14 | import numpy as np
15 | import tensorflow as tf
16 | import core.utils as utils
17 | import core.common as common
18 | import core.backbone as backbone
19 | from core.config import cfg
20 | 
21 | 
22 | NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
23 | ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS)
24 | STRIDES = np.array(cfg.YOLO.STRIDES)
25 | IOU_LOSS_THRESH = cfg.YOLO.IOU_LOSS_THRESH
26 | 
27 | 
28 | def YOLOv3(input_layer):
29 |     route_1, route_2, conv = backbone.darknet53(input_layer)
30 | 
31 |     conv = common.convolutional(conv, (1, 1, 1024, 512))
32 |     conv = common.convolutional(conv, (3, 3, 512, 1024))
33 |     conv = common.convolutional(conv, (1, 1, 1024, 512))
34 |     conv = common.convolutional(conv, (3, 3, 512, 1024))
35 |     conv = common.convolutional(conv, (1, 1, 1024, 512))
36 | 
37 |     conv_lobj_branch = common.convolutional(conv, (3, 3, 512, 1024))
38 |     conv_lbbox = common.convolutional(
39 |         conv_lobj_branch, (1, 1, 1024, 3*(NUM_CLASS + 5)), activate=False, bn=False)
40 | 
41 |     conv = common.convolutional(conv, (1, 1, 512, 256))
42 |     conv = common.upsample(conv)
43 | 
44 |     conv = tf.concat([conv, route_2], axis=-1)
45 | 
46 |     conv = common.convolutional(conv, (1, 1, 768, 256))
47 |     conv = common.convolutional(conv, (3, 3, 256, 512))
48 |     conv = common.convolutional(conv, (1, 1, 512, 256))
49 |     conv = common.convolutional(conv, (3, 3, 256, 512))
50 |     conv = common.convolutional(conv, (1, 1, 512, 256))
51 | 
52 |     conv_mobj_branch = common.convolutional(conv, (3, 3, 256, 512))
53 |     conv_mbbox = common.convolutional(
54 |         conv_mobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False)
55 | 
56 |     conv = common.convolutional(conv, (1, 1, 256, 128))
57 |     conv = common.upsample(conv)
58 | 
59 |     conv = tf.concat([conv, route_1], axis=-1)
60 | 
61 |     conv = common.convolutional(conv, (1, 1, 384, 128))
62 |     conv = common.convolutional(conv, (3, 3, 128, 256))
63 |     conv = common.convolutional(conv, (1, 1, 256, 128))
64 |     conv = common.convolutional(conv, (3, 3, 128, 256))
65 |     conv = common.convolutional(conv, (1, 1, 256, 128))
66 | 
67 |     conv_sobj_branch = common.convolutional(conv, (3, 3, 128, 256))
68 |     conv_sbbox = common.convolutional(
69 |         conv_sobj_branch, (1, 1, 256, 3*(NUM_CLASS + 5)), activate=False, bn=False)
70 | 
71 |     return [conv_sbbox, conv_mbbox, conv_lbbox]
72 | 
73 | 
74 | def decode(conv_output, i=0):
75 |     """
76 |     return tensor of shape [batch_size, output_size, output_size, anchor_per_scale, 5 + num_classes]
77 |             contains (x, y, w, h, score, probability)
78 |     """
79 | 
80 |     conv_shape = tf.shape(conv_output)
81 |     batch_size = conv_shape[0]
82 |     output_size = conv_shape[1]
83 | 
84 |     conv_output = tf.reshape(
85 |         conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))
86 | 
87 |     conv_raw_dxdy = conv_output[:, :, :, :, 0:2]
88 |     conv_raw_dwdh = conv_output[:, :, :, :, 2:4]
89 |     conv_raw_conf = conv_output[:, :, :, :, 4:5]
90 |     conv_raw_prob = conv_output[:, :, :, :, 5:]
91 | 
92 |     y = tf.tile(tf.range(output_size, dtype=tf.int32)
93 |                 [:, tf.newaxis], [1, output_size])
94 |     x = tf.tile(tf.range(output_size, dtype=tf.int32)
95 |                 [tf.newaxis, :], [output_size, 1])
96 | 
97 |     xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1)
98 |     xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [
99 |                       batch_size, 1, 1, 3, 1])
100 |     xy_grid = tf.cast(xy_grid, tf.float32)
101 | 
102 |     pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * STRIDES[i]
103 |     pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) * STRIDES[i]
104 |     pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)
105 | 
106 |     pred_conf = tf.sigmoid(conv_raw_conf)
107 |     pred_prob = tf.sigmoid(conv_raw_prob)
108 | 
109 |     return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1)
110 | 
111 | 
112 | def bbox_iou(boxes1, boxes2):
113 | 
114 |     boxes1_area = boxes1[..., 2] * boxes1[..., 3]
115 |     boxes2_area = boxes2[..., 2] * boxes2[..., 3]
116 | 
117 |     boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
118 |                         boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
119 |     boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
120 |                         boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)
121 | 
122 |     left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
123 |     right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
124 | 
125 |     inter_section = tf.maximum(right_down - left_up, 0.0)
126 |     inter_area = inter_section[..., 0] * inter_section[..., 1]
127 |     union_area = boxes1_area + boxes2_area - inter_area
128 | 
129 |     return 1.0 * inter_area / union_area
130 | 
131 | 
132 | def bbox_giou(boxes1, boxes2):
133 | 
134 |     boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
135 |                         boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
136 |     boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
137 |                         boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)
138 | 
139 |     boxes1 = tf.concat([tf.minimum(boxes1[..., :2], boxes1[..., 2:]),
140 |                         tf.maximum(boxes1[..., :2], boxes1[..., 2:])], axis=-1)
141 |     boxes2 = tf.concat([tf.minimum(boxes2[..., :2], boxes2[..., 2:]),
142 |                         tf.maximum(boxes2[..., :2], boxes2[..., 2:])], axis=-1)
143 | 
144 |     boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * \
145 |         (boxes1[..., 3] - boxes1[..., 1])
146 |     boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * \
147 |         (boxes2[..., 3] - boxes2[..., 1])
148 | 
149 |     left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
150 |     right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
151 | 
152 |     inter_section = tf.maximum(right_down - left_up, 0.0)
153 |     inter_area = inter_section[..., 0] * inter_section[..., 1]
154 |     union_area = boxes1_area + boxes2_area - inter_area
155 |     iou = inter_area / union_area
156 | 
157 |     enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2])
158 |     enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:])
159 |     enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0)
160 |     enclose_area = enclose[..., 0] * enclose[..., 1]
161 |     giou = iou - 1.0 * (enclose_area - union_area) / enclose_area
162 | 
163 |     return giou
164 | 
165 | 
166 | def compute_loss(pred, conv, label, bboxes, i=0):
167 | 
168 |     conv_shape = tf.shape(conv)
169 |     batch_size = conv_shape[0]
170 |     output_size = conv_shape[1]
171 |     input_size = STRIDES[i] * output_size
172 |     conv = tf.reshape(conv, (batch_size, output_size,
173 |                              output_size, 3, 5 + NUM_CLASS))
174 | 
175 |     conv_raw_conf = conv[:, :, :, :, 4:5]
176 |     conv_raw_prob = conv[:, :, :, :, 5:]
177 | 
178 |     pred_xywh = pred[:, :, :, :, 0:4]
179 |     pred_conf = pred[:, :, :, :, 4:5]
180 | 
181 |     label_xywh = label[:, :, :, :, 0:4]
182 |     respond_bbox = label[:, :, :, :, 4:5]
183 |     label_prob = label[:, :, :, :, 5:]
184 | 
185 |     giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
186 |     input_size = tf.cast(input_size, tf.float32)
187 | 
188 |     bbox_loss_scale = 2.0 - 1.0 * \
189 |         label_xywh[:, :, :, :, 2:3] * \
190 |         label_xywh[:, :, :, :, 3:4] / (input_size ** 2)
191 |     giou_loss = respond_bbox * bbox_loss_scale * (1 - giou)
192 | 
193 |     iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :],
194 |                    bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
195 |     max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)
196 | 
197 |     respond_bgd = (1.0 - respond_bbox) * \
198 |         tf.cast(max_iou < IOU_LOSS_THRESH, tf.float32)
199 | 
200 |     conf_focal = tf.pow(respond_bbox - pred_conf, 2)
201 | 
202 |     conf_loss = conf_focal * (
203 |         respond_bbox *
204 |         tf.nn.sigmoid_cross_entropy_with_logits(
205 |             labels=respond_bbox, logits=conv_raw_conf)
206 |         +
207 |         respond_bgd *
208 |         tf.nn.sigmoid_cross_entropy_with_logits(
209 |             labels=respond_bbox, logits=conv_raw_conf)
210 |     )
211 | 
212 |     prob_loss = respond_bbox * \
213 |         tf.nn.sigmoid_cross_entropy_with_logits(
214 |             labels=label_prob, logits=conv_raw_prob)
215 | 
216 |     giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1, 2, 3, 4]))
217 |     conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1, 2, 3, 4]))
218 |     prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1, 2, 3, 4]))
219 | 
220 |     return giou_loss, conf_loss, prob_loss
221 | 
--------------------------------------------------------------------------------
/data/anchors/basline_anchors.txt:
--------------------------------------------------------------------------------
1 | 1.25,1.625, 2.0,3.75, 4.125,2.875, 1.875,3.8125, 3.875,2.8125, 3.6875,7.4375, 3.625,2.8125, 4.875,6.1875, 11.65625,10.1875
2 | 
--------------------------------------------------------------------------------
/data/classes/coco.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 | 
--------------------------------------------------------------------------------
/data/classes/yymnist.names:
--------------------------------------------------------------------------------
1 | 0
2 | 1
3 | 2
4 | 3
5 | 4
6 | 5
7 | 6
8 | 7
9 | 8
10 | 9
11 | 
--------------------------------------------------------------------------------
/data/dataset/yymnist_test.txt:
--------------------------------------------------------------------------------
1 | /home/yang/test/TensorFlow2.0-Examples/4-Object_Detection/YOLOV3/data/dataset/test/000001.jpg 122,187,136,201,7 71,209,85,223,4 78,7,100,29,2 366,244,394,272,3 344,383,372,411,3 238,243,294,299,8 24,233,136,345,8
2 | /home/yang/test/TensorFlow2.0-Examples/4-Object_Detection/YOLOV3/data/dataset/test/000002.jpg 343,279,357,293,4 258,168,280,190,7 277,354,333,410,6 5,24,61,80,3 124,227,152,255,9 140,285,224,369,3 105,42,217,154,4
3 | /home/yang/test/TensorFlow2.0-Examples/4-Object_Detection/YOLOV3/data/dataset/test/000003.jpg 285,100,313,128,2 99,16,183,100,1
4 | /home/yang/test/TensorFlow2.0-Examples/4-Object_Detection/YOLOV3/data/dataset/test/000004.jpg 365,140,387,162,0 70,332,92,354,1 278,373,292,387,0 341,14,383,56,0 10,143,52,185,8 214,109,242,137,1 112,282,140,310,0 312,130,368,186,0 180,180,236,236,3 271,192,383,304,2 20,10,132,122,4 66,147,150,231,4
5 | /home/yang/test/TensorFlow2.0-Examples/4-Object_Detection/YOLOV3/data/dataset/test/000005.jpg 194,13,216,35,9 126,34,168,76,2 80,183,108,211,6 341,332,397,388,6
6 | /home/yang/test/TensorFlow2.0-Examples/4-Object_Detection/YOLOV3/data/dataset/test/000006.jpg 251,179,265,193,9 22,43,36,57,8 315,132,337,154,4 155,372,197,414,4 239,18,295,74,1 89,53,131,95,1 81,296,165,380,1 208,257,320,369,9 86,173,170,257,7
7 | 
--------------------------------------------------------------------------------
/docs/01554.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/docs/01554.jpg
--------------------------------------------------------------------------------
/docs/01567.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/docs/01567.jpg
--------------------------------------------------------------------------------
/docs/kite.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/docs/kite.jpg
--------------------------------------------------------------------------------
/docs/kite_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/docs/kite_result.jpg
--------------------------------------------------------------------------------
/docs/loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/docs/loss.png
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.15.1
2 | Pillow==6.2.0
3 | scipy==1.1.0
4 | wget==3.2
5 | seaborn==0.9.0
6 | easydict==1.9
7 | tensorflow==2.0.0b0
8 | 
--------------------------------------------------------------------------------
/docs/road.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/docs/road.mp4
--------------------------------------------------------------------------------
/image_demo.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | #================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : image_demo.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-07-12 13:07:27
10 | #   Description :
11 | #
12 | #================================================================
13 | 
14 | import cv2
15 | import numpy as np
16 | import core.utils as utils
17 | import tensorflow as tf
18 | from core.yolov3 import YOLOv3, decode
19 | from PIL import Image
20 | from core.config import cfg
21 | 
22 | input_size = 416
23 | NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
24 | image_path = "./docs/kite.jpg"
25 | 
26 | input_layer = tf.keras.layers.Input([input_size, input_size, 3])
27 | feature_maps = YOLOv3(input_layer)
28 | 
29 | original_image = cv2.imread(image_path)
30 | original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
31 | original_image_size = original_image.shape[:2]
32 | 
33 | image_data = utils.image_preporcess(np.copy(original_image), [input_size, input_size])
34 | image_data = image_data[np.newaxis, ...].astype(np.float32)
35 | 
36 | bbox_tensors = []
37 | for i, fm in enumerate(feature_maps):
38 |     bbox_tensor = decode(fm, i)
39 |     bbox_tensors.append(tf.reshape(bbox_tensor, (-1, 5+NUM_CLASS)))
40 | 
41 | bbox_tensors = tf.concat(bbox_tensors, axis=0)
42 | model = tf.keras.Model(input_layer, bbox_tensors)
43 | utils.load_weights(model, "./yolov3.weights")
44 | model.summary()
45 | 
46 | pred_bbox = model.predict(image_data)
47 | bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3)
48 | bboxes = utils.nms(bboxes, 0.45, method='nms')
49 | 
50 | image = utils.draw_bbox(original_image, bboxes)
51 | image = Image.fromarray(image)
52 | image.show()
53 | 
54 | 
--------------------------------------------------------------------------------
/scripts/convert_coco_to_trainlist.py:
--------------------------------------------------------------------------------
1 | """
2 | 
3 | convert coco to trainlist
4 | 
5 | 
6 | """
7 | from pycocotools.coco import COCO
8 | from pycocotools.cocoeval import COCOeval
9 | from pycocotools import mask as COCOmask
10 | import numpy as np
11 | import cv2
12 | import matplotlib.pyplot as plt
13 | import skimage.io as io
14 | import random
15 | import fire
16 | 
17 | 
18 | def drawbox(img, box):
19 |     (img_h, img_w, img_c) = img.shape
20 |     (x, y, w, h) = box
21 |     p0 = (int((x - w/2) * img_w), int((y - h/2) * img_h))
22 |     p1 = (int((x + w/2) * img_w), int((y + h/2) * img_h))
23 |     cv2.rectangle(img, p0, p1, (255, 255, 255), 1)
24 | 
25 | 
26 | def convert_boxes_labels(ct_boxes, ratio):
27 |     boxlab = np.zeros((len(ct_boxes) * 6 + 1), np.float32)
28 |     for i in range(len(ct_boxes)):
29 |         box = ct_boxes[i]
30 |         boxlab[i * 6 + 0] = 0
31 |         boxlab[i * 6 + 1: i * 6 + 5] = box
32 |         boxlab[i * 6 + 5] = i + 1
33 |     boxlab[len(ct_boxes) * 6 + 0] = ratio
34 |     return boxlab
35 | 
36 | 
37 | def resize_mask(mask, size):
38 |     (dst_h, dst_w) = size
39 |     mk_resize = np.zeros((dst_h, dst_w, 1), np.uint8)
40 |     (img_h, img_w, img_c) = mask.shape
41 |     for r in range(dst_h):
42 |         for c in range(dst_w):
43 |             (o_r, o_c) = (int(1.0 * img_h / dst_h * r), int(1.0 * img_w / dst_w * c))
44 |             mk_resize[r, c, 0] = mask[o_r, o_c, 0]
45 |     return mk_resize
46 | 
47 | 
48 | def run(coco_dir):
49 |     (dst_h, dst_w) = [320, 320]
50 |     dataTypes = ['train', 'val']
51 |     years = ['2014', '2017']
52 | 
53 |     # dataTypes = ['val']
54 |     # years = ['2017']
55 | 
56 |     dataDir = coco_dir
57 |     for dataType in dataTypes:
58 |         for year in years:
59 |             imageSet = dataType + year
60 |             ann_path = '{}/annotations/instances_{}.json'.format(
61 |                 dataDir, imageSet)
62 |             coco = COCO(ann_path)
63 | 
64 |             target_f = 'coco_{}_{}.txt'.format(dataType, year)
65 |             print('generating {}'.format(target_f))
66 |             target = open(target_f, 'w')
67 | 
68 |             # display COCO categories and supercategories
69 |             cats = coco.loadCats(coco.getCatIds())
70 |             imgIds = coco.getImgIds()
71 | 
72 |             random.shuffle(imgIds)
73 |             count_id = 0
74 |             for imgId in imgIds:
75 |                 count_id = count_id+1
76 |                 img = coco.loadImgs(imgId)[0]
77 | 
78 |                 img_path = '%s/%s' % (imageSet, img['file_name'])
79 |                 print(img_path)
80 | 
81 |                 annIds = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
82 |                 anns = coco.loadAnns(annIds)
83 | 
84 |                 anno_part = ''
85 |                 for ann in anns:
86 |                     box = ann['bbox']
87 |                     box = [str(i) for i in box]
88 |                     label_id = str(ann['category_id'])
89 |                     one_part = ','.join(box + [label_id])
90 |                     anno_part += ' {}'.format(one_part)
91 | 
92 |                 one_line = '{}{}\n'.format(img_path, anno_part)
93 |                 target.write(one_line)
94 |     print('done!')
95 | 
96 | 
97 | if __name__ == '__main__':
98 |     fire.Fire(run)
99 | 
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | # ================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : test.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-07-19 10:29:34
10 | #   Description :
11 | #
12 | # ================================================================
13 | 
14 | import cv2
15 | import os
16 | import shutil
17 | import numpy as np
18 | import tensorflow as tf
19 | import core.utils as utils
20 | from core.config import cfg
21 | from core.yolov3 import YOLOv3, decode
22 | 
23 | 
24 | INPUT_SIZE = 416
25 | NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
26 | CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
27 | 
28 | predicted_dir_path = '../mAP/predicted'
29 | ground_truth_dir_path = '../mAP/ground-truth'
30 | if os.path.exists(predicted_dir_path):
31 |     shutil.rmtree(predicted_dir_path)
32 | if os.path.exists(ground_truth_dir_path):
33 |     shutil.rmtree(ground_truth_dir_path)
34 | if os.path.exists(cfg.TEST.DECTECTED_IMAGE_PATH):
35 |     shutil.rmtree(cfg.TEST.DECTECTED_IMAGE_PATH)
36 | 
37 | os.mkdir(predicted_dir_path)
38 | os.mkdir(ground_truth_dir_path)
39 | os.mkdir(cfg.TEST.DECTECTED_IMAGE_PATH)
40 | 
41 | # Build Model
42 | input_layer = tf.keras.layers.Input([INPUT_SIZE, INPUT_SIZE, 3])
43 | feature_maps = YOLOv3(input_layer)
44 | 
45 | bbox_tensors = []
46 | for i, fm in enumerate(feature_maps):
47 |     bbox_tensor = decode(fm, i)
48 |     bbox_tensors.append(tf.reshape(bbox_tensor, (-1, 5+NUM_CLASS)))
49 | 
50 | bbox_tensors = tf.concat(bbox_tensors, axis=0)
51 | model = tf.keras.Model(input_layer, bbox_tensors)
52 | model.load_weights("./yolov3")
53 | 
54 | with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
55 |     for num, line in enumerate(annotation_file):
56 |         annotation = line.strip().split()
57 |         image_path = annotation[0]
58 |         image_name = image_path.split('/')[-1]
59 |         image = cv2.imread(image_path)
60 |         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
61 |         bbox_data_gt = np.array([list(map(int, box.split(',')))
62 |                                  for box in annotation[1:]])
63 | 
64 |         if len(bbox_data_gt) == 0:
65 |             bboxes_gt = []
66 |             classes_gt = []
67 |         else:
68 |             bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
69 |         ground_truth_path = os.path.join(
70 |             ground_truth_dir_path, str(num) + '.txt')
71 | 
72 |         print('=> ground truth of %s:' % image_name)
73 |         num_bbox_gt = len(bboxes_gt)
74 |         with open(ground_truth_path, 'w') as f:
75 |             for i in range(num_bbox_gt):
76 |                 class_name = CLASSES[classes_gt[i]]
77 |                 xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
78 |                 bbox_mess = ' '.join(
79 |                     [class_name, xmin, ymin, xmax, ymax]) + '\n'
80 |                 f.write(bbox_mess)
81 |                 print('\t' + str(bbox_mess).strip())
82 |         print('=> predict result of %s:' % image_name)
83 |         predict_result_path = os.path.join(
84 |             predicted_dir_path, str(num) + '.txt')
85 |         # Predict Process
86 |         image_size = image.shape[:2]
87 |         image_data = utils.image_preporcess(
88 |             np.copy(image), [INPUT_SIZE, INPUT_SIZE])
89 |         image_data = image_data[np.newaxis, ...].astype(np.float32)
90 | 
91 |         pred_bbox = model.predict(image_data)
92 |         bboxes = utils.postprocess_boxes(
93 |             pred_bbox, image_size, INPUT_SIZE, cfg.TEST.SCORE_THRESHOLD)
94 |         bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms')
95 | 
96 |         if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
97 |             image = utils.draw_bbox(image, bboxes)
98 |             cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH+image_name, image)
99 | 
100 |         with open(predict_result_path, 'w') as f:
101 |             for bbox in bboxes:
102 |                 coor = np.array(bbox[:4], dtype=np.int32)
103 |                 score = bbox[4]
104 |                 class_ind = int(bbox[5])
105 |                 class_name = CLASSES[class_ind]
106 |                 score = '%.4f' % score
107 |                 xmin, ymin, xmax, ymax = list(map(str, coor))
108 |                 bbox_mess = ' '.join(
109 |                     [class_name, score, xmin, ymin, xmax, ymax]) + '\n'
110 |                 f.write(bbox_mess)
111 |                 print('\t' + str(bbox_mess).strip())
112 | 
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | # ================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : train.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-07-18 09:18:54
10 | #   Description :
11 | #
12 | # ================================================================
13 | 
14 | import os
15 | import time
16 | import shutil
17 | import numpy as np
18 | import tensorflow as tf
19 | import core.utils as utils
20 | from core.dataset import Dataset
21 | from core.yolov3 import YOLOv3, decode, compute_loss
22 | from core.config import cfg
23 | 
24 | trainset = Dataset('train')
25 | logdir = "./data/log"
26 | steps_per_epoch = len(trainset)
27 | global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
28 | warmup_steps = cfg.TRAIN.WARMUP_EPOCHS * steps_per_epoch
29 | total_steps = cfg.TRAIN.EPOCHS * steps_per_epoch
30 | 
31 | input_tensor = tf.keras.layers.Input([416, 416, 3])
32 | conv_tensors = YOLOv3(input_tensor)
33 | 
34 | output_tensors = []
35 | for i, conv_tensor in enumerate(conv_tensors):
36 |     pred_tensor = decode(conv_tensor, i)
37 |     output_tensors.append(conv_tensor)
38 |     output_tensors.append(pred_tensor)
39 | 
40 | model = tf.keras.Model(input_tensor, output_tensors)
41 | optimizer = tf.keras.optimizers.Adam()
42 | if os.path.exists(logdir):
43 |     shutil.rmtree(logdir)
44 | writer = tf.summary.create_file_writer(logdir)
45 | 
46 | 
47 | def train_step(image_data, target):
48 |     with tf.GradientTape() as tape:
49 |         pred_result = model(image_data, training=True)
50 |         giou_loss = conf_loss = prob_loss = 0
51 | 
52 |         # optimizing process
53 |         for i in range(3):
54 |             conv, pred = pred_result[i*2], pred_result[i*2+1]
55 |             loss_items = compute_loss(pred, conv, *target[i], i)
56 |             giou_loss += loss_items[0]
57 |             conf_loss += loss_items[1]
58 |             prob_loss += loss_items[2]
59 | 
60 |         total_loss = giou_loss + conf_loss + prob_loss
61 | 
62 |         gradients = tape.gradient(total_loss, model.trainable_variables)
63 |         optimizer.apply_gradients(zip(gradients, model.trainable_variables))
64 |         tf.print("=> STEP %4d lr: %.6f giou_loss: %4.2f conf_loss: %4.2f "
65 |                  "prob_loss: %4.2f total_loss: %4.2f" % (global_steps, optimizer.lr.numpy(),
66 |                                                          giou_loss, conf_loss,
67 |                                                          prob_loss, total_loss))
68 |         # update learning rate
69 |         global_steps.assign_add(1)
70 |         if global_steps < warmup_steps:
71 |             lr = global_steps / warmup_steps * cfg.TRAIN.LR_INIT
72 |         else:
73 |             lr = cfg.TRAIN.LR_END + 0.5 * (cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END) * (
74 |                 (1 + tf.cos((global_steps - warmup_steps) /
75 |                             (total_steps - warmup_steps) * np.pi))
76 |             )
77 |         optimizer.lr.assign(lr.numpy())
78 | 
79 |         # writing summary data
80 |         with writer.as_default():
81 |             tf.summary.scalar("lr", optimizer.lr, step=global_steps)
82 |             tf.summary.scalar("loss/total_loss", total_loss, step=global_steps)
83 |             tf.summary.scalar("loss/giou_loss", giou_loss, step=global_steps)
84 |             tf.summary.scalar("loss/conf_loss", conf_loss, step=global_steps)
85 |             tf.summary.scalar("loss/prob_loss", prob_loss, step=global_steps)
86 |         writer.flush()
87 | 
88 | 
89 | for epoch in range(cfg.TRAIN.EPOCHS):
90 |     for image_data, target in trainset:
91 |         train_step(image_data, target)
92 |     model.save_weights("./yolov3")
93 | 
--------------------------------------------------------------------------------
/video_demo.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | #================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : video_demo.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-07-12 19:36:53
10 | #   Description :
11 | #
12 | #================================================================
13 | 
14 | import cv2
15 | import time
16 | import numpy as np
17 | import core.utils as utils
18 | import tensorflow as tf
19 | from core.yolov3 import YOLOv3, decode
20 | 
21 | 
22 | video_path = "./docs/road.mp4"
23 | # video_path = 0
24 | num_classes = 80
25 | input_size = 416
26 | 
27 | input_layer = tf.keras.layers.Input([input_size, input_size, 3])
28 | feature_maps = YOLOv3(input_layer)
29 | 
30 | bbox_tensors = []
31 | for i, fm in enumerate(feature_maps):
32 |     bbox_tensor = decode(fm, i)
33 |     bbox_tensors.append(tf.reshape(bbox_tensor, (-1, 5+num_classes)))
34 | 
35 | bbox_tensors = tf.concat(bbox_tensors, axis=0)
36 | model = tf.keras.Model(input_layer, bbox_tensors)
37 | utils.load_weights(model, "./yolov3.weights")
38 | model.summary()
39 | vid = cv2.VideoCapture(video_path)
40 | while True:
41 |     return_value, frame = vid.read()
42 |     if return_value:
43 |         frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
44 |     else:
45 |         raise ValueError("No image!")
46 |     frame_size = frame.shape[:2]
47 |     image_data = utils.image_preporcess(np.copy(frame), [input_size, input_size])
48 |     image_data = image_data[np.newaxis, ...].astype(np.float32)
49 |     prev_time = time.time()
50 | 
51 |     pred_bbox = model.predict(image_data)
52 |     bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)
53 |     bboxes = utils.nms(bboxes, 0.45, method='nms')
54 |     image = utils.draw_bbox(frame, bboxes)
55 | 
56 |     curr_time = time.time()
57 |     exec_time = curr_time - prev_time
58 |     result = np.asarray(image)
59 |     info = "time: %.2f ms" %(1000*exec_time)
60 |     cv2.putText(result, text=info, org=(50, 70), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
61 |                 fontScale=1, color=(255, 0, 0), thickness=2)
62 |     cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
63 |     result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
64 |     cv2.imshow("result", result)
65 |     if cv2.waitKey(1) & 0xFF == ord('q'): break
66 | 
67 | 
--------------------------------------------------------------------------------
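
A note on the **det-txt** annotation format the README mentions: `Dataset.parse_annotation` and the `yymnist_test.txt` lines above imply one image per line in the form `path xmin,ymin,xmax,ymax,class_id ...`, with the path joined against `cfg.IMG_ROOT` and `class_id` indexing `coco.names`. `scripts/convert_coco_to_trainlist.py` as written emits COCO's raw `bbox` (which is `x,y,w,h`) and the raw, non-contiguous `category_id`, so it does not match what the loader parses. A hedged sketch of a writer that does match the loader's expectation — `build_cat_index` and `coco_ann_to_line` are names invented here for illustration:

```python
# Sketch only: emit one train-list line in the corner format that
# Dataset.parse_annotation expects (path xmin,ymin,xmax,ymax,class_id ...).
from pycocotools.coco import COCO


def build_cat_index(coco):
    # COCO category ids are sparse (1..90); map them onto contiguous
    # 0..79 indices in the same ascending order as data/classes/coco.names.
    return {cat_id: i for i, cat_id in enumerate(sorted(coco.getCatIds()))}


def coco_ann_to_line(img_path, anns, cat_id_to_index):
    parts = [img_path]
    for ann in anns:
        x, y, w, h = ann['bbox']                   # COCO boxes are x,y,w,h
        xmin, ymin = int(x), int(y)
        xmax, ymax = int(x + w), int(y + h)        # convert to corner coordinates
        cls = cat_id_to_index[ann['category_id']]  # contiguous class index
        parts.append('%d,%d,%d,%d,%d' % (xmin, ymin, xmax, ymax, cls))
    return ' '.join(parts) + '\n'
```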
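The schedule in `train.py` ramps the learning rate up linearly over `WARMUP_EPOCHS`, then follows a cosine decay from `LR_INIT` down to `LR_END`. The same rule as a standalone sketch (the function name `lr_at` is invented for illustration):

```python
import numpy as np


def lr_at(step, warmup_steps, total_steps, lr_init=1e-3, lr_end=1e-6):
    # Linear warmup, then cosine decay -- mirrors the update in train_step().
    if step < warmup_steps:
        return step / warmup_steps * lr_init
    progress = (step - warmup_steps) / (total_steps - warmup_steps)
    return lr_end + 0.5 * (lr_init - lr_end) * (1 + np.cos(progress * np.pi))
```

For example, with `BATCH_SIZE = 4` on COCO 2017 (~118k images, so roughly 29.6k steps per epoch), the rate reaches 1e-3 after the two warmup epochs and decays toward 1e-6 by epoch 30.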
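For reference, `decode()` in `core/yolov3.py` implements the standard YOLOv3 box parameterization: per grid cell `(cx, cy)` and per anchor `(aw, ah)` (the baseline anchors are stored in units of grid cells), scaled back to input-image pixels by the stride. A scalar sketch of the same arithmetic, with names invented here:

```python
import numpy as np


def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def decode_box(tx, ty, tw, th, cx, cy, aw, ah, stride):
    # Matches decode(): the center offset is squashed into its cell,
    # widths/heights scale the anchor, and everything is multiplied
    # by the stride to land in input-image pixel coordinates.
    bx = (sigmoid(tx) + cx) * stride
    by = (sigmoid(ty) + cy) * stride
    bw = np.exp(tw) * aw * stride
    bh = np.exp(th) * ah * stride
    return bx, by, bw, bh
```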