├── README.md
├── core
│   ├── __pycache__
│   │   ├── backbone.cpython-35.pyc
│   │   ├── common.cpython-35.pyc
│   │   ├── config.cpython-35.pyc
│   │   ├── dataset.cpython-35.pyc
│   │   ├── utils.cpython-35.pyc
│   │   └── yolov3.cpython-35.pyc
│   ├── backbone.py
│   ├── common.py
│   ├── config.py
│   ├── dataset.py
│   ├── utils.py
│   └── yolov3.py
├── data
│   ├── anchors
│   │   └── basline_anchors.txt
│   ├── classes
│   │   ├── coco.names
│   │   └── yymnist.names
│   └── dataset
│       ├── coco_train_2014.txt
│       ├── coco_train_2017.txt
│       └── yymnist_test.txt
├── docs
│   ├── 01554.jpg
│   ├── 01567.jpg
│   ├── kite.jpg
│   ├── kite_result.jpg
│   ├── loss.png
│   ├── requirements.txt
│   └── road.mp4
├── image_demo.py
├── scripts
│   ├── coco_val_2014.txt
│   ├── coco_val_2017.txt
│   └── convert_coco_to_trainlist.py
├── test.py
├── train.py
└── video_demo.py

/README.md:
--------------------------------------------------------------------------------
1 | # TensorFlow 2.0 YoloV3
2 | 
3 | YOLOv3 training on COCO with TensorFlow 2.0. This is a minimal implementation of the YOLOv3 algorithm. What has been done so far:
4 | 
5 | - [x] A **det-txt** annotation format for training;
6 | - [x] Training on COCO, plus a script to generate the train-list file;
7 | - [ ] Exporting the model to tflite;
8 | - [ ] Exporting the model with fp16 and int8 inference;
9 | 
10 | 
11 | 
12 | ## Install
13 | 
14 | You should have tensorflow-2.0-rc installed.
15 | 
16 | 
17 | 
18 | ## Copyright
19 | 
20 | All beloved opensource authors.
--------------------------------------------------------------------------------
/core/__pycache__/backbone.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/core/__pycache__/backbone.cpython-35.pyc
--------------------------------------------------------------------------------
/core/__pycache__/common.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/core/__pycache__/common.cpython-35.pyc
--------------------------------------------------------------------------------
/core/__pycache__/config.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/core/__pycache__/config.cpython-35.pyc
--------------------------------------------------------------------------------
/core/__pycache__/dataset.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/core/__pycache__/dataset.cpython-35.pyc
--------------------------------------------------------------------------------
/core/__pycache__/utils.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/core/__pycache__/utils.cpython-35.pyc
--------------------------------------------------------------------------------
/core/__pycache__/yolov3.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/core/__pycache__/yolov3.cpython-35.pyc
--------------------------------------------------------------------------------
/core/backbone.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | # ================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : backbone.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-07-11 23:37:51
10 | #   Description :
11 | #
12 | # ================================================================
13 | 
14 | import tensorflow as tf
15 | import core.common as common
16 | 
17 | 
18 | def darknet53(input_data):
19 | 
20 |     input_data = common.convolutional(input_data, (3, 3, 3, 32))
21 |     input_data = common.convolutional(
22 |         input_data, (3, 3, 32, 64), downsample=True)
23 | 
24 |     for i in range(1):
25 |         input_data = common.residual_block(input_data, 64, 32, 64)
26 | 
27 |     input_data = common.convolutional(
28 |         input_data, (3, 3, 64, 128), downsample=True)
29 | 
30 |     for i in range(2):
31 |         input_data = common.residual_block(input_data, 128, 64, 128)
32 | 
33 |     input_data = common.convolutional(
34 |         input_data, (3, 3, 128, 256), downsample=True)
35 | 
36 |     for i in range(8):
37 |         input_data = common.residual_block(input_data, 256, 128, 256)
38 | 
39 |     route_1 = input_data
40 |     input_data = common.convolutional(
41 |         input_data, (3, 3, 256, 512), downsample=True)
42 | 
43 |     for i in range(8):
44 |         input_data = common.residual_block(input_data, 512, 256, 512)
45 | 
46 |     route_2 = input_data
47 |     input_data = common.convolutional(
48 |         input_data, (3, 3, 512, 1024), downsample=True)
49 | 
50 |     for i in range(4):
51 |         input_data = common.residual_block(input_data, 1024, 512, 1024)
52 | 
53 |     return route_1, route_2, input_data
54 | 
--------------------------------------------------------------------------------
/core/common.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | # ================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : common.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-07-11 23:12:53
10 | #   Description :
11 | #
12 | # ================================================================
13 | 
14 | import tensorflow as tf
15 | 
16 | 
17 | class BatchNormalization(tf.keras.layers.BatchNormalization):
18 |     """
19 |     "Frozen state" and "inference mode" are two separate concepts.
20 |     `layer.trainable = False` is to freeze the layer, so the layer will use
21 |     stored moving `var` and `mean` in the "inference mode", and both `gamma`
22 |     and `beta` will not be updated!
23 |     """
24 | 
25 |     def call(self, x, training=False):
26 |         if not training:
27 |             training = tf.constant(False)
28 |         training = tf.logical_and(training, self.trainable)
29 |         return super().call(x, training)
30 | 
31 | 
32 | def convolutional(input_layer, filters_shape, downsample=False, activate=True, bn=True):
33 |     if downsample:
34 |         input_layer = tf.keras.layers.ZeroPadding2D(
35 |             ((1, 0), (1, 0)))(input_layer)
36 |         padding = 'valid'
37 |         strides = 2
38 |     else:
39 |         strides = 1
40 |         padding = 'same'
41 | 
42 |     conv = tf.keras.layers.Conv2D(filters=filters_shape[-1], kernel_size=filters_shape[0], strides=strides, padding=padding,
43 |                                   use_bias=not bn, kernel_regularizer=tf.keras.regularizers.l2(0.0005),
44 |                                   kernel_initializer=tf.random_normal_initializer(
45 |                                       stddev=0.01),
46 |                                   bias_initializer=tf.constant_initializer(0.))(input_layer)
47 | 
48 |     if bn:
49 |         conv = BatchNormalization()(conv)
50 |     if activate:
51 |         conv = tf.nn.leaky_relu(conv, alpha=0.1)
52 | 
53 |     return conv
54 | 
55 | 
56 | def residual_block(input_layer, input_channel, filter_num1, filter_num2):
57 |     short_cut = input_layer
58 |     conv = convolutional(input_layer, filters_shape=(
59 |         1, 1, input_channel, filter_num1))
60 |     conv = convolutional(conv, filters_shape=(
61 |         3, 3, filter_num1, filter_num2))
62 | 
63 |     residual_output = short_cut + conv
64 |     return residual_output
65 | 
66 | 
67 | def upsample(input_layer):
68 |     return tf.image.resize(input_layer, (input_layer.shape[1] * 2, input_layer.shape[2] * 2), method='nearest')
69 | 
--------------------------------------------------------------------------------
/core/config.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | # ================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : config.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-02-28 13:06:54
10 | #   Description :
11 | #
12 | # ================================================================
13 | 
14 | from easydict import EasyDict as edict
15 | 
16 | 
17 | __C = edict()
18 | # Consumers can get config by: from config import cfg
19 | 
20 | cfg = __C
21 | 
22 | # YOLO options
23 | __C.YOLO = edict()
24 | 
25 | # Set the class name
26 | __C.YOLO.CLASSES = "./data/classes/coco.names"
27 | __C.YOLO.ANCHORS = "./data/anchors/basline_anchors.txt"
28 | __C.YOLO.STRIDES = [8, 16, 32]
29 | __C.YOLO.ANCHOR_PER_SCALE = 3
30 | __C.YOLO.IOU_LOSS_THRESH = 0.5
31 | 
32 | # Train options
33 | __C.TRAIN = edict()
34 | 
35 | # paths in the train list are relative; they are joined with IMG_ROOT before use
36 | __C.TRAIN.ANNOT_PATH = "./data/dataset/coco_train_2017.txt"
37 | __C.IMG_ROOT = '/autox-sz/departments/perception/public_data/coco'
38 | 
39 | __C.TRAIN.BATCH_SIZE = 4
40 | # __C.TRAIN.INPUT_SIZE = [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
41 | __C.TRAIN.INPUT_SIZE = [416]
42 | __C.TRAIN.DATA_AUG = True
43 | __C.TRAIN.LR_INIT = 1e-3
44 | __C.TRAIN.LR_END = 1e-6
45 | __C.TRAIN.WARMUP_EPOCHS = 2
46 | __C.TRAIN.EPOCHS = 30
47 | 
48 | 
49 | # TEST options
50 | __C.TEST = edict()
51 | 
52 | __C.TEST.ANNOT_PATH = "data/dataset/coco_train_2014.txt"
53 | __C.TEST.BATCH_SIZE = 2
54 | __C.TEST.INPUT_SIZE = 544
55 | __C.TEST.DATA_AUG = False
56 | __C.TEST.DECTECTED_IMAGE_PATH = "./data/detection/"
57 | __C.TEST.SCORE_THRESHOLD = 0.3
58 | __C.TEST.IOU_THRESHOLD = 0.45
59 | 
--------------------------------------------------------------------------------
/core/dataset.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | # ================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : dataset.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-03-15 18:05:03
10 | #   Description :
11 | #
12 | # ================================================================
13 | 
14 | import os
15 | import cv2
16 | import random
17 | import numpy as np
18 | import tensorflow as tf
19 | import core.utils as utils
20 | from core.config import cfg
21 | 
22 | 
23 | class Dataset(object):
24 |     """implement Dataset here"""
25 | 
26 |     def __init__(self, dataset_type):
27 |         self.annot_path = cfg.TRAIN.ANNOT_PATH if dataset_type == 'train' else cfg.TEST.ANNOT_PATH
28 |         self.input_sizes = cfg.TRAIN.INPUT_SIZE if dataset_type == 'train' else cfg.TEST.INPUT_SIZE
29 |         self.batch_size = cfg.TRAIN.BATCH_SIZE if dataset_type == 'train' else cfg.TEST.BATCH_SIZE
30 |         self.data_aug = cfg.TRAIN.DATA_AUG if dataset_type == 'train' else cfg.TEST.DATA_AUG
31 |         self.img_root = cfg.IMG_ROOT
32 | 
33 |         self.train_input_sizes = cfg.TRAIN.INPUT_SIZE
34 |         self.strides = np.array(cfg.YOLO.STRIDES)
35 |         self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
36 |         self.num_classes = len(self.classes)
37 |         self.anchors = np.array(utils.get_anchors(cfg.YOLO.ANCHORS))
38 |         self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
39 |         self.max_bbox_per_scale = 150
40 | 
41 |         self.annotations = self.load_annotations(dataset_type)
42 |         self.num_samples = len(self.annotations)
43 |         self.num_batchs = int(np.ceil(self.num_samples / self.batch_size))
44 |         self.batch_count = 0
45 | 
46 |     def load_annotations(self, dataset_type):
47 |         with open(self.annot_path, 'r') as f:
48 |             txt = f.readlines()
49 |             annotations = [line.strip()
50 |                            for line in txt if len(line.strip().split()[1:]) != 0]
51 |         np.random.shuffle(annotations)
52 |         return annotations
53 | 
54 |     def __iter__(self):
55 |         return self
56 | 
57 |     def __next__(self):
58 | 
59 |         with tf.device('/cpu:0'):
60 |             self.train_input_size = random.choice(self.train_input_sizes)
61 |             self.train_output_sizes = self.train_input_size // self.strides
62 | 
63 |             batch_image = np.zeros(
64 |                 (self.batch_size, self.train_input_size, self.train_input_size, 3), dtype=np.float32)
65 | 
66 |             batch_label_sbbox = np.zeros((self.batch_size, self.train_output_sizes[0], self.train_output_sizes[0],
67 |                                           self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
68 |             batch_label_mbbox = np.zeros((self.batch_size, self.train_output_sizes[1], self.train_output_sizes[1],
69 |                                           self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
70 |             batch_label_lbbox = np.zeros((self.batch_size, self.train_output_sizes[2], self.train_output_sizes[2],
71 |                                           self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
72 | 
73 |             batch_sbboxes = np.zeros(
74 |                 (self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
75 |             batch_mbboxes = np.zeros(
76 |                 (self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
77 |             batch_lbboxes = np.zeros(
78 |                 (self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
79 | 
80 |             num = 0
81 |             if self.batch_count < self.num_batchs:
82 |                 while num < self.batch_size:
83 |                     index = self.batch_count * self.batch_size + num
84 |                     if index >= self.num_samples:
85 |                         index -= self.num_samples
86 |                     annotation = self.annotations[index]
87 |                     image, bboxes = self.parse_annotation(annotation)
88 |                     label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes = self.preprocess_true_boxes(
89 |                         bboxes)
90 | 
91 |                     batch_image[num, :, :, :] = image
92 |                     batch_label_sbbox[num, :, :, :, :] = label_sbbox
93 |                     batch_label_mbbox[num, :, :, :, :] = label_mbbox
94 |                     batch_label_lbbox[num, :, :, :, :] = label_lbbox
95 |                     batch_sbboxes[num, :, :] = sbboxes
96 |                     batch_mbboxes[num, :, :] = mbboxes
97 |                     batch_lbboxes[num, :, :] = lbboxes
98 |                     num += 1
99 |                 self.batch_count += 1
100 |                 batch_smaller_target = batch_label_sbbox, batch_sbboxes
101 |                 batch_medium_target = batch_label_mbbox, batch_mbboxes
102 |                 batch_larger_target = batch_label_lbbox, batch_lbboxes
103 | 
104 |                 return batch_image, (batch_smaller_target, batch_medium_target, batch_larger_target)
105 |             else:
106 |                 self.batch_count = 0
107 |                 np.random.shuffle(self.annotations)
108 |                 raise StopIteration
109 | 
110 |     def random_horizontal_flip(self, image, bboxes):
111 | 
112 |         if random.random() < 0.5:
113 |             _, w, _ = image.shape
114 |             image = image[:, ::-1, :]
115 |             bboxes[:, [0, 2]] = w - bboxes[:, [2, 0]]
116 | 
117 |         return image, bboxes
118 | 
119 |     def random_crop(self, image, bboxes):
120 | 
121 |         if random.random() < 0.5:
122 |             h, w, _ = image.shape
123 |             max_bbox = np.concatenate(
124 |                 [np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1)
125 | 
126 |             max_l_trans = max_bbox[0]
127 |             max_u_trans = max_bbox[1]
128 |             max_r_trans = w - max_bbox[2]
129 |             max_d_trans = h - max_bbox[3]
130 | 
131 |             crop_xmin = max(
132 |                 0, int(max_bbox[0] - random.uniform(0, max_l_trans)))
133 |             crop_ymin = max(
134 |                 0, int(max_bbox[1] - random.uniform(0, max_u_trans)))
135 |             crop_xmax = min(  # clamp the crop window to the image width
136 |                 w, int(max_bbox[2] + random.uniform(0, max_r_trans)))
137 |             crop_ymax = min(  # clamp the crop window to the image height
138 |                 h, int(max_bbox[3] + random.uniform(0, max_d_trans)))
139 | 
140 |             image = image[crop_ymin: crop_ymax, crop_xmin: crop_xmax]
141 | 
142 |             bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop_xmin
143 |             bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop_ymin
144 | 
145 |         return image, bboxes
146 | 
147 |     def random_translate(self, image, bboxes):
148 | 
149 |         if random.random() < 0.5:
150 |             h, w, _ = image.shape
151 |             max_bbox = np.concatenate(
152 |                 [np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1)
153 | 
154 |             max_l_trans = max_bbox[0]
155 |             max_u_trans = max_bbox[1]
156 |             max_r_trans = w - max_bbox[2]
157 |             max_d_trans = h - max_bbox[3]
158 | 
159 |             tx = random.uniform(-(max_l_trans - 1), (max_r_trans - 1))
160 |             ty = random.uniform(-(max_u_trans - 1), (max_d_trans - 1))
161 | 
162 |             M = np.array([[1, 0, tx], [0, 1, ty]])
163 |             image = cv2.warpAffine(image, M, (w, h))
164 | 
165 |             bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx
166 |             bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty
167 | 
168 |         return image, bboxes
169 | 
170 |     def parse_annotation(self, annotation):
171 |         line = annotation.split()
172 |         image_path = os.path.join(self.img_root, line[0])
173 |         if not os.path.exists(image_path):
174 |             raise KeyError("%s does not exist ... " % image_path)
" % image_path) 175 | image = cv2.imread(image_path) 176 | bboxes = np.array([list(map(int, box.split(','))) for box in line[1:]]) 177 | 178 | if self.data_aug: 179 | image, bboxes = self.random_horizontal_flip( 180 | np.copy(image), np.copy(bboxes)) 181 | image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes)) 182 | image, bboxes = self.random_translate( 183 | np.copy(image), np.copy(bboxes)) 184 | 185 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 186 | image, bboxes = utils.image_preporcess( 187 | np.copy(image), [self.train_input_size, self.train_input_size], np.copy(bboxes)) 188 | return image, bboxes 189 | 190 | def bbox_iou(self, boxes1, boxes2): 191 | 192 | boxes1 = np.array(boxes1) 193 | boxes2 = np.array(boxes2) 194 | 195 | boxes1_area = boxes1[..., 2] * boxes1[..., 3] 196 | boxes2_area = boxes2[..., 2] * boxes2[..., 3] 197 | 198 | boxes1 = np.concatenate([boxes1[..., :2] - boxes1[..., 2:] * 0.5, 199 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) 200 | boxes2 = np.concatenate([boxes2[..., :2] - boxes2[..., 2:] * 0.5, 201 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) 202 | 203 | left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) 204 | right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) 205 | 206 | inter_section = np.maximum(right_down - left_up, 0.0) 207 | inter_area = inter_section[..., 0] * inter_section[..., 1] 208 | union_area = boxes1_area + boxes2_area - inter_area 209 | 210 | return inter_area / union_area 211 | 212 | def preprocess_true_boxes(self, bboxes): 213 | 214 | label = [np.zeros((self.train_output_sizes[i], self.train_output_sizes[i], self.anchor_per_scale, 215 | 5 + self.num_classes)) for i in range(3)] 216 | bboxes_xywh = [np.zeros((self.max_bbox_per_scale, 4)) 217 | for _ in range(3)] 218 | bbox_count = np.zeros((3,)) 219 | 220 | for bbox in bboxes: 221 | bbox_coor = bbox[:4] 222 | bbox_class_ind = bbox[4] 223 | 224 | onehot = np.zeros(self.num_classes, dtype=np.float) 225 | onehot[bbox_class_ind] = 1.0 226 | uniform_distribution = np.full( 227 | self.num_classes, 1.0 / self.num_classes) 228 | deta = 0.01 229 | smooth_onehot = onehot * (1 - deta) + deta * uniform_distribution 230 | 231 | bbox_xywh = np.concatenate( 232 | [(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]], axis=-1) 233 | bbox_xywh_scaled = 1.0 * \ 234 | bbox_xywh[np.newaxis, :] / self.strides[:, np.newaxis] 235 | 236 | iou = [] 237 | exist_positive = False 238 | for i in range(3): 239 | anchors_xywh = np.zeros((self.anchor_per_scale, 4)) 240 | anchors_xywh[:, 0:2] = np.floor( 241 | bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5 242 | anchors_xywh[:, 2:4] = self.anchors[i] 243 | 244 | iou_scale = self.bbox_iou( 245 | bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh) 246 | iou.append(iou_scale) 247 | iou_mask = iou_scale > 0.3 248 | 249 | if np.any(iou_mask): 250 | xind, yind = np.floor( 251 | bbox_xywh_scaled[i, 0:2]).astype(np.int32) 252 | 253 | label[i][yind, xind, iou_mask, :] = 0 254 | label[i][yind, xind, iou_mask, 0:4] = bbox_xywh 255 | label[i][yind, xind, iou_mask, 4:5] = 1.0 256 | label[i][yind, xind, iou_mask, 5:] = smooth_onehot 257 | 258 | bbox_ind = int(bbox_count[i] % self.max_bbox_per_scale) 259 | bboxes_xywh[i][bbox_ind, :4] = bbox_xywh 260 | bbox_count[i] += 1 261 | 262 | exist_positive = True 263 | 264 | if not exist_positive: 265 | best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1) 266 | best_detect = int(best_anchor_ind / self.anchor_per_scale) 267 | best_anchor = int(best_anchor_ind % self.anchor_per_scale) 
268 |                 xind, yind = np.floor(
269 |                     bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32)
270 | 
271 |                 label[best_detect][yind, xind, best_anchor, :] = 0
272 |                 label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh
273 |                 label[best_detect][yind, xind, best_anchor, 4:5] = 1.0
274 |                 label[best_detect][yind, xind, best_anchor, 5:] = smooth_onehot
275 | 
276 |                 bbox_ind = int(bbox_count[best_detect] %
277 |                                self.max_bbox_per_scale)
278 |                 bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
279 |                 bbox_count[best_detect] += 1
280 |         label_sbbox, label_mbbox, label_lbbox = label
281 |         sbboxes, mbboxes, lbboxes = bboxes_xywh
282 |         return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes
283 | 
284 |     def __len__(self):
285 |         return self.num_batchs
286 | 
--------------------------------------------------------------------------------
/core/utils.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | #================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : utils.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-07-12 01:33:38
10 | #   Description :
11 | #
12 | #================================================================
13 | 
14 | import cv2
15 | import random
16 | import colorsys
17 | import numpy as np
18 | import tensorflow as tf
19 | from core.config import cfg
20 | 
21 | def load_weights(model, weights_file):
22 |     """
23 |     I agree that this code is very ugly, but I don’t know any better way of doing it.
24 |     """
25 |     wf = open(weights_file, 'rb')
26 |     major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5)
27 | 
28 |     j = 0
29 |     for i in range(75):
30 |         conv_layer_name = 'conv2d_%d' %i if i > 0 else 'conv2d'
31 |         bn_layer_name = 'batch_normalization_%d' %j if j > 0 else 'batch_normalization'
32 | 
33 |         conv_layer = model.get_layer(conv_layer_name)
34 |         filters = conv_layer.filters
35 |         k_size = conv_layer.kernel_size[0]
36 |         in_dim = conv_layer.input_shape[-1]
37 | 
38 |         if i not in [58, 66, 74]:
39 |             # darknet weights: [beta, gamma, mean, variance]
40 |             bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters)
41 |             # tf weights: [gamma, beta, mean, variance]
42 |             bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
43 |             bn_layer = model.get_layer(bn_layer_name)
44 |             j += 1
45 |         else:
46 |             conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)
47 | 
48 |         # darknet shape (out_dim, in_dim, height, width)
49 |         conv_shape = (filters, in_dim, k_size, k_size)
50 |         conv_weights = np.fromfile(wf, dtype=np.float32, count=np.product(conv_shape))
51 |         # tf shape (height, width, in_dim, out_dim)
52 |         conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])
53 | 
54 |         if i not in [58, 66, 74]:
55 |             conv_layer.set_weights([conv_weights])
56 |             bn_layer.set_weights(bn_weights)
57 |         else:
58 |             conv_layer.set_weights([conv_weights, conv_bias])
59 | 
60 |     assert len(wf.read()) == 0, 'failed to read all data'
61 |     wf.close()
62 | 
63 | 
64 | def read_class_names(class_file_name):
65 |     '''loads class name from a file'''
66 |     names = {}
67 |     with open(class_file_name, 'r') as data:
68 |         for ID, name in enumerate(data):
69 |             names[ID] = name.strip('\n')
70 |     return names
71 | 
72 | 
73 | def get_anchors(anchors_path):
74 |     '''loads the anchors from a file'''
75 |     with open(anchors_path) as f:
76 |         anchors = f.readline()
77 |     anchors = np.array(anchors.split(','), dtype=np.float32)
78 |     return anchors.reshape(3, 3, 2)
79 | 
80 | 
81 | def image_preporcess(image, target_size, gt_boxes=None):
82 | 
83 |     ih, iw = target_size
84 |     h, w, _ = image.shape
85 | 
86 |     scale = min(iw/w, ih/h)
87 |     nw, nh = int(scale * w), int(scale * h)
88 |     image_resized = cv2.resize(image, (nw, nh))
89 | 
90 |     image_paded = np.full(shape=[ih, iw, 3], fill_value=128.0)
91 |     dw, dh = (iw - nw) // 2, (ih-nh) // 2
92 |     image_paded[dh:nh+dh, dw:nw+dw, :] = image_resized
93 |     image_paded = image_paded / 255.
94 | 
95 |     if gt_boxes is None:
96 |         return image_paded
97 | 
98 |     else:
99 |         gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw
100 |         gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
101 |         return image_paded, gt_boxes
102 | 
103 | 
104 | def draw_bbox(image, bboxes, classes=read_class_names(cfg.YOLO.CLASSES), show_label=True):
105 |     """
106 |     bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
107 |     """
108 | 
109 |     num_classes = len(classes)
110 |     image_h, image_w, _ = image.shape
111 |     hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
112 |     colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
113 |     colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
114 | 
115 |     random.seed(0)
116 |     random.shuffle(colors)
117 |     random.seed(None)
118 | 
119 |     for i, bbox in enumerate(bboxes):
120 |         coor = np.array(bbox[:4], dtype=np.int32)
121 |         fontScale = 0.5
122 |         score = bbox[4]
123 |         class_ind = int(bbox[5])
124 |         bbox_color = colors[class_ind]
125 |         bbox_thick = int(0.6 * (image_h + image_w) / 600)
126 |         c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
127 |         cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)
128 | 
129 |         if show_label:
130 |             bbox_mess = '%s: %.2f' % (classes[class_ind], score)
131 |             t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick//2)[0]
132 |             cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1)  # filled
133 | 
134 |             cv2.putText(image, bbox_mess, (c1[0], c1[1]-2), cv2.FONT_HERSHEY_SIMPLEX,
135 |                         fontScale, (0, 0, 0), bbox_thick//2, lineType=cv2.LINE_AA)
136 | 
137 |     return image
138 | 
139 | 
140 | 
141 | def bboxes_iou(boxes1, boxes2):
142 | 
143 |     boxes1 = np.array(boxes1)
144 |     boxes2 = np.array(boxes2)
145 | 
146 |     boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
147 |     boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
148 | 
149 |     left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
150 |     right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
151 | 
152 |     inter_section = np.maximum(right_down - left_up, 0.0)
153 |     inter_area = inter_section[..., 0] * inter_section[..., 1]
154 |     union_area = boxes1_area + boxes2_area - inter_area
155 |     ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)
156 | 
157 |     return ious
158 | 
159 | 
160 | def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
161 |     """
162 |     :param bboxes: (xmin, ymin, xmax, ymax, score, class)
163 | 
164 |     Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
165 |           https://github.com/bharatsingh430/soft-nms
166 |     """
167 |     classes_in_img = list(set(bboxes[:, 5]))
168 |     best_bboxes = []
169 | 
170 |     for cls in classes_in_img:
171 |         cls_mask = (bboxes[:, 5] == cls)
172 |         cls_bboxes = bboxes[cls_mask]
173 | 
174 |         while len(cls_bboxes) > 0:
175 |             max_ind = np.argmax(cls_bboxes[:, 4])
176 |             best_bbox = cls_bboxes[max_ind]
177 |             best_bboxes.append(best_bbox)
178 |             cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]])
179 |             iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
180 |             weight = np.ones((len(iou),), dtype=np.float32)
181 | 
182 |             assert method in ['nms', 'soft-nms']
183 | 
184 |             if method == 'nms':
185 |                 iou_mask = iou > iou_threshold
186 |                 weight[iou_mask] = 0.0
187 | 
188 |             if method == 'soft-nms':
189 |                 weight = np.exp(-(1.0 * iou ** 2 / sigma))
190 | 
191 |             cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
192 |             score_mask = cls_bboxes[:, 4] > 0.
193 |             cls_bboxes = cls_bboxes[score_mask]
194 | 
195 |     return best_bboxes
196 | 
197 | 
198 | def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):
199 | 
200 |     valid_scale = [0, np.inf]
201 |     pred_bbox = np.array(pred_bbox)
202 | 
203 |     pred_xywh = pred_bbox[:, 0:4]
204 |     pred_conf = pred_bbox[:, 4]
205 |     pred_prob = pred_bbox[:, 5:]
206 | 
207 |     # # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax)
208 |     pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5,
209 |                                 pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1)
210 |     # # (2) (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org)
211 |     org_h, org_w = org_img_shape
212 |     resize_ratio = min(input_size / org_w, input_size / org_h)
213 | 
214 |     dw = (input_size - resize_ratio * org_w) / 2
215 |     dh = (input_size - resize_ratio * org_h) / 2
216 | 
217 |     pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio
218 |     pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio
219 | 
220 |     # # (3) clip boxes that are out of range
221 |     pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]),
222 |                                 np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1)
223 |     invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3]))
224 |     pred_coor[invalid_mask] = 0
225 | 
226 |     # # (4) discard some invalid boxes
227 |     bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1))
228 |     scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1]))
229 | 
230 |     # # (5) discard some boxes with low scores
231 |     classes = np.argmax(pred_prob, axis=-1)
232 |     scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
233 |     score_mask = scores > score_threshold
234 |     mask = np.logical_and(scale_mask, score_mask)
235 |     coors, scores, classes = pred_coor[mask], scores[mask], classes[mask]
236 | 
237 |     return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)
238 | 
239 | 
240 | 
241 | 
--------------------------------------------------------------------------------
/core/yolov3.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | # ================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : yolov3.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-07-12 13:47:10
10 | #   Description :
11 | #
12 | # ================================================================
13 | 
14 | import numpy as np
15 | import tensorflow as tf
16 | import core.utils as utils
17 | import core.common as common
18 | import core.backbone as backbone
19 | from core.config import cfg
20 | 
21 | 
22 | NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
23 | ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS)
24 | STRIDES = np.array(cfg.YOLO.STRIDES)
25 | IOU_LOSS_THRESH = cfg.YOLO.IOU_LOSS_THRESH
26 | 
27 | 
28 | def YOLOv3(input_layer):
29 |     route_1, route_2, conv = backbone.darknet53(input_layer)
30 | 
31 |     conv = common.convolutional(conv, (1, 1, 1024, 512))
32 |     conv = common.convolutional(conv, (3, 3, 512, 1024))
33 |     conv = common.convolutional(conv, (1, 1, 1024, 512))
34 |     conv = common.convolutional(conv, (3, 3, 512, 1024))
35 |     conv = common.convolutional(conv, (1, 1, 1024, 512))
36 | 
37 |     conv_lobj_branch = common.convolutional(conv, (3, 3, 512, 1024))
38 |     conv_lbbox = common.convolutional(
39 |         conv_lobj_branch, (1, 1, 1024, 3*(NUM_CLASS + 5)), activate=False, bn=False)
40 | 
41 |     conv = common.convolutional(conv, (1, 1, 512, 256))
42 |     conv = common.upsample(conv)
43 | 
44 |     conv = tf.concat([conv, route_2], axis=-1)
45 | 
46 |     conv = common.convolutional(conv, (1, 1, 768, 256))
47 |     conv = common.convolutional(conv, (3, 3, 256, 512))
48 |     conv = common.convolutional(conv, (1, 1, 512, 256))
49 |     conv = common.convolutional(conv, (3, 3, 256, 512))
50 |     conv = common.convolutional(conv, (1, 1, 512, 256))
51 | 
52 |     conv_mobj_branch = common.convolutional(conv, (3, 3, 256, 512))
53 |     conv_mbbox = common.convolutional(
54 |         conv_mobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False)
55 | 
56 |     conv = common.convolutional(conv, (1, 1, 256, 128))
57 |     conv = common.upsample(conv)
58 | 
59 |     conv = tf.concat([conv, route_1], axis=-1)
60 | 
61 |     conv = common.convolutional(conv, (1, 1, 384, 128))
62 |     conv = common.convolutional(conv, (3, 3, 128, 256))
63 |     conv = common.convolutional(conv, (1, 1, 256, 128))
64 |     conv = common.convolutional(conv, (3, 3, 128, 256))
65 |     conv = common.convolutional(conv, (1, 1, 256, 128))
66 | 
67 |     conv_sobj_branch = common.convolutional(conv, (3, 3, 128, 256))
68 |     conv_sbbox = common.convolutional(
69 |         conv_sobj_branch, (1, 1, 256, 3*(NUM_CLASS + 5)), activate=False, bn=False)
70 | 
71 |     return [conv_sbbox, conv_mbbox, conv_lbbox]
72 | 
73 | 
74 | def decode(conv_output, i=0):
75 |     """
76 |     return tensor of shape [batch_size, output_size, output_size, anchor_per_scale, 5 + num_classes]
77 |             contains (x, y, w, h, score, probability)
78 |     """
79 | 
80 |     conv_shape = tf.shape(conv_output)
81 |     batch_size = conv_shape[0]
82 |     output_size = conv_shape[1]
83 | 
84 |     conv_output = tf.reshape(
85 |         conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))
86 | 
87 |     conv_raw_dxdy = conv_output[:, :, :, :, 0:2]
88 |     conv_raw_dwdh = conv_output[:, :, :, :, 2:4]
89 |     conv_raw_conf = conv_output[:, :, :, :, 4:5]
90 |     conv_raw_prob = conv_output[:, :, :, :, 5:]
91 | 
92 |     y = tf.tile(tf.range(output_size, dtype=tf.int32)
93 |                 [:, tf.newaxis], [1, output_size])
94 |     x = tf.tile(tf.range(output_size, dtype=tf.int32)
95 |                 [tf.newaxis, :], [output_size, 1])
96 | 
97 |     xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1)
98 |     xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [
99 |                       batch_size, 1, 1, 3, 1])
100 |     xy_grid = tf.cast(xy_grid, tf.float32)
101 | 
102 |     pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * STRIDES[i]
103 |     pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) * STRIDES[i]
104 |     pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)
105 | 
106 |     pred_conf = tf.sigmoid(conv_raw_conf)
107 |     pred_prob = tf.sigmoid(conv_raw_prob)
108 | 
109 |     return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1)
110 | 
111 | 
112 | def bbox_iou(boxes1, boxes2):
113 | 
114 |     boxes1_area = boxes1[..., 2] * boxes1[..., 3]
115 |     boxes2_area = boxes2[..., 2] * boxes2[..., 3]
116 | 
117 |     boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
118 |                         boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
119 |     boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
120 |                         boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)
121 | 
122 |     left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
123 |     right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
124 | 
125 |     inter_section = tf.maximum(right_down - left_up, 0.0)
126 |     inter_area = inter_section[..., 0] * inter_section[..., 1]
127 |     union_area = boxes1_area + boxes2_area - inter_area
128 | 
129 |     return 1.0 * inter_area / union_area
130 | 
131 | 
132 | def bbox_giou(boxes1, boxes2):
133 | 
134 |     boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
135 |                         boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
136 |     boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
137 |                         boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)
138 | 
139 |     boxes1 = tf.concat([tf.minimum(boxes1[..., :2], boxes1[..., 2:]),
140 |                         tf.maximum(boxes1[..., :2], boxes1[..., 2:])], axis=-1)
141 |     boxes2 = tf.concat([tf.minimum(boxes2[..., :2], boxes2[..., 2:]),
142 |                         tf.maximum(boxes2[..., :2], boxes2[..., 2:])], axis=-1)
143 | 
144 |     boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * \
145 |         (boxes1[..., 3] - boxes1[..., 1])
146 |     boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * \
147 |         (boxes2[..., 3] - boxes2[..., 1])
148 | 
149 |     left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
150 |     right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
151 | 
152 |     inter_section = tf.maximum(right_down - left_up, 0.0)
153 |     inter_area = inter_section[..., 0] * inter_section[..., 1]
154 |     union_area = boxes1_area + boxes2_area - inter_area
155 |     iou = inter_area / union_area
156 | 
157 |     enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2])
158 |     enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:])
159 |     enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0)
160 |     enclose_area = enclose[..., 0] * enclose[..., 1]
161 |     giou = iou - 1.0 * (enclose_area - union_area) / enclose_area
162 | 
163 |     return giou
164 | 
165 | 
166 | def compute_loss(pred, conv, label, bboxes, i=0):
167 | 
168 |     conv_shape = tf.shape(conv)
169 |     batch_size = conv_shape[0]
170 |     output_size = conv_shape[1]
171 |     input_size = STRIDES[i] * output_size
172 |     conv = tf.reshape(conv, (batch_size, output_size,
173 |                              output_size, 3, 5 + NUM_CLASS))
174 | 
175 |     conv_raw_conf = conv[:, :, :, :, 4:5]
176 |     conv_raw_prob = conv[:, :, :, :, 5:]
177 | 
178 |     pred_xywh = pred[:, :, :, :, 0:4]
179 |     pred_conf = pred[:, :, :, :, 4:5]
180 | 
181 |     label_xywh = label[:, :, :, :, 0:4]
182 |     respond_bbox = label[:, :, :, :, 4:5]
183 |     label_prob = label[:, :, :, :, 5:]
184 | 
185 |     giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
186 |     input_size = tf.cast(input_size, tf.float32)
187 | 
188 |     bbox_loss_scale = 2.0 - 1.0 * \
189 |         label_xywh[:, :, :, :, 2:3] * \
190 |         label_xywh[:, :, :, :, 3:4] / (input_size ** 2)
191 |     giou_loss = respond_bbox * bbox_loss_scale * (1 - giou)
192 | 
193 |     iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :],
194 |                    bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
195 |     max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)
196 | 
197 |     respond_bgd = (1.0 - respond_bbox) * \
198 |         tf.cast(max_iou < IOU_LOSS_THRESH, tf.float32)
199 | 
200 |     conf_focal = tf.pow(respond_bbox - pred_conf, 2)
201 | 
202 |     conf_loss = conf_focal * (
203 |         respond_bbox *
204 |         tf.nn.sigmoid_cross_entropy_with_logits(
205 |             labels=respond_bbox, logits=conv_raw_conf)
206 |         +
207 |         respond_bgd *
208 |         tf.nn.sigmoid_cross_entropy_with_logits(
209 |             labels=respond_bbox, logits=conv_raw_conf)
210 |     )
211 | 
212 |     prob_loss = respond_bbox * \
213 |         tf.nn.sigmoid_cross_entropy_with_logits(
214 |             labels=label_prob, logits=conv_raw_prob)
215 | 
216 |     giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1, 2, 3, 4]))
217 |     conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1, 2, 3, 4]))
218 |     prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1, 2, 3, 4]))
219 | 
220 |     return giou_loss, conf_loss, prob_loss
221 | 
--------------------------------------------------------------------------------
/data/anchors/basline_anchors.txt:
--------------------------------------------------------------------------------
1 | 1.25,1.625, 2.0,3.75, 4.125,2.875, 1.875,3.8125, 3.875,2.8125, 3.6875,7.4375, 3.625,2.8125, 4.875,6.1875, 11.65625,10.1875
2 | 
--------------------------------------------------------------------------------
/data/classes/coco.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 | 
--------------------------------------------------------------------------------
/data/classes/yymnist.names:
--------------------------------------------------------------------------------
1 | 0
2 | 1
3 | 2
4 | 3
5 | 4
6 | 5
7 | 6
8 | 7
9 | 8
10 | 9
11 | 
--------------------------------------------------------------------------------
/data/dataset/yymnist_test.txt:
--------------------------------------------------------------------------------
1 | /home/yang/test/TensorFlow2.0-Examples/4-Object_Detection/YOLOV3/data/dataset/test/000001.jpg 122,187,136,201,7 71,209,85,223,4 78,7,100,29,2 366,244,394,272,3 344,383,372,411,3 238,243,294,299,8 24,233,136,345,8
2 | /home/yang/test/TensorFlow2.0-Examples/4-Object_Detection/YOLOV3/data/dataset/test/000002.jpg 343,279,357,293,4 258,168,280,190,7 277,354,333,410,6 5,24,61,80,3 124,227,152,255,9 140,285,224,369,3 105,42,217,154,4
3 | /home/yang/test/TensorFlow2.0-Examples/4-Object_Detection/YOLOV3/data/dataset/test/000003.jpg 285,100,313,128,2 99,16,183,100,1
4 | /home/yang/test/TensorFlow2.0-Examples/4-Object_Detection/YOLOV3/data/dataset/test/000004.jpg 365,140,387,162,0 70,332,92,354,1 278,373,292,387,0 341,14,383,56,0 10,143,52,185,8 214,109,242,137,1 112,282,140,310,0 312,130,368,186,0 180,180,236,236,3 271,192,383,304,2 20,10,132,122,4 66,147,150,231,4
5 | /home/yang/test/TensorFlow2.0-Examples/4-Object_Detection/YOLOV3/data/dataset/test/000005.jpg 194,13,216,35,9 126,34,168,76,2 80,183,108,211,6 341,332,397,388,6
6 | /home/yang/test/TensorFlow2.0-Examples/4-Object_Detection/YOLOV3/data/dataset/test/000006.jpg 251,179,265,193,9 22,43,36,57,8 315,132,337,154,4 155,372,197,414,4 239,18,295,74,1 89,53,131,95,1 81,296,165,380,1 208,257,320,369,9 86,173,170,257,7
7 | 
--------------------------------------------------------------------------------
/docs/01554.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/docs/01554.jpg
--------------------------------------------------------------------------------
/docs/01567.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/docs/01567.jpg
--------------------------------------------------------------------------------
/docs/kite.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/docs/kite.jpg
--------------------------------------------------------------------------------
/docs/kite_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/docs/kite_result.jpg
--------------------------------------------------------------------------------
/docs/loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/docs/loss.png
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.15.1
2 | Pillow==6.2.0
3 | scipy==1.1.0
4 | wget==3.2
5 | seaborn==0.9.0
6 | easydict==1.9
7 | tensorflow==2.0.0b0
8 | 
--------------------------------------------------------------------------------
/docs/road.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/yolov3_tf2/9f01118426e709d59aff00afc83d37504f5fab0d/docs/road.mp4
--------------------------------------------------------------------------------
/image_demo.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | #================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : image_demo.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-07-12 13:07:27
10 | #   Description :
11 | #
12 | #================================================================
13 | 
14 | import cv2
15 | import numpy as np
16 | import core.utils as utils
17 | import tensorflow as tf
18 | from core.yolov3 import YOLOv3, decode
19 | from PIL import Image
20 | from core.config import cfg
21 | 
22 | input_size = 416
23 | NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
24 | image_path = "./docs/kite.jpg"
25 | 
26 | input_layer = tf.keras.layers.Input([input_size, input_size, 3])
27 | feature_maps = YOLOv3(input_layer)
28 | 
29 | original_image = cv2.imread(image_path)
30 | original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
31 | original_image_size = original_image.shape[:2]
32 | 
33 | image_data = utils.image_preporcess(np.copy(original_image), [input_size, input_size])
34 | image_data = image_data[np.newaxis, ...].astype(np.float32)
35 | 
36 | bbox_tensors = []
37 | for i, fm in enumerate(feature_maps):
38 |     bbox_tensor = decode(fm, i)
39 |     bbox_tensors.append(tf.reshape(bbox_tensor, (-1, 5+NUM_CLASS)))
40 | 
41 | bbox_tensors = tf.concat(bbox_tensors, axis=0)
42 | model = tf.keras.Model(input_layer, bbox_tensors)
43 | utils.load_weights(model, "./yolov3.weights")
44 | model.summary()
45 | 
46 | pred_bbox = model.predict(image_data)
47 | bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3)
48 | bboxes = utils.nms(bboxes, 0.45, method='nms')
49 | 
50 | image = utils.draw_bbox(original_image, bboxes)
51 | image = Image.fromarray(image)
52 | image.show()
53 | 
54 | 
--------------------------------------------------------------------------------
/scripts/convert_coco_to_trainlist.py:
--------------------------------------------------------------------------------
1 | """
2 | 
3 | convert coco to trainlist
4 | 
5 | 
6 | """
7 | from pycocotools.coco import COCO
8 | from pycocotools.cocoeval import COCOeval
9 | from pycocotools import mask as COCOmask
10 | import numpy as np
11 | import cv2
12 | import matplotlib.pyplot as plt
13 | import skimage.io as io
14 | import random
15 | import fire
16 | 
17 | 
18 | def drawbox(img, box):
19 |     (img_h, img_w, img_c) = img.shape
20 |     (x, y, w, h) = box
21 |     p0 = (int((x - w/2) * img_w), int((y - h/2) * img_h))
22 |     p1 = (int((x + w/2) * img_w), int((y + h/2) * img_h))
23 |     cv2.rectangle(img, p0, p1, (255, 255, 255), 1)
24 | 
25 | 
26 | def convert_boxes_labels(ct_boxes, ratio):
27 |     boxlab = np.zeros((len(ct_boxes) * 6 + 1), np.float32)
28 |     for i in range(len(ct_boxes)):
29 |         box = ct_boxes[i]
30 |         boxlab[i * 6 + 0] = 0
31 |         boxlab[i * 6 + 1: i * 6 + 5] = box
32 |         boxlab[i * 6 + 5] = i + 1
33 |     boxlab[len(ct_boxes) * 6 + 0] = ratio
34 |     return boxlab
35 | 
36 | 
37 | def resize_mask(mask, size):
38 |     (dst_h, dst_w) = size
39 |     mk_resize = np.zeros((dst_h, dst_w, 1), np.uint8)
40 |     (img_h, img_w, img_c) = mask.shape
41 |     for r in range(dst_h):
42 |         for c in range(dst_w):
43 |             (o_r, o_c) = (int(1.0 * img_h / dst_h * r), int(1.0 * img_w / dst_w * c))
44 |             mk_resize[r, c, 0] = mask[o_r, o_c, 0]
45 |     return mk_resize
46 | 
47 | 
48 | def run(coco_dir):
49 |     (dst_h, dst_w) = [320, 320]
50 |     dataTypes = ['train', 'val']
51 |     years = ['2014', '2017']
52 | 
53 |     # dataTypes = ['val']
54 |     # years = ['2017']
55 | 
56 |     dataDir = coco_dir
57 |     for dataType in dataTypes:
58 |         for year in years:
59 |             imageSet = dataType + year
60 |             ann_path = '{}/annotations/instances_{}.json'.format(
61 |                 dataDir, imageSet)
62 |             coco = COCO(ann_path)
63 | 
64 |             target_f = 'coco_{}_{}.txt'.format(dataType, year)
65 |             print('generating {}'.format(target_f))
66 |             target = open(target_f, 'w')
67 | 
68 |             # display COCO categories and supercategories
69 |             cats = coco.loadCats(coco.getCatIds())
70 |             imgIds = coco.getImgIds()
71 | 
72 |             random.shuffle(imgIds)
73 |             count_id = 0
74 |             for imgId in imgIds:
75 |                 count_id = count_id+1
76 |                 img = coco.loadImgs(imgId)[0]
77 | 
78 |                 img_path = '%s/%s' % (imageSet, img['file_name'])
79 |                 print(img_path)
80 | 
81 |                 annIds = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
82 |                 anns = coco.loadAnns(annIds)
83 | 
84 |                 anno_part = ''
85 |                 for ann in anns:
86 |                     box = ann['bbox']
87 |                     box = [str(i) for i in box]
88 |                     label_id = str(ann['category_id'])
89 |                     one_part = ','.join(box + [label_id])
90 |                     anno_part += ' {}'.format(one_part)
91 | 
92 |                 one_line = '{}{}\n'.format(img_path, anno_part)
93 |                 target.write(one_line)
94 |     print('done!')
95 | 
96 | 
97 | if __name__ == '__main__':
98 |     fire.Fire(run)
99 | 
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | # ================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : test.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-07-19 10:29:34
10 | #   Description :
11 | #
12 | # ================================================================
13 | 
14 | import cv2
15 | import os
16 | import shutil
17 | import numpy as np
18 | import tensorflow as tf
19 | import core.utils as utils
20 | from core.config import cfg
21 | from core.yolov3 import YOLOv3, decode
22 | 
23 | 
24 | INPUT_SIZE = 416
25 | NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
26 | CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
27 | 
28 | predicted_dir_path = '../mAP/predicted'
29 | ground_truth_dir_path = '../mAP/ground-truth'
30 | if os.path.exists(predicted_dir_path):
31 |     shutil.rmtree(predicted_dir_path)
32 | if os.path.exists(ground_truth_dir_path):
33 |     shutil.rmtree(ground_truth_dir_path)
34 | if os.path.exists(cfg.TEST.DECTECTED_IMAGE_PATH):
35 |     shutil.rmtree(cfg.TEST.DECTECTED_IMAGE_PATH)
36 | 
37 | os.mkdir(predicted_dir_path)
38 | os.mkdir(ground_truth_dir_path)
39 | os.mkdir(cfg.TEST.DECTECTED_IMAGE_PATH)
40 | 
41 | # Build Model
42 | input_layer = tf.keras.layers.Input([INPUT_SIZE, INPUT_SIZE, 3])
43 | feature_maps = YOLOv3(input_layer)
44 | 
45 | bbox_tensors = []
46 | for i, fm in enumerate(feature_maps):
47 |     bbox_tensor = decode(fm, i)
48 |     bbox_tensors.append(tf.reshape(bbox_tensor, (-1, 5+NUM_CLASS)))
49 | 
50 | bbox_tensors = tf.concat(bbox_tensors, axis=0)
51 | model = tf.keras.Model(input_layer, bbox_tensors)
52 | model.load_weights("./yolov3")
53 | 
54 | with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
55 |     for num, line in enumerate(annotation_file):
56 |         annotation = line.strip().split()
57 |         image_path = annotation[0]
58 |         image_name = image_path.split('/')[-1]
59 |         image = cv2.imread(image_path)
60 |         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
61 |         bbox_data_gt = np.array([list(map(int, box.split(',')))
62 |                                  for box in annotation[1:]])
63 | 
64 |         if len(bbox_data_gt) == 0:
65 |             bboxes_gt = []
66 |             classes_gt = []
67 |         else:
68 |             bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
69 |         ground_truth_path = os.path.join(
70 |             ground_truth_dir_path, str(num) + '.txt')
71 | 
72 |         print('=> ground truth of %s:' % image_name)
73 |         num_bbox_gt = len(bboxes_gt)
74 |         with open(ground_truth_path, 'w') as f:
75 |             for i in range(num_bbox_gt):
76 |                 class_name = CLASSES[classes_gt[i]]
77 |                 xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
78 |                 bbox_mess = ' '.join(
79 |                     [class_name, xmin, ymin, xmax, ymax]) + '\n'
80 |                 f.write(bbox_mess)
81 |                 print('\t' + str(bbox_mess).strip())
82 |         print('=> predict result of %s:' % image_name)
83 |         predict_result_path = os.path.join(
84 |             predicted_dir_path, str(num) + '.txt')
85 |         # Predict Process
86 |         image_size = image.shape[:2]
87 |         image_data = utils.image_preporcess(
88 |             np.copy(image), [INPUT_SIZE, INPUT_SIZE])
89 |         image_data = image_data[np.newaxis, ...].astype(np.float32)
90 | 
91 |         pred_bbox = model.predict(image_data)
92 |         bboxes = utils.postprocess_boxes(
93 |             pred_bbox, image_size, INPUT_SIZE, cfg.TEST.SCORE_THRESHOLD)
94 |         bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms')
95 | 
96 |         if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
97 |             image = utils.draw_bbox(image, bboxes)
98 |             cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH+image_name, image)
99 | 
100 |         with open(predict_result_path, 'w') as f:
101 |             for bbox in bboxes:
102 |                 coor = np.array(bbox[:4], dtype=np.int32)
103 |                 score = bbox[4]
104 |                 class_ind = int(bbox[5])
105 |                 class_name = CLASSES[class_ind]
106 |                 score = '%.4f' % score
107 |                 xmin, ymin, xmax, ymax = list(map(str, coor))
108 |                 bbox_mess = ' '.join(
109 |                     [class_name, score, xmin, ymin, xmax, ymax]) + '\n'
110 |                 f.write(bbox_mess)
111 |                 print('\t' + str(bbox_mess).strip())
112 | 
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | # ================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : train.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-07-18 09:18:54
10 | #   Description :
11 | #
12 | # ================================================================
13 | 
14 | import os
15 | import time
16 | import shutil
17 | import numpy as np
18 | import tensorflow as tf
19 | import core.utils as utils
20 | from core.dataset import Dataset
21 | from core.yolov3 import YOLOv3, decode, compute_loss
22 | from core.config import cfg
23 | 
24 | trainset = Dataset('train')
25 | logdir = "./data/log"
26 | steps_per_epoch = len(trainset)
27 | global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
28 | warmup_steps = cfg.TRAIN.WARMUP_EPOCHS * steps_per_epoch
29 | total_steps = cfg.TRAIN.EPOCHS * steps_per_epoch
30 | 
31 | input_tensor = tf.keras.layers.Input([416, 416, 3])
32 | conv_tensors = YOLOv3(input_tensor)
33 | 
34 | output_tensors = []
35 | for i, conv_tensor in enumerate(conv_tensors):
36 |     pred_tensor = decode(conv_tensor, i)
37 |     output_tensors.append(conv_tensor)
38 |     output_tensors.append(pred_tensor)
39 | 
40 | model = tf.keras.Model(input_tensor, output_tensors)
41 | optimizer = tf.keras.optimizers.Adam()
42 | if os.path.exists(logdir):
43 |     shutil.rmtree(logdir)
44 | writer = tf.summary.create_file_writer(logdir)
45 | 
46 | 
47 | def train_step(image_data, target):
48 |     with tf.GradientTape() as tape:
49 |         pred_result = model(image_data, training=True)
50 |         giou_loss = conf_loss = prob_loss = 0
51 | 
52 |         # optimizing process
53 |         for i in range(3):
54 |             conv, pred = pred_result[i*2], pred_result[i*2+1]
55 |             loss_items = compute_loss(pred, conv, *target[i], i)
56 |             giou_loss += loss_items[0]
57 |             conf_loss += loss_items[1]
58 |             prob_loss += loss_items[2]
59 | 
60 |         total_loss = giou_loss + conf_loss + prob_loss
61 | 
62 |         gradients = tape.gradient(total_loss, model.trainable_variables)
63 |         optimizer.apply_gradients(zip(gradients, model.trainable_variables))
64 |         tf.print("=> STEP %4d lr: %.6f giou_loss: %4.2f conf_loss: %4.2f "
65 |                  "prob_loss: %4.2f total_loss: %4.2f" % (global_steps, optimizer.lr.numpy(),
66 |                                                          giou_loss, conf_loss,
67 |                                                          prob_loss, total_loss))
68 |         # update learning rate
69 |         global_steps.assign_add(1)
70 |         if global_steps < warmup_steps:
71 |             lr = global_steps / warmup_steps * cfg.TRAIN.LR_INIT
72 |         else:
73 |             lr = cfg.TRAIN.LR_END + 0.5 * (cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END) * (
74 |                 (1 + tf.cos((global_steps - warmup_steps) /
75 |                             (total_steps - warmup_steps) * np.pi))
76 |             )
77 |         optimizer.lr.assign(lr.numpy())
78 | 
79 |         # writing summary data
80 |         with writer.as_default():
81 |             tf.summary.scalar("lr", optimizer.lr, step=global_steps)
82 |             tf.summary.scalar("loss/total_loss", total_loss, step=global_steps)
83 |             tf.summary.scalar("loss/giou_loss", giou_loss, step=global_steps)
84 |             tf.summary.scalar("loss/conf_loss", conf_loss, step=global_steps)
85 |             tf.summary.scalar("loss/prob_loss", prob_loss, step=global_steps)
86 |         writer.flush()
87 | 
88 | 
89 | for epoch in range(cfg.TRAIN.EPOCHS):
90 |     for image_data, target in trainset:
91 |         train_step(image_data, target)
92 |     model.save_weights("./yolov3")
93 | 
--------------------------------------------------------------------------------
/video_demo.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | #================================================================
4 | #   Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | #   Editor      : VIM
7 | #   File name   : video_demo.py
8 | #   Author      : YunYang1994
9 | #   Created date: 2019-07-12 19:36:53
10 | #   Description :
11 | #
12 | #================================================================
13 | 
14 | import cv2
15 | import time
16 | import numpy as np
17 | import core.utils as utils
18 | import tensorflow as tf
19 | from core.yolov3 import YOLOv3, decode
20 | 
21 | 
22 | video_path = "./docs/road.mp4"
23 | # video_path = 0
24 | num_classes = 80
25 | input_size = 416
26 | 
27 | input_layer = tf.keras.layers.Input([input_size, input_size, 3])
28 | feature_maps = YOLOv3(input_layer)
29 | 
30 | bbox_tensors = []
31 | for i, fm in enumerate(feature_maps):
32 |     bbox_tensor = decode(fm, i)
33 |     bbox_tensors.append(tf.reshape(bbox_tensor, (-1, 5+num_classes)))
34 | 
35 | bbox_tensors = tf.concat(bbox_tensors, axis=0)
36 | model = tf.keras.Model(input_layer, bbox_tensors)
37 | utils.load_weights(model, "./yolov3.weights")
38 | model.summary()
39 | vid = cv2.VideoCapture(video_path)
40 | while True:
41 |     return_value, frame = vid.read()
42 |     if return_value:
43 |         frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
44 |     else:
45 |         raise ValueError("No image!")
46 |     frame_size = frame.shape[:2]
47 |     image_data = utils.image_preporcess(np.copy(frame), [input_size, input_size])
48 |     image_data = image_data[np.newaxis, ...].astype(np.float32)
49 |     prev_time = time.time()
50 | 
51 |     pred_bbox = model.predict(image_data)
52 |     bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)
53 |     bboxes = utils.nms(bboxes, 0.45, method='nms')
54 |     image = utils.draw_bbox(frame, bboxes)
55 | 
56 |     curr_time = time.time()
57 |     exec_time = curr_time - prev_time
58 |     result = np.asarray(image)
59 |     info = "time: %.2f ms" %(1000*exec_time)
60 |     cv2.putText(result, text=info, org=(50, 70), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
61 |                 fontScale=1, color=(255, 0, 0), thickness=2)
62 |     cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
63 |     result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
64 |     cv2.imshow("result", result)
65 |     if cv2.waitKey(1) & 0xFF == ord('q'): break
66 | 
67 | 
--------------------------------------------------------------------------------
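
A note on the **det-txt** annotation format the README mentions: `Dataset.parse_annotation` and the `yymnist_test.txt` lines above imply one image per line in the form `path xmin,ymin,xmax,ymax,class_id ...`, with the path joined against `cfg.IMG_ROOT` and `class_id` indexing `coco.names`. `scripts/convert_coco_to_trainlist.py` as written emits COCO's raw `bbox` (which is `x,y,w,h`) and the raw, non-contiguous `category_id`, so it does not match what the loader parses. A hedged sketch of a writer that does match the loader's expectation — `build_cat_index` and `coco_ann_to_line` are names invented here for illustration:

```python
# Sketch only: emit one train-list line in the corner format that
# Dataset.parse_annotation expects (path xmin,ymin,xmax,ymax,class_id ...).
from pycocotools.coco import COCO


def build_cat_index(coco):
    # COCO category ids are sparse (1..90); map them onto contiguous
    # 0..79 indices in the same ascending order as data/classes/coco.names.
    return {cat_id: i for i, cat_id in enumerate(sorted(coco.getCatIds()))}


def coco_ann_to_line(img_path, anns, cat_id_to_index):
    parts = [img_path]
    for ann in anns:
        x, y, w, h = ann['bbox']                   # COCO boxes are x,y,w,h
        xmin, ymin = int(x), int(y)
        xmax, ymax = int(x + w), int(y + h)        # convert to corner coordinates
        cls = cat_id_to_index[ann['category_id']]  # contiguous class index
        parts.append('%d,%d,%d,%d,%d' % (xmin, ymin, xmax, ymax, cls))
    return ' '.join(parts) + '\n'
```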
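The schedule in `train.py` ramps the learning rate up linearly over `WARMUP_EPOCHS`, then follows a cosine decay from `LR_INIT` down to `LR_END`. The same rule as a standalone sketch (the function name `lr_at` is invented for illustration):

```python
import numpy as np


def lr_at(step, warmup_steps, total_steps, lr_init=1e-3, lr_end=1e-6):
    # Linear warmup, then cosine decay -- mirrors the update in train_step().
    if step < warmup_steps:
        return step / warmup_steps * lr_init
    progress = (step - warmup_steps) / (total_steps - warmup_steps)
    return lr_end + 0.5 * (lr_init - lr_end) * (1 + np.cos(progress * np.pi))
```

For example, with `BATCH_SIZE = 4` on COCO 2017 (~118k images, so roughly 29.6k steps per epoch), the rate reaches 1e-3 after the two warmup epochs and decays toward 1e-6 by epoch 30.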
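For reference, `decode()` in `core/yolov3.py` implements the standard YOLOv3 box parameterization: per grid cell `(cx, cy)` and per anchor `(aw, ah)` (the baseline anchors are stored in units of grid cells), scaled back to input-image pixels by the stride. A scalar sketch of the same arithmetic, with names invented here:

```python
import numpy as np


def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def decode_box(tx, ty, tw, th, cx, cy, aw, ah, stride):
    # Matches decode(): the center offset is squashed into its cell,
    # widths/heights scale the anchor, and everything is multiplied
    # by the stride to land in input-image pixel coordinates.
    bx = (sigmoid(tx) + cx) * stride
    by = (sigmoid(ty) + cy) * stride
    bw = np.exp(tw) * aw * stride
    bh = np.exp(th) * ah * stride
    return bx, by, bw, bh
```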