├── .gitignore ├── Dockerfile ├── README.md ├── convert_weights_pb.py ├── requirements.txt ├── tflite_example.py ├── utils.py ├── yolo_v3.py └── yolo_v3_tiny.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # vim tmp file 107 | *~ 108 | 109 | # PyCharm 110 | .idea/ 111 | 112 | *.jpg 113 | *.png 114 | coco.names 115 | *.weights 116 | 117 | # VSCode 118 | .vscode/ 119 | 120 | # Tensorflow 121 | checkpoint 122 | *.ckpt.* 123 | 124 | .DS_Store 125 | saved_model 126 | saved_model/* 127 | .idea 128 | .idea/* 129 | .keras 130 | .keras/* 131 | .config 132 | .config/* 133 | .bash_history 134 | .bash_history/* 135 | .wget-hsts -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:1.14.0-py3 2 | 3 | ADD . /root 4 | WORKDIR /root 5 | 6 | RUN apt-get autoclean 7 | RUN apt-get update 8 | RUN apt-get -y install wget 9 | RUN pip3 install --upgrade pip 10 | RUN pip3 install -r requirements.txt 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tensorflow-lite-yolo-v3 2 | 3 | Convert the weights of YOLO v3 object detector into tensorflow lite format. It can be served for tensorflow serving as well. 4 | 5 | ## Setup env 6 | docker build -t tflite . 7 | docker run -it -v /home/peace195/tensorflow-lite-yolo-v3:/root/ tflite 8 | 9 | ## How to run 10 | 11 | 1. Download COCO class names file: `wget https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names` 12 | 2. Download binary file with desired weights: 13 | - Full weights: `wget https://pjreddie.com/media/files/yolov3.weights` 14 | - Tiny weights: `wget https://pjreddie.com/media/files/yolov3-tiny.weights` 15 | - SPP weights: `wget https://pjreddie.com/media/files/yolov3-spp.weights` 16 | 3. Convert .weights to .pb saved_model `python ./convert_weights_pb.py` (this can be used for tensorflow serving) 17 | 4. Convert .pb to .tflite `tflite_convert --saved_model_dir=saved_model/ --output_file yolo_v3.tflite --saved_model_signature_key='predict'` 18 | 19 | 20 | Optional Flags 21 | 22 | convert_weights_pb.py: 23 | 24 | --class_names 25 | Path to the class names file 26 | --weights_file 27 | Path to the desired weights file 28 | --data_format 29 | `NCHW` (gpu only) or `NHWC` 30 | --tiny 31 | Use yolov3-tiny 32 | --spp 33 | Use yolov3-spp 34 | --output_graph 35 | Location to write the output .pb graph 36 | 37 | Contact me if you have any issues: binhtd.hust@gmail.com / Binh Do -------------------------------------------------------------------------------- /convert_weights_pb.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | import yolo_v3 6 | import yolo_v3_tiny 7 | from PIL import Image, ImageDraw 8 | 9 | from utils import load_weights, load_coco_names, detections_boxes, savepb 10 | 11 | FLAGS = tf.app.flags.FLAGS 12 | 13 | tf.app.flags.DEFINE_string( 14 | 'class_names', 'coco.names', 'File with class names') 15 | tf.app.flags.DEFINE_string( 16 | 'weights_file', 'yolov3.weights', 'Binary file with detector weights') 17 | tf.app.flags.DEFINE_string( 18 | 'data_format', 'NHWC', 'Data format: NCHW (gpu only) / NHWC') 19 | tf.app.flags.DEFINE_string( 20 | 'output_graph', 'saved_model', 'Saved model tensorflow protobuf model output path') 21 | 22 | tf.app.flags.DEFINE_bool( 23 | 'tiny', False, 'Use tiny version of YOLOv3') 24 | tf.app.flags.DEFINE_bool( 25 | 'spp', False, 'Use SPP version of YOLOv3') 26 | tf.app.flags.DEFINE_integer( 27 | 'size', 416, 'Image size') 28 | 29 | 30 | 31 | def main(argv=None): 32 | if FLAGS.tiny: 33 | model = yolo_v3_tiny.yolo_v3_tiny 34 | elif FLAGS.spp: 35 | model = yolo_v3.yolo_v3_spp 36 | else: 37 | model = yolo_v3.yolo_v3 38 | 39 | classes = load_coco_names(FLAGS.class_names) 40 | 41 | # placeholder for detector inputs 42 | inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3], "inputs") 43 | 44 | with tf.variable_scope('detector'): 45 | detections = model(inputs, len(classes), data_format=FLAGS.data_format) 46 | load_ops = load_weights(tf.global_variables(scope='detector'), FLAGS.weights_file) 47 | 48 | # Sets the output nodes in the current session 49 | boxes = detections_boxes(detections) 50 | 51 | with tf.Session() as sess: 52 | sess.run(load_ops) 53 | savepb(sess, FLAGS.output_graph) 54 | 55 | if __name__ == '__main__': 56 | tf.app.run() 57 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | keras==2.2.4 2 | numpy==1.16.1 3 | scipy==1.1.0 4 | pandas==0.24.1 5 | pillow==5.4.1 6 | scikit-image==0.14.2 7 | sklearn -------------------------------------------------------------------------------- /tflite_example.py: -------------------------------------------------------------------------------- 1 | import time 2 | import cv2 3 | import numpy as np 4 | import sys 5 | import os 6 | import tensorflow as tf 7 | from PIL import Image, ImageDraw 8 | 9 | def handle_predictions(predictions, confidence=0.6, iou_threshold=0.5): 10 | boxes = predictions[:, :, :4] 11 | box_confidences = np.expand_dims(predictions[:, :, 4], -1) 12 | box_class_probs = predictions[:, :, 5:] 13 | 14 | box_scores = box_confidences * box_class_probs 15 | box_classes = np.argmax(box_scores, axis=-1) 16 | box_class_scores = np.max(box_scores, axis=-1) 17 | pos = np.where(box_class_scores >= confidence) 18 | 19 | boxes = boxes[pos] 20 | classes = box_classes[pos] 21 | scores = box_class_scores[pos] 22 | 23 | n_boxes, n_classes, n_scores = nms_boxes(boxes, classes, scores, iou_threshold) 24 | 25 | if n_boxes: 26 | boxes = np.concatenate(n_boxes) 27 | classes = np.concatenate(n_classes) 28 | scores = np.concatenate(n_scores) 29 | 30 | return boxes, classes, scores 31 | 32 | else: 33 | return None, None, None 34 | 35 | 36 | def nms_boxes(boxes, classes, scores, iou_threshold): 37 | nboxes, nclasses, nscores = [], [], [] 38 | for c in set(classes): 39 | inds = np.where(classes == c) 40 | b = boxes[inds] 41 | c = classes[inds] 42 | s = scores[inds] 43 | 44 | x = b[:, 0] 45 | y = b[:, 1] 46 | w = b[:, 2] 47 | h = b[:, 3] 48 | 49 | areas = w * h 50 | order = s.argsort()[::-1] 51 | 52 | keep = [] 53 | while order.size > 0: 54 | i = order[0] 55 | keep.append(i) 56 | 57 | xx1 = np.maximum(x[i], x[order[1:]]) 58 | yy1 = np.maximum(y[i], y[order[1:]]) 59 | xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]]) 60 | yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]]) 61 | 62 | w1 = np.maximum(0.0, xx2 - xx1 + 1) 63 | h1 = np.maximum(0.0, yy2 - yy1 + 1) 64 | 65 | inter = w1 * h1 66 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 67 | inds = np.where(ovr <= iou_threshold)[0] 68 | order = order[inds + 1] 69 | 70 | keep = np.array(keep) 71 | 72 | nboxes.append(b[keep]) 73 | nclasses.append(c[keep]) 74 | nscores.append(s[keep]) 75 | return nboxes, nclasses, nscores 76 | 77 | def load_coco_names(file_name): 78 | names = {} 79 | with open(file_name) as f: 80 | for id, name in enumerate(f): 81 | names[id] = name 82 | return names 83 | 84 | def letter_box_image(image: Image.Image, output_height: int, output_width: int, fill_value)-> np.ndarray: 85 | height_ratio = float(output_height)/image.size[1] 86 | width_ratio = float(output_width)/image.size[0] 87 | fit_ratio = min(width_ratio, height_ratio) 88 | fit_height = int(image.size[1] * fit_ratio) 89 | fit_width = int(image.size[0] * fit_ratio) 90 | fit_image = np.asarray(image.resize((fit_width, fit_height), resample=Image.BILINEAR)) 91 | 92 | if isinstance(fill_value, int): 93 | fill_value = np.full(fit_image.shape[2], fill_value, fit_image.dtype) 94 | 95 | to_return = np.tile(fill_value, (output_height, output_width, 1)) 96 | pad_top = int(0.5 * (output_height - fit_height)) 97 | pad_left = int(0.5 * (output_width - fit_width)) 98 | to_return[pad_top:pad_top+fit_height, pad_left:pad_left+fit_width] = fit_image 99 | return to_return 100 | 101 | 102 | def draw_boxes(boxes, classes, scores, img, cls_names, detection_size, is_letter_box_image): 103 | draw = ImageDraw.Draw(img) 104 | 105 | color = tuple(np.random.randint(0, 256, 3)) 106 | for box, score, cls in zip(boxes, scores, classes): 107 | box = convert_to_original_size(box, np.array(detection_size), 108 | np.array(img.size), 109 | is_letter_box_image) 110 | draw.rectangle(box, outline=color) 111 | draw.text(box[:2], '{} {:.2f}%'.format( 112 | cls_names[cls], score * 100), fill=color) 113 | 114 | 115 | def convert_to_original_size(box, size, original_size, is_letter_box_image): 116 | if is_letter_box_image: 117 | box = box.reshape(2, 2) 118 | box[0, :] = letter_box_pos_to_original_pos(box[0, :], size, original_size) 119 | box[1, :] = letter_box_pos_to_original_pos(box[1, :], size, original_size) 120 | else: 121 | ratio = original_size / size 122 | box = box.reshape(2, 2) * ratio 123 | return list(box.reshape(-1)) 124 | 125 | 126 | def letter_box_pos_to_original_pos(letter_pos, current_size, ori_image_size)-> np.ndarray: 127 | letter_pos = np.asarray(letter_pos, dtype=np.float) 128 | current_size = np.asarray(current_size, dtype=np.float) 129 | ori_image_size = np.asarray(ori_image_size, dtype=np.float) 130 | final_ratio = min(current_size[0]/ori_image_size[0], current_size[1]/ori_image_size[1]) 131 | pad = 0.5 * (current_size - final_ratio * ori_image_size) 132 | pad = pad.astype(np.int32) 133 | to_return_pos = (letter_pos - pad) / final_ratio 134 | return to_return_pos 135 | 136 | model_path = os.path.join(os.getcwd(), 'yolo_v3.tflite') 137 | interpreter = tf.contrib.lite.Interpreter(model_path=model_path) 138 | interpreter.allocate_tensors() 139 | 140 | input_details = interpreter.get_input_details() 141 | output_details = interpreter.get_output_details() 142 | print(output_details) 143 | if input_details[0]['dtype'] == np.float32: 144 | floating_model = True 145 | 146 | height = input_details[0]['shape'][1] 147 | width = input_details[0]['shape'][2] 148 | 149 | img = Image.open('example.jpg') 150 | img_resized = letter_box_image(img, height, width, 128) 151 | img_resized = img_resized.astype(np.float32) 152 | 153 | interpreter.set_tensor(input_details[0]['index'], np.expand_dims(img_resized, 0)) 154 | interpreter.invoke() 155 | predictions = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))] 156 | boxes, classes, scores = handle_predictions(predictions[0], 157 | confidence=0.3, 158 | iou_threshold=0.5) 159 | class_names = load_coco_names("coco.names") 160 | draw_boxes(boxes, classes, scores, img, class_names, (height, width), True) 161 | img.save("output.jpg") 162 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | from PIL import ImageDraw, Image 6 | 7 | 8 | def load_graph(frozen_graph_filename): 9 | 10 | with tf.gfile.GFile(frozen_graph_filename, "rb") as f: 11 | graph_def = tf.GraphDef() 12 | graph_def.ParseFromString(f.read()) 13 | 14 | with tf.Graph().as_default() as graph: 15 | tf.import_graph_def(graph_def, name="") 16 | 17 | return graph 18 | 19 | 20 | def savepb(sess, output_graph): 21 | with sess.graph.as_default(): 22 | x_op = sess.graph.get_operation_by_name("inputs") 23 | x = x_op.outputs[0] 24 | pred_op = sess.graph.get_operation_by_name("output_boxes") 25 | pred = pred_op.outputs[0] 26 | 27 | with sess.graph.as_default(): 28 | prediction_signature = tf.saved_model.signature_def_utils.build_signature_def( 29 | inputs={ 30 | "input": tf.saved_model.utils.build_tensor_info(x) 31 | }, 32 | outputs={ 33 | "output": tf.saved_model.utils.build_tensor_info(pred) 34 | }, 35 | method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME 36 | ) 37 | builder = tf.saved_model.builder.SavedModelBuilder(output_graph) 38 | builder.add_meta_graph_and_variables( 39 | sess, [tf.saved_model.tag_constants.SERVING], 40 | signature_def_map={ 41 | "predict": prediction_signature, 42 | }) 43 | builder.save() 44 | 45 | 46 | def load_weights(var_list, weights_file): 47 | """ 48 | Loads and converts pre-trained weights. 49 | :param var_list: list of network variables. 50 | :param weights_file: name of the binary file. 51 | :return: list of assign ops 52 | """ 53 | with open(weights_file, "rb") as fp: 54 | _ = np.fromfile(fp, dtype=np.int32, count=5) 55 | 56 | weights = np.fromfile(fp, dtype=np.float32) 57 | 58 | ptr = 0 59 | i = 0 60 | assign_ops = [] 61 | while i < len(var_list) - 1: 62 | var1 = var_list[i] 63 | var2 = var_list[i + 1] 64 | # do something only if we process conv layer 65 | if 'Conv' in var1.name.split('/')[-2]: 66 | # check type of next layer 67 | if 'BatchNorm' in var2.name.split('/')[-2]: 68 | # load batch norm params 69 | gamma, beta, mean, var = var_list[i + 1:i + 5] 70 | batch_norm_vars = [beta, gamma, mean, var] 71 | for var in batch_norm_vars: 72 | shape = var.shape.as_list() 73 | num_params = np.prod(shape) 74 | var_weights = weights[ptr:ptr + num_params].reshape(shape) 75 | ptr += num_params 76 | assign_ops.append( 77 | tf.assign(var, var_weights, validate_shape=True)) 78 | 79 | # we move the pointer by 4, because we loaded 4 variables 80 | i += 4 81 | elif 'Conv' in var2.name.split('/')[-2]: 82 | # load biases 83 | bias = var2 84 | bias_shape = bias.shape.as_list() 85 | bias_params = np.prod(bias_shape) 86 | bias_weights = weights[ptr:ptr + 87 | bias_params].reshape(bias_shape) 88 | ptr += bias_params 89 | assign_ops.append( 90 | tf.assign(bias, bias_weights, validate_shape=True)) 91 | 92 | # we loaded 1 variable 93 | i += 1 94 | # we can load weights of conv layer 95 | shape = var1.shape.as_list() 96 | num_params = np.prod(shape) 97 | 98 | var_weights = weights[ptr:ptr + num_params].reshape( 99 | (shape[3], shape[2], shape[0], shape[1])) 100 | # remember to transpose to column-major 101 | var_weights = np.transpose(var_weights, (2, 3, 1, 0)) 102 | ptr += num_params 103 | assign_ops.append( 104 | tf.assign(var1, var_weights, validate_shape=True)) 105 | i += 1 106 | 107 | return assign_ops 108 | 109 | 110 | def detections_boxes(detections): 111 | """ 112 | Converts center x, center y, width and height values to coordinates of top left and bottom right points. 113 | 114 | :param detections: outputs of YOLO v3 detector of shape (?, 10647, (num_classes + 5)) 115 | :return: converted detections of same shape as input 116 | """ 117 | center_x, center_y, width, height, attrs = tf.split( 118 | detections, [1, 1, 1, 1, -1], axis=-1) 119 | w2 = width / 2 120 | h2 = height / 2 121 | x0 = center_x - w2 122 | y0 = center_y - h2 123 | x1 = center_x + w2 124 | y1 = center_y + h2 125 | 126 | boxes = tf.concat([x0, y0, x1, y1], axis=-1) 127 | detections = tf.concat([boxes, attrs], axis=-1, name="output_boxes") 128 | return detections 129 | 130 | 131 | def load_coco_names(file_name): 132 | names = {} 133 | with open(file_name) as f: 134 | for id, name in enumerate(f): 135 | names[id] = name 136 | return names -------------------------------------------------------------------------------- /yolo_v3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | slim = tf.contrib.slim 7 | 8 | _BATCH_NORM_DECAY = 0.9 9 | _BATCH_NORM_EPSILON = 1e-05 10 | _LEAKY_RELU = 0.1 11 | 12 | _ANCHORS = [(10, 13), (16, 30), (33, 23), 13 | (30, 61), (62, 45), (59, 119), 14 | (116, 90), (156, 198), (373, 326)] 15 | 16 | 17 | def darknet53(inputs): 18 | """ 19 | Builds Darknet-53 model. 20 | """ 21 | inputs = _conv2d_fixed_padding(inputs, 32, 3) 22 | inputs = _conv2d_fixed_padding(inputs, 64, 3, strides=2) 23 | inputs = _darknet53_block(inputs, 32) 24 | inputs = _conv2d_fixed_padding(inputs, 128, 3, strides=2) 25 | 26 | for i in range(2): 27 | inputs = _darknet53_block(inputs, 64) 28 | 29 | inputs = _conv2d_fixed_padding(inputs, 256, 3, strides=2) 30 | 31 | for i in range(8): 32 | inputs = _darknet53_block(inputs, 128) 33 | 34 | route_1 = inputs 35 | inputs = _conv2d_fixed_padding(inputs, 512, 3, strides=2) 36 | 37 | for i in range(8): 38 | inputs = _darknet53_block(inputs, 256) 39 | 40 | route_2 = inputs 41 | inputs = _conv2d_fixed_padding(inputs, 1024, 3, strides=2) 42 | 43 | for i in range(4): 44 | inputs = _darknet53_block(inputs, 512) 45 | 46 | return route_1, route_2, inputs 47 | 48 | 49 | def _conv2d_fixed_padding(inputs, filters, kernel_size, strides=1): 50 | if strides > 1: 51 | inputs = _fixed_padding(inputs, kernel_size) 52 | inputs = slim.conv2d(inputs, filters, kernel_size, stride=strides, 53 | padding=('SAME' if strides == 1 else 'VALID')) 54 | return inputs 55 | 56 | 57 | def _darknet53_block(inputs, filters): 58 | shortcut = inputs 59 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 60 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 61 | 62 | inputs = inputs + shortcut 63 | return inputs 64 | 65 | 66 | def _spp_block(inputs, data_format='NCHW'): 67 | return tf.concat([slim.max_pool2d(inputs, 13, 1, 'SAME'), 68 | slim.max_pool2d(inputs, 9, 1, 'SAME'), 69 | slim.max_pool2d(inputs, 5, 1, 'SAME'), 70 | inputs], 71 | axis=1 if data_format == 'NCHW' else 3) 72 | 73 | 74 | @tf.contrib.framework.add_arg_scope 75 | def _fixed_padding(inputs, kernel_size, *args, mode='CONSTANT', **kwargs): 76 | """ 77 | Pads the input along the spatial dimensions independently of input size. 78 | 79 | Args: 80 | inputs: A tensor of size [batch, channels, height_in, width_in] or 81 | [batch, height_in, width_in, channels] depending on data_format. 82 | kernel_size: The kernel to be used in the conv2d or max_pool2d operation. 83 | Should be a positive integer. 84 | data_format: The input format ('NHWC' or 'NCHW'). 85 | mode: The mode for tf.pad. 86 | 87 | Returns: 88 | A tensor with the same format as the input with the data either intact 89 | (if kernel_size == 1) or padded (if kernel_size > 1). 90 | """ 91 | pad_total = kernel_size - 1 92 | pad_beg = pad_total // 2 93 | pad_end = pad_total - pad_beg 94 | 95 | if kwargs['data_format'] == 'NCHW': 96 | padded_inputs = tf.pad(inputs, [[0, 0], [0, 0], 97 | [pad_beg, pad_end], 98 | [pad_beg, pad_end]], 99 | mode=mode) 100 | else: 101 | padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], 102 | [pad_beg, pad_end], [0, 0]], mode=mode) 103 | return padded_inputs 104 | 105 | 106 | def _yolo_block(inputs, filters, data_format='NCHW', with_spp=False): 107 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 108 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 109 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 110 | 111 | if with_spp: 112 | inputs = _spp_block(inputs, data_format) 113 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 114 | 115 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 116 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 117 | route = inputs 118 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 119 | return route, inputs 120 | 121 | 122 | def _get_size(shape, data_format): 123 | if len(shape) == 4: 124 | shape = shape[1:] 125 | return shape[1:3] if data_format == 'NCHW' else shape[0:2] 126 | 127 | 128 | def _detection_layer(inputs, num_classes, anchors, img_size, data_format): 129 | num_anchors = len(anchors) 130 | predictions = slim.conv2d(inputs, num_anchors * (5 + num_classes), 1, 131 | stride=1, normalizer_fn=None, 132 | activation_fn=None, 133 | biases_initializer=tf.zeros_initializer()) 134 | 135 | shape = predictions.get_shape().as_list() 136 | grid_size = _get_size(shape, data_format) 137 | dim = grid_size[0] * grid_size[1] 138 | bbox_attrs = 5 + num_classes 139 | 140 | if data_format == 'NCHW': 141 | predictions = tf.reshape( 142 | predictions, [-1, num_anchors * bbox_attrs, dim]) 143 | predictions = tf.transpose(predictions, [0, 2, 1]) 144 | 145 | predictions = tf.reshape(predictions, [-1, num_anchors * dim, bbox_attrs]) 146 | 147 | stride = (img_size[0] // grid_size[0], img_size[1] // grid_size[1]) 148 | 149 | anchors = [(a[0] / stride[0], a[1] / stride[1]) for a in anchors] 150 | 151 | box_centers, box_sizes, confidence, classes = tf.split( 152 | predictions, [2, 2, 1, num_classes], axis=-1) 153 | 154 | box_centers = tf.nn.sigmoid(box_centers) 155 | confidence = tf.nn.sigmoid(confidence) 156 | 157 | grid_x = tf.range(grid_size[0], dtype=tf.float32) 158 | grid_y = tf.range(grid_size[1], dtype=tf.float32) 159 | a, b = tf.meshgrid(grid_x, grid_y) 160 | 161 | x_offset = tf.reshape(a, (-1, 1)) 162 | y_offset = tf.reshape(b, (-1, 1)) 163 | 164 | x_y_offset = tf.concat([x_offset, y_offset], axis=-1) 165 | x_y_offset = tf.reshape(tf.tile(x_y_offset, [1, num_anchors]), [1, -1, 2]) 166 | 167 | box_centers = box_centers + x_y_offset 168 | box_centers = box_centers * stride 169 | 170 | anchors = tf.tile(anchors, [dim, 1]) 171 | box_sizes = tf.exp(box_sizes) * anchors 172 | box_sizes = box_sizes * stride 173 | 174 | detections = tf.concat([box_centers, box_sizes, confidence], axis=-1) 175 | 176 | classes = tf.nn.sigmoid(classes) 177 | predictions = tf.concat([detections, classes], axis=-1) 178 | return predictions 179 | 180 | 181 | def _upsample(inputs, out_shape, data_format='NCHW'): 182 | # tf.image.resize_nearest_neighbor accepts input in format NHWC 183 | if data_format == 'NCHW': 184 | inputs = tf.transpose(inputs, [0, 2, 3, 1]) 185 | 186 | if data_format == 'NCHW': 187 | new_height = out_shape[3] 188 | new_width = out_shape[2] 189 | else: 190 | new_height = out_shape[2] 191 | new_width = out_shape[1] 192 | 193 | inputs = tf.image.resize_nearest_neighbor(inputs, (new_height, new_width)) 194 | 195 | # back to NCHW if needed 196 | if data_format == 'NCHW': 197 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 198 | 199 | inputs = tf.identity(inputs, name='upsampled') 200 | return inputs 201 | 202 | 203 | def yolo_v3(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False, with_spp=False): 204 | """ 205 | Creates YOLO v3 model. 206 | 207 | :param inputs: a 4-D tensor of size [batch_size, height, width, channels]. 208 | Dimension batch_size may be undefined. The channel order is RGB. 209 | :param num_classes: number of predicted classes. 210 | :param is_training: whether is training or not. 211 | :param data_format: data format NCHW or NHWC. 212 | :param reuse: whether or not the network and its variables should be reused. 213 | :param with_spp: whether or not is using spp layer. 214 | :return: 215 | """ 216 | # it will be needed later on 217 | img_size = inputs.get_shape().as_list()[1:3] 218 | 219 | # transpose the inputs to NCHW 220 | if data_format == 'NCHW': 221 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 222 | 223 | # normalize values to range [0..1] 224 | inputs = inputs / 255 225 | 226 | # set batch norm params 227 | batch_norm_params = { 228 | 'decay': _BATCH_NORM_DECAY, 229 | 'epsilon': _BATCH_NORM_EPSILON, 230 | 'scale': True, 231 | 'is_training': is_training, 232 | 'fused': None, # Use fused batch norm if possible. 233 | } 234 | 235 | # Set activation_fn and parameters for conv2d, batch_norm. 236 | with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], data_format=data_format, reuse=reuse): 237 | with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm, 238 | normalizer_params=batch_norm_params, 239 | biases_initializer=None, 240 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)): 241 | with tf.variable_scope('darknet-53'): 242 | route_1, route_2, inputs = darknet53(inputs) 243 | 244 | with tf.variable_scope('yolo-v3'): 245 | route, inputs = _yolo_block(inputs, 512, data_format, with_spp) 246 | 247 | detect_1 = _detection_layer( 248 | inputs, num_classes, _ANCHORS[6:9], img_size, data_format) 249 | detect_1 = tf.identity(detect_1, name='detect_1') 250 | 251 | inputs = _conv2d_fixed_padding(route, 256, 1) 252 | upsample_size = route_2.get_shape().as_list() 253 | inputs = _upsample(inputs, upsample_size, data_format) 254 | inputs = tf.concat([inputs, route_2], 255 | axis=1 if data_format == 'NCHW' else 3) 256 | 257 | route, inputs = _yolo_block(inputs, 256) 258 | 259 | detect_2 = _detection_layer( 260 | inputs, num_classes, _ANCHORS[3:6], img_size, data_format) 261 | detect_2 = tf.identity(detect_2, name='detect_2') 262 | 263 | inputs = _conv2d_fixed_padding(route, 128, 1) 264 | upsample_size = route_1.get_shape().as_list() 265 | inputs = _upsample(inputs, upsample_size, data_format) 266 | inputs = tf.concat([inputs, route_1], 267 | axis=1 if data_format == 'NCHW' else 3) 268 | 269 | _, inputs = _yolo_block(inputs, 128) 270 | 271 | detect_3 = _detection_layer( 272 | inputs, num_classes, _ANCHORS[0:3], img_size, data_format) 273 | detect_3 = tf.identity(detect_3, name='detect_3') 274 | 275 | detections = tf.concat([detect_1, detect_2, detect_3], axis=1) 276 | detections = tf.identity(detections, name='detections') 277 | return detections 278 | 279 | 280 | def yolo_v3_spp(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False): 281 | """ 282 | Creates YOLO v3 with SPP model. 283 | 284 | :param inputs: a 4-D tensor of size [batch_size, height, width, channels]. 285 | Dimension batch_size may be undefined. The channel order is RGB. 286 | :param num_classes: number of predicted classes. 287 | :param is_training: whether is training or not. 288 | :param data_format: data format NCHW or NHWC. 289 | :param reuse: whether or not the network and its variables should be reused. 290 | :return: 291 | """ 292 | return yolo_v3(inputs, num_classes, is_training=is_training, data_format=data_format, reuse=reuse, with_spp=True) 293 | -------------------------------------------------------------------------------- /yolo_v3_tiny.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | from yolo_v3 import _conv2d_fixed_padding, _fixed_padding, _get_size, \ 6 | _detection_layer, _upsample 7 | 8 | slim = tf.contrib.slim 9 | 10 | _BATCH_NORM_DECAY = 0.9 11 | _BATCH_NORM_EPSILON = 1e-05 12 | _LEAKY_RELU = 0.1 13 | 14 | _ANCHORS = [(10, 14), (23, 27), (37, 58), 15 | (81, 82), (135, 169), (344, 319)] 16 | 17 | 18 | def yolo_v3_tiny(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False): 19 | """ 20 | Creates YOLO v3 tiny model. 21 | 22 | :param inputs: a 4-D tensor of size [batch_size, height, width, channels]. 23 | Dimension batch_size may be undefined. The channel order is RGB. 24 | :param num_classes: number of predicted classes. 25 | :param is_training: whether is training or not. 26 | :param data_format: data format NCHW or NHWC. 27 | :param reuse: whether or not the network and its variables should be reused. 28 | :return: 29 | """ 30 | # it will be needed later on 31 | img_size = inputs.get_shape().as_list()[1:3] 32 | 33 | # transpose the inputs to NCHW 34 | if data_format == 'NCHW': 35 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 36 | 37 | # normalize values to range [0..1] 38 | inputs = inputs / 255 39 | 40 | # set batch norm params 41 | batch_norm_params = { 42 | 'decay': _BATCH_NORM_DECAY, 43 | 'epsilon': _BATCH_NORM_EPSILON, 44 | 'scale': True, 45 | 'is_training': is_training, 46 | 'fused': None, # Use fused batch norm if possible. 47 | } 48 | 49 | # Set activation_fn and parameters for conv2d, batch_norm. 50 | with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding, slim.max_pool2d], data_format=data_format): 51 | with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], reuse=reuse): 52 | with slim.arg_scope([slim.conv2d], 53 | normalizer_fn=slim.batch_norm, 54 | normalizer_params=batch_norm_params, 55 | biases_initializer=None, 56 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)): 57 | 58 | with tf.variable_scope('yolo-v3-tiny'): 59 | for i in range(6): 60 | inputs = _conv2d_fixed_padding( 61 | inputs, 16 * pow(2, i), 3) 62 | 63 | if i == 4: 64 | route_1 = inputs 65 | 66 | if i == 5: 67 | inputs = slim.max_pool2d( 68 | inputs, [2, 2], stride=1, padding="SAME", scope='pool2') 69 | else: 70 | inputs = slim.max_pool2d( 71 | inputs, [2, 2], scope='pool2') 72 | 73 | inputs = _conv2d_fixed_padding(inputs, 1024, 3) 74 | inputs = _conv2d_fixed_padding(inputs, 256, 1) 75 | route_2 = inputs 76 | 77 | inputs = _conv2d_fixed_padding(inputs, 512, 3) 78 | # inputs = _conv2d_fixed_padding(inputs, 255, 1) 79 | 80 | detect_1 = _detection_layer( 81 | inputs, num_classes, _ANCHORS[3:6], img_size, data_format) 82 | detect_1 = tf.identity(detect_1, name='detect_1') 83 | 84 | inputs = _conv2d_fixed_padding(route_2, 128, 1) 85 | upsample_size = route_1.get_shape().as_list() 86 | inputs = _upsample(inputs, upsample_size, data_format) 87 | 88 | inputs = tf.concat([inputs, route_1], 89 | axis=1 if data_format == 'NCHW' else 3) 90 | 91 | inputs = _conv2d_fixed_padding(inputs, 256, 3) 92 | # inputs = _conv2d_fixed_padding(inputs, 255, 1) 93 | 94 | detect_2 = _detection_layer( 95 | inputs, num_classes, _ANCHORS[0:3], img_size, data_format) 96 | detect_2 = tf.identity(detect_2, name='detect_2') 97 | 98 | detections = tf.concat([detect_1, detect_2], axis=1) 99 | detections = tf.identity(detections, name='detections') 100 | return detections 101 | --------------------------------------------------------------------------------