├── .gitignore
├── Dockerfile
├── README.md
├── convert_weights_pb.py
├── requirements.txt
├── tflite_example.py
├── utils.py
├── yolo_v3.py
└── yolo_v3_tiny.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | # vim tmp file
107 | *~
108 | 
109 | # PyCharm
110 | .idea/
111 | 
112 | *.jpg
113 | *.png
114 | coco.names
115 | *.weights
116 | 
117 | # VSCode
118 | .vscode/
119 | 
120 | # Tensorflow
121 | checkpoint
122 | *.ckpt.*
123 | 
124 | .DS_Store
125 | saved_model
126 | saved_model/*
127 | .idea
128 | .idea/*
129 | .keras
130 | .keras/*
131 | .config
132 | .config/*
133 | .bash_history
134 | .bash_history/*
135 | .wget-hsts


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM tensorflow/tensorflow:1.14.0-py3
 2 | 
 3 | ADD . /root
 4 | WORKDIR /root
 5 | 
 6 | RUN apt-get autoclean
 7 | RUN apt-get update
 8 | RUN apt-get -y install wget
 9 | RUN pip3 install --upgrade pip
10 | RUN pip3 install -r requirements.txt
11 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # tensorflow-lite-yolo-v3
 2 | 
  3 | Convert the weights of the YOLO v3 object detector into TensorFlow Lite format. The intermediate SavedModel can also be served with TensorFlow Serving.
 4 | 
 5 | ## Setup env
 6 |     docker build -t tflite .
 7 |     docker run -it -v /home/peace195/tensorflow-lite-yolo-v3:/root/ tflite
 8 |     
 9 | ## How to run
10 | 
11 | 1. Download COCO class names file: `wget https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names`
12 | 2. Download binary file with desired weights: 
13 |     - Full weights: `wget https://pjreddie.com/media/files/yolov3.weights`
14 |     - Tiny weights: `wget https://pjreddie.com/media/files/yolov3-tiny.weights` 
15 |     - SPP weights: `wget https://pjreddie.com/media/files/yolov3-spp.weights` 
 16 | 3. Convert the .weights file to a .pb SavedModel: `python ./convert_weights_pb.py` (the result can also be used with TensorFlow Serving)
 17 | 4. Convert the SavedModel to .tflite: `tflite_convert --saved_model_dir=saved_model/ --output_file yolo_v3.tflite --saved_model_signature_key='predict'`
18 | 
19 | 
20 | Optional Flags
21 | 
22 | convert_weights_pb.py:
23 | 
24 |     --class_names
25 |         Path to the class names file
26 |     --weights_file
27 |         Path to the desired weights file    
28 |     --data_format
29 |         `NCHW` (gpu only) or `NHWC`
30 |     --tiny
31 |         Use yolov3-tiny
32 |     --spp
33 |         Use yolov3-spp
34 |     --output_graph
35 |         Location to write the output .pb graph
36 |         
37 | Contact me if you have any issues: binhtd.hust@gmail.com / Binh Do


--------------------------------------------------------------------------------
/convert_weights_pb.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import numpy as np
 4 | import tensorflow as tf
 5 | import yolo_v3
 6 | import yolo_v3_tiny
 7 | from PIL import Image, ImageDraw
 8 | 
 9 | from utils import load_weights, load_coco_names, detections_boxes, savepb
10 | 
FLAGS = tf.app.flags.FLAGS

# --- input / output locations -------------------------------------------
tf.app.flags.DEFINE_string('class_names', 'coco.names', 'File with class names')
tf.app.flags.DEFINE_string('weights_file', 'yolov3.weights', 'Binary file with detector weights')
tf.app.flags.DEFINE_string('data_format', 'NHWC', 'Data format: NCHW (gpu only) / NHWC')
tf.app.flags.DEFINE_string('output_graph', 'saved_model', 'Saved model tensorflow protobuf model output path')

# --- model variant and input resolution ---------------------------------
tf.app.flags.DEFINE_bool('tiny', False, 'Use tiny version of YOLOv3')
tf.app.flags.DEFINE_bool('spp', False, 'Use SPP version of YOLOv3')
tf.app.flags.DEFINE_integer('size', 416, 'Image size')
28 | 
29 | 
30 | 
def main(argv=None):
    """
    Build the selected YOLO v3 graph, load the darknet weights into it and
    export the result as a SavedModel.

    :param argv: unused; present because tf.app.run() passes the argument list.
    """
    # --tiny wins over --spp when both flags are set.
    if FLAGS.tiny:
        model = yolo_v3_tiny.yolo_v3_tiny
    else:
        model = yolo_v3.yolo_v3_spp if FLAGS.spp else yolo_v3.yolo_v3

    num_classes = len(load_coco_names(FLAGS.class_names))

    # placeholder for detector inputs; savepb() looks it up by the name "inputs"
    input_shape = [None, FLAGS.size, FLAGS.size, 3]
    inputs = tf.placeholder(tf.float32, input_shape, "inputs")

    with tf.variable_scope('detector'):
        detections = model(inputs, num_classes, data_format=FLAGS.data_format)
        load_ops = load_weights(tf.global_variables(scope='detector'), FLAGS.weights_file)

    # Adds the "output_boxes" op to the graph; savepb() fetches it by name,
    # so the returned tensor itself is not needed here.
    detections_boxes(detections)

    with tf.Session() as sess:
        sess.run(load_ops)
        savepb(sess, FLAGS.output_graph)


if __name__ == '__main__':
    tf.app.run()
57 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | keras==2.2.4
2 | numpy==1.16.1
3 | scipy==1.1.0
4 | pandas==0.24.1
5 | pillow==5.4.1
6 | scikit-image==0.14.2
7 | sklearn


--------------------------------------------------------------------------------
/tflite_example.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import cv2
  3 | import numpy as np
  4 | import sys
  5 | import os
  6 | import tensorflow as tf
  7 | from PIL import Image, ImageDraw
  8 | 
  9 | def handle_predictions(predictions, confidence=0.6, iou_threshold=0.5):
 10 |     boxes = predictions[:, :, :4]
 11 |     box_confidences = np.expand_dims(predictions[:, :, 4], -1)
 12 |     box_class_probs = predictions[:, :, 5:]
 13 | 
 14 |     box_scores = box_confidences * box_class_probs
 15 |     box_classes = np.argmax(box_scores, axis=-1)
 16 |     box_class_scores = np.max(box_scores, axis=-1)
 17 |     pos = np.where(box_class_scores >= confidence)
 18 | 
 19 |     boxes = boxes[pos]
 20 |     classes = box_classes[pos]
 21 |     scores = box_class_scores[pos]
 22 | 
 23 |     n_boxes, n_classes, n_scores = nms_boxes(boxes, classes, scores, iou_threshold)
 24 | 
 25 |     if n_boxes:
 26 |         boxes = np.concatenate(n_boxes)
 27 |         classes = np.concatenate(n_classes)
 28 |         scores = np.concatenate(n_scores)
 29 | 
 30 |         return boxes, classes, scores
 31 | 
 32 |     else:
 33 |         return None, None, None
 34 | 
 35 | 
 36 | def nms_boxes(boxes, classes, scores, iou_threshold):
 37 |     nboxes, nclasses, nscores = [], [], []
 38 |     for c in set(classes):
 39 |         inds = np.where(classes == c)
 40 |         b = boxes[inds]
 41 |         c = classes[inds]
 42 |         s = scores[inds]
 43 | 
 44 |         x = b[:, 0]
 45 |         y = b[:, 1]
 46 |         w = b[:, 2]
 47 |         h = b[:, 3]
 48 | 
 49 |         areas = w * h
 50 |         order = s.argsort()[::-1]
 51 | 
 52 |         keep = []
 53 |         while order.size > 0:
 54 |             i = order[0]
 55 |             keep.append(i)
 56 | 
 57 |             xx1 = np.maximum(x[i], x[order[1:]])
 58 |             yy1 = np.maximum(y[i], y[order[1:]])
 59 |             xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
 60 |             yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])
 61 | 
 62 |             w1 = np.maximum(0.0, xx2 - xx1 + 1)
 63 |             h1 = np.maximum(0.0, yy2 - yy1 + 1)
 64 | 
 65 |             inter = w1 * h1
 66 |             ovr = inter / (areas[i] + areas[order[1:]] - inter)
 67 |             inds = np.where(ovr <= iou_threshold)[0]
 68 |             order = order[inds + 1]
 69 | 
 70 |         keep = np.array(keep)
 71 | 
 72 |         nboxes.append(b[keep])
 73 |         nclasses.append(c[keep])
 74 |         nscores.append(s[keep])
 75 |     return nboxes, nclasses, nscores
 76 | 
 77 | def load_coco_names(file_name):
 78 |     names = {}
 79 |     with open(file_name) as f:
 80 |         for id, name in enumerate(f):
 81 |             names[id] = name
 82 |     return names
 83 | 
 84 | def letter_box_image(image: Image.Image, output_height: int, output_width: int, fill_value)-> np.ndarray:
 85 |     height_ratio = float(output_height)/image.size[1]
 86 |     width_ratio = float(output_width)/image.size[0]
 87 |     fit_ratio = min(width_ratio, height_ratio)
 88 |     fit_height = int(image.size[1] * fit_ratio)
 89 |     fit_width = int(image.size[0] * fit_ratio)
 90 |     fit_image = np.asarray(image.resize((fit_width, fit_height), resample=Image.BILINEAR))
 91 | 
 92 |     if isinstance(fill_value, int):
 93 |         fill_value = np.full(fit_image.shape[2], fill_value, fit_image.dtype)
 94 | 
 95 |     to_return = np.tile(fill_value, (output_height, output_width, 1))
 96 |     pad_top = int(0.5 * (output_height - fit_height))
 97 |     pad_left = int(0.5 * (output_width - fit_width))
 98 |     to_return[pad_top:pad_top+fit_height, pad_left:pad_left+fit_width] = fit_image
 99 |     return to_return
100 | 
101 | 
def draw_boxes(boxes, classes, scores, img, cls_names, detection_size, is_letter_box_image):
    """
    Draw detection rectangles and "<name> <score>%" labels onto *img* in place.

    :param boxes: iterable of boxes in detector coordinates, or None.
    :param classes: class id of each box.
    :param scores: score of each box (0..1).
    :param img: PIL image to draw on.
    :param cls_names: mapping from class id to display name.
    :param detection_size: size of the detector input the boxes refer to.
    :param is_letter_box_image: whether the detector input was letter-boxed.
    """
    # handle_predictions returns (None, None, None) when nothing was detected;
    # there is nothing to draw in that case.
    if boxes is None:
        return

    draw = ImageDraw.Draw(img)

    # One random colour for all boxes; plain ints rather than numpy scalars.
    color = tuple(int(channel) for channel in np.random.randint(0, 256, 3))
    size = np.array(detection_size)
    original_size = np.array(img.size)
    for box, score, cls in zip(boxes, scores, classes):
        box = convert_to_original_size(box, size, original_size,
                                       is_letter_box_image)
        draw.rectangle(box, outline=color)
        draw.text(box[:2], '{} {:.2f}%'.format(
            cls_names[cls], score * 100), fill=color)
113 | 
114 | 
def convert_to_original_size(box, size, original_size, is_letter_box_image):
    """
    Map a box from detector-input coordinates to original-image coordinates.

    The input *box* is never modified (the previous version wrote through a
    reshape view and altered the caller's array in the letter-box branch).

    :param box: 4 values, read as two (x, y) points.
    :param size: detector input size as a numpy array.
    :param original_size: original image size as a numpy array.
    :param is_letter_box_image: True when the detector input was letter-boxed,
        False when it was a plain resize.
    :return: list of 4 coordinates in the original image.
    """
    # np.array(...) copies, so the caller's data stays untouched.
    corners = np.array(box, dtype=np.float64).reshape(2, 2)
    if is_letter_box_image:
        corners = np.stack([
            letter_box_pos_to_original_pos(corner, size, original_size)
            for corner in corners
        ])
    else:
        corners = corners * (original_size / size)
    return list(corners.reshape(-1))
124 | 
125 | 
def letter_box_pos_to_original_pos(letter_pos, current_size, ori_image_size)-> np.ndarray:
    """
    Convert a position inside a letter-boxed image back to the coordinate
    system of the original image.

    :param letter_pos: position in the letter-boxed image.
    :param current_size: size of the letter-boxed image, same axis order as
        *letter_pos*.
    :param ori_image_size: size of the original image, same axis order.
    :return: the position in the original image as a float array.
    """
    # np.float64 instead of the np.float alias, which NumPy 1.24 removed.
    letter_pos = np.asarray(letter_pos, dtype=np.float64)
    current_size = np.asarray(current_size, dtype=np.float64)
    ori_image_size = np.asarray(ori_image_size, dtype=np.float64)
    final_ratio = min(current_size[0]/ori_image_size[0], current_size[1]/ori_image_size[1])
    # Padding added on each side by the letter-boxing, truncated to whole pixels.
    pad = 0.5 * (current_size - final_ratio * ori_image_size)
    pad = pad.astype(np.int32)
    to_return_pos = (letter_pos - pad) / final_ratio
    return to_return_pos
135 | 
# End-to-end example: run yolo_v3.tflite on example.jpg and save output.jpg.
model_path = os.path.join(os.getcwd(), 'yolo_v3.tflite')
# tf.lite is the public location of the interpreter since TF 1.13; fall back
# to tf.contrib.lite only on older releases.
lite = tf.lite if hasattr(tf, 'lite') else tf.contrib.lite
interpreter = lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print(output_details)
# Always defined (previously only assigned when the input was float32).
floating_model = input_details[0]['dtype'] == np.float32

height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]

# Force 3 channels so grayscale / RGBA files also match the model input.
img = Image.open('example.jpg').convert('RGB')
img_resized = letter_box_image(img, height, width, 128)
img_resized = img_resized.astype(np.float32)

interpreter.set_tensor(input_details[0]['index'], np.expand_dims(img_resized, 0))
interpreter.invoke()
predictions = [interpreter.get_tensor(detail['index']) for detail in output_details]
boxes, classes, scores = handle_predictions(predictions[0],
                                            confidence=0.3,
                                            iou_threshold=0.5)
class_names = load_coco_names("coco.names")
# handle_predictions yields None when nothing passes the thresholds.
if boxes is not None:
    # Box positions are (x, y) and img.size is (width, height), so the
    # detector size is passed in the same (width, height) order.
    draw_boxes(boxes, classes, scores, img, class_names, (width, height), True)
img.save("output.jpg")
162 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | from PIL import ImageDraw, Image
  6 | 
  7 | 
  8 | def load_graph(frozen_graph_filename):
  9 | 
 10 |     with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
 11 |         graph_def = tf.GraphDef()
 12 |         graph_def.ParseFromString(f.read())
 13 | 
 14 |     with tf.Graph().as_default() as graph:
 15 |         tf.import_graph_def(graph_def, name="")
 16 | 
 17 |     return graph
 18 | 
 19 | 
 20 | def savepb(sess, output_graph):
 21 |     with sess.graph.as_default():
 22 |         x_op = sess.graph.get_operation_by_name("inputs")
 23 |         x = x_op.outputs[0]
 24 |         pred_op = sess.graph.get_operation_by_name("output_boxes")
 25 |         pred = pred_op.outputs[0]
 26 | 
 27 |     with sess.graph.as_default():
 28 |         prediction_signature = tf.saved_model.signature_def_utils.build_signature_def(
 29 |             inputs={
 30 |                 "input": tf.saved_model.utils.build_tensor_info(x)
 31 |             },
 32 |             outputs={
 33 |                 "output": tf.saved_model.utils.build_tensor_info(pred)
 34 |             },
 35 |             method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
 36 |         )
 37 |         builder = tf.saved_model.builder.SavedModelBuilder(output_graph)
 38 |         builder.add_meta_graph_and_variables(
 39 |             sess, [tf.saved_model.tag_constants.SERVING],
 40 |             signature_def_map={
 41 |                 "predict": prediction_signature,
 42 |             })
 43 |         builder.save()
 44 | 
 45 | 
 46 | def load_weights(var_list, weights_file):
 47 |     """
 48 |     Loads and converts pre-trained weights.
 49 |     :param var_list: list of network variables.
 50 |     :param weights_file: name of the binary file.
 51 |     :return: list of assign ops
 52 |     """
 53 |     with open(weights_file, "rb") as fp:
 54 |         _ = np.fromfile(fp, dtype=np.int32, count=5)
 55 | 
 56 |         weights = np.fromfile(fp, dtype=np.float32)
 57 | 
 58 |     ptr = 0
 59 |     i = 0
 60 |     assign_ops = []
 61 |     while i < len(var_list) - 1:
 62 |         var1 = var_list[i]
 63 |         var2 = var_list[i + 1]
 64 |         # do something only if we process conv layer
 65 |         if 'Conv' in var1.name.split('/')[-2]:
 66 |             # check type of next layer
 67 |             if 'BatchNorm' in var2.name.split('/')[-2]:
 68 |                 # load batch norm params
 69 |                 gamma, beta, mean, var = var_list[i + 1:i + 5]
 70 |                 batch_norm_vars = [beta, gamma, mean, var]
 71 |                 for var in batch_norm_vars:
 72 |                     shape = var.shape.as_list()
 73 |                     num_params = np.prod(shape)
 74 |                     var_weights = weights[ptr:ptr + num_params].reshape(shape)
 75 |                     ptr += num_params
 76 |                     assign_ops.append(
 77 |                         tf.assign(var, var_weights, validate_shape=True))
 78 | 
 79 |                 # we move the pointer by 4, because we loaded 4 variables
 80 |                 i += 4
 81 |             elif 'Conv' in var2.name.split('/')[-2]:
 82 |                 # load biases
 83 |                 bias = var2
 84 |                 bias_shape = bias.shape.as_list()
 85 |                 bias_params = np.prod(bias_shape)
 86 |                 bias_weights = weights[ptr:ptr +
 87 |                                        bias_params].reshape(bias_shape)
 88 |                 ptr += bias_params
 89 |                 assign_ops.append(
 90 |                     tf.assign(bias, bias_weights, validate_shape=True))
 91 | 
 92 |                 # we loaded 1 variable
 93 |                 i += 1
 94 |             # we can load weights of conv layer
 95 |             shape = var1.shape.as_list()
 96 |             num_params = np.prod(shape)
 97 | 
 98 |             var_weights = weights[ptr:ptr + num_params].reshape(
 99 |                 (shape[3], shape[2], shape[0], shape[1]))
100 |             # remember to transpose to column-major
101 |             var_weights = np.transpose(var_weights, (2, 3, 1, 0))
102 |             ptr += num_params
103 |             assign_ops.append(
104 |                 tf.assign(var1, var_weights, validate_shape=True))
105 |             i += 1
106 | 
107 |     return assign_ops
108 | 
109 | 
def detections_boxes(detections):
    """
    Replace (center x, center y, width, height) in the first four attributes
    with (x0, y0, x1, y1): the top-left and bottom-right corners.

    The result op is named "output_boxes".

    :param detections: outputs of YOLO v3 detector of shape (?, 10647, (num_classes + 5))
    :return: converted detections of same shape as input
    """
    center_x, center_y, width, height, attrs = tf.split(
        detections, [1, 1, 1, 1, -1], axis=-1)
    half_w = width / 2
    half_h = height / 2

    corners = tf.concat([center_x - half_w,
                         center_y - half_h,
                         center_x + half_w,
                         center_y + half_h], axis=-1)
    return tf.concat([corners, attrs], axis=-1, name="output_boxes")
129 | 
130 | 
def load_coco_names(file_name):
    """
    Read a class-names file (one name per line) into a dict.

    :param file_name: path of the text file.
    :return: dict mapping the 0-based line index to the class name, with the
        line terminator and surrounding whitespace removed.
    """
    with open(file_name) as f:
        return {index: line.strip() for index, line in enumerate(f)}


--------------------------------------------------------------------------------
/yolo_v3.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | 
# Short alias for the TF-Slim layer library used throughout this module.
slim = tf.contrib.slim

# Batch-norm settings and leaky-ReLU slope applied to every conv layer that
# yolo_v3() builds inside its arg_scope.
_BATCH_NORM_DECAY = 0.9
_BATCH_NORM_EPSILON = 1e-05
_LEAKY_RELU = 0.1

# Anchor boxes. yolo_v3() passes the slices [6:9], [3:6] and [0:3] to its
# first, second and third detection layer respectively.
# NOTE(review): presumably (width, height) in input-image pixels -- confirm.
_ANCHORS = [(10, 13), (16, 30), (33, 23),
            (30, 61), (62, 45), (59, 119),
            (116, 90), (156, 198), (373, 326)]
 15 | 
 16 | 
 17 | def darknet53(inputs):
 18 |     """
 19 |     Builds Darknet-53 model.
 20 |     """
 21 |     inputs = _conv2d_fixed_padding(inputs, 32, 3)
 22 |     inputs = _conv2d_fixed_padding(inputs, 64, 3, strides=2)
 23 |     inputs = _darknet53_block(inputs, 32)
 24 |     inputs = _conv2d_fixed_padding(inputs, 128, 3, strides=2)
 25 | 
 26 |     for i in range(2):
 27 |         inputs = _darknet53_block(inputs, 64)
 28 | 
 29 |     inputs = _conv2d_fixed_padding(inputs, 256, 3, strides=2)
 30 | 
 31 |     for i in range(8):
 32 |         inputs = _darknet53_block(inputs, 128)
 33 | 
 34 |     route_1 = inputs
 35 |     inputs = _conv2d_fixed_padding(inputs, 512, 3, strides=2)
 36 | 
 37 |     for i in range(8):
 38 |         inputs = _darknet53_block(inputs, 256)
 39 | 
 40 |     route_2 = inputs
 41 |     inputs = _conv2d_fixed_padding(inputs, 1024, 3, strides=2)
 42 | 
 43 |     for i in range(4):
 44 |         inputs = _darknet53_block(inputs, 512)
 45 | 
 46 |     return route_1, route_2, inputs
 47 | 
 48 | 
 49 | def _conv2d_fixed_padding(inputs, filters, kernel_size, strides=1):
 50 |     if strides > 1:
 51 |         inputs = _fixed_padding(inputs, kernel_size)
 52 |     inputs = slim.conv2d(inputs, filters, kernel_size, stride=strides,
 53 |                          padding=('SAME' if strides == 1 else 'VALID'))
 54 |     return inputs
 55 | 
 56 | 
 57 | def _darknet53_block(inputs, filters):
 58 |     shortcut = inputs
 59 |     inputs = _conv2d_fixed_padding(inputs, filters, 1)
 60 |     inputs = _conv2d_fixed_padding(inputs, filters * 2, 3)
 61 | 
 62 |     inputs = inputs + shortcut
 63 |     return inputs
 64 | 
 65 | 
 66 | def _spp_block(inputs, data_format='NCHW'):
 67 |     return tf.concat([slim.max_pool2d(inputs, 13, 1, 'SAME'),
 68 |                       slim.max_pool2d(inputs, 9, 1, 'SAME'),
 69 |                       slim.max_pool2d(inputs, 5, 1, 'SAME'),
 70 |                       inputs],
 71 |                      axis=1 if data_format == 'NCHW' else 3)
 72 | 
 73 | 
 74 | @tf.contrib.framework.add_arg_scope
 75 | def _fixed_padding(inputs, kernel_size, *args, mode='CONSTANT', **kwargs):
 76 |     """
 77 |     Pads the input along the spatial dimensions independently of input size.
 78 | 
 79 |     Args:
 80 |       inputs: A tensor of size [batch, channels, height_in, width_in] or
 81 |         [batch, height_in, width_in, channels] depending on data_format.
 82 |       kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
 83 |                    Should be a positive integer.
 84 |       data_format: The input format ('NHWC' or 'NCHW').
 85 |       mode: The mode for tf.pad.
 86 | 
 87 |     Returns:
 88 |       A tensor with the same format as the input with the data either intact
 89 |       (if kernel_size == 1) or padded (if kernel_size > 1).
 90 |     """
 91 |     pad_total = kernel_size - 1
 92 |     pad_beg = pad_total // 2
 93 |     pad_end = pad_total - pad_beg
 94 | 
 95 |     if kwargs['data_format'] == 'NCHW':
 96 |         padded_inputs = tf.pad(inputs, [[0, 0], [0, 0],
 97 |                                         [pad_beg, pad_end],
 98 |                                         [pad_beg, pad_end]],
 99 |                                mode=mode)
100 |     else:
101 |         padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
102 |                                         [pad_beg, pad_end], [0, 0]], mode=mode)
103 |     return padded_inputs
104 | 
105 | 
def _yolo_block(inputs, filters, data_format='NCHW', with_spp=False):
    """
    Stack of alternating 1x1 (filters) and 3x3 (2 * filters) convs that
    precedes a detection layer.

    :param inputs: feature map tensor.
    :param filters: number of filters of the 1x1 convs.
    :param data_format: forwarded to _spp_block when with_spp is set.
    :param with_spp: insert an SPP block plus a 1x1 conv after the third conv.
    :return: (route, output): the output of the last 1x1 conv and of the
        final 3x3 conv.
    """
    net = inputs
    for out_filters, kernel in ((filters, 1), (filters * 2, 3), (filters, 1)):
        net = _conv2d_fixed_padding(net, out_filters, kernel)

    if with_spp:
        net = _conv2d_fixed_padding(_spp_block(net, data_format), filters, 1)

    net = _conv2d_fixed_padding(net, filters * 2, 3)
    route = _conv2d_fixed_padding(net, filters, 1)
    return route, _conv2d_fixed_padding(route, filters * 2, 3)
120 | 
121 | 
122 | def _get_size(shape, data_format):
123 |     if len(shape) == 4:
124 |         shape = shape[1:]
125 |     return shape[1:3] if data_format == 'NCHW' else shape[0:2]
126 | 
127 | 
def _detection_layer(inputs, num_classes, anchors, img_size, data_format):
    """
    Turn a feature map into decoded box predictions for one detection scale.

    :param inputs: feature map tensor in the layout given by data_format.
    :param num_classes: number of predicted classes.
    :param anchors: list of anchor pairs used at this scale.
    :param img_size: two-element size of the network input.
    :param data_format: 'NCHW' or 'NHWC'.
    :return: tensor of shape [-1, num_anchors * grid cells, 5 + num_classes]
        holding box centers, box sizes, confidence and class scores.
    """
    num_anchors = len(anchors)
    # 1x1 conv without batch norm or activation: raw predictions per anchor.
    predictions = slim.conv2d(inputs, num_anchors * (5 + num_classes), 1,
                              stride=1, normalizer_fn=None,
                              activation_fn=None,
                              biases_initializer=tf.zeros_initializer())

    shape = predictions.get_shape().as_list()
    grid_size = _get_size(shape, data_format)
    dim = grid_size[0] * grid_size[1]
    bbox_attrs = 5 + num_classes

    # For NCHW, move the attribute axis last before the common reshape.
    if data_format == 'NCHW':
        predictions = tf.reshape(
            predictions, [-1, num_anchors * bbox_attrs, dim])
        predictions = tf.transpose(predictions, [0, 2, 1])

    predictions = tf.reshape(predictions, [-1, num_anchors * dim, bbox_attrs])

    # Input pixels per grid cell along each of the two spatial axes.
    # NOTE(review): grid_size[0] is the first spatial entry of the shape
    # (presumably height) yet is used below for the x grid and the first
    # anchor component; this looks equivalent only for square inputs -- confirm.
    stride = (img_size[0] // grid_size[0], img_size[1] // grid_size[1])

    # Anchors expressed in grid cells; multiplied back by stride further down.
    anchors = [(a[0] / stride[0], a[1] / stride[1]) for a in anchors]

    box_centers, box_sizes, confidence, classes = tf.split(
        predictions, [2, 2, 1, num_classes], axis=-1)

    box_centers = tf.nn.sigmoid(box_centers)
    confidence = tf.nn.sigmoid(confidence)

    # Per-cell offsets, repeated once per anchor.
    grid_x = tf.range(grid_size[0], dtype=tf.float32)
    grid_y = tf.range(grid_size[1], dtype=tf.float32)
    a, b = tf.meshgrid(grid_x, grid_y)

    x_offset = tf.reshape(a, (-1, 1))
    y_offset = tf.reshape(b, (-1, 1))

    x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
    x_y_offset = tf.reshape(tf.tile(x_y_offset, [1, num_anchors]), [1, -1, 2])

    # Centers: sigmoid offset within the cell + cell index, scaled by stride.
    box_centers = box_centers + x_y_offset
    box_centers = box_centers * stride

    # Sizes: exp(prediction) * anchor, scaled by stride.
    anchors = tf.tile(anchors, [dim, 1])
    box_sizes = tf.exp(box_sizes) * anchors
    box_sizes = box_sizes * stride

    detections = tf.concat([box_centers, box_sizes, confidence], axis=-1)

    classes = tf.nn.sigmoid(classes)
    predictions = tf.concat([detections, classes], axis=-1)
    return predictions
179 | 
180 | 
def _upsample(inputs, out_shape, data_format='NCHW'):
    """
    Nearest-neighbour resize of *inputs* to the spatial size of *out_shape*.

    :param inputs: 4-D feature map in the layout given by data_format.
    :param out_shape: full 4-element shape (in the same layout) of the tensor
        whose height and width should be matched.
    :param data_format: 'NCHW' or 'NHWC'.
    :return: resized tensor in the original layout; the op is named 'upsampled'.
    """
    # tf.image.resize_nearest_neighbor accepts input in format NHWC and a
    # size given as (new_height, new_width).
    if data_format == 'NCHW':
        inputs = tf.transpose(inputs, [0, 2, 3, 1])
        # NCHW: height is entry 2, width entry 3 (these were swapped before,
        # which only worked for square feature maps).
        new_height = out_shape[2]
        new_width = out_shape[3]
    else:
        # NHWC: height is entry 1, width entry 2.
        new_height = out_shape[1]
        new_width = out_shape[2]

    inputs = tf.image.resize_nearest_neighbor(inputs, (new_height, new_width))

    # back to NCHW if needed
    if data_format == 'NCHW':
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

    inputs = tf.identity(inputs, name='upsampled')
    return inputs
201 | 
202 | 
def yolo_v3(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False, with_spp=False):
    """
    Creates YOLO v3 model.

    :param inputs: a 4-D tensor of size [batch_size, height, width, channels].
        Dimension batch_size may be undefined. The channel order is RGB.
    :param num_classes: number of predicted classes.
    :param is_training: whether is training or not.
    :param data_format: data format NCHW or NHWC.
    :param reuse: whether or not the network and its variables should be reused.
    :param with_spp: whether or not is using spp layer.
    :return: tensor named 'detections': the outputs of the three detection
        layers concatenated along axis 1, with 5 + num_classes values per box.
    """
    # it will be needed later on
    img_size = inputs.get_shape().as_list()[1:3]

    # transpose the inputs to NCHW
    if data_format == 'NCHW':
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

    # normalize values to range [0..1]
    inputs = inputs / 255

    # set batch norm params
    batch_norm_params = {
        'decay': _BATCH_NORM_DECAY,
        'epsilon': _BATCH_NORM_EPSILON,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    # Set activation_fn and parameters for conv2d, batch_norm.
    with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], data_format=data_format, reuse=reuse):
        with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            biases_initializer=None,
                            activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)):
            with tf.variable_scope('darknet-53'):
                route_1, route_2, inputs = darknet53(inputs)

            with tf.variable_scope('yolo-v3'):
                # first scale: the only block that may contain the SPP layer
                route, inputs = _yolo_block(inputs, 512, data_format, with_spp)

                detect_1 = _detection_layer(
                    inputs, num_classes, _ANCHORS[6:9], img_size, data_format)
                detect_1 = tf.identity(detect_1, name='detect_1')

                # second scale: upsample and merge with route_2
                inputs = _conv2d_fixed_padding(route, 256, 1)
                upsample_size = route_2.get_shape().as_list()
                inputs = _upsample(inputs, upsample_size, data_format)
                inputs = tf.concat([inputs, route_2],
                                   axis=1 if data_format == 'NCHW' else 3)

                # data_format is passed for consistency with the first block
                # (it is only consumed when with_spp is set).
                route, inputs = _yolo_block(inputs, 256, data_format)

                detect_2 = _detection_layer(
                    inputs, num_classes, _ANCHORS[3:6], img_size, data_format)
                detect_2 = tf.identity(detect_2, name='detect_2')

                # third scale: upsample and merge with route_1
                inputs = _conv2d_fixed_padding(route, 128, 1)
                upsample_size = route_1.get_shape().as_list()
                inputs = _upsample(inputs, upsample_size, data_format)
                inputs = tf.concat([inputs, route_1],
                                   axis=1 if data_format == 'NCHW' else 3)

                _, inputs = _yolo_block(inputs, 128, data_format)

                detect_3 = _detection_layer(
                    inputs, num_classes, _ANCHORS[0:3], img_size, data_format)
                detect_3 = tf.identity(detect_3, name='detect_3')

                detections = tf.concat([detect_1, detect_2, detect_3], axis=1)
                detections = tf.identity(detections, name='detections')
                return detections
278 | 
279 | 
def yolo_v3_spp(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False):
    """
    Creates the SPP variant of YOLO v3: yolo_v3() with with_spp=True.

    :param inputs: a 4-D tensor of size [batch_size, height, width, channels].
        Dimension batch_size may be undefined. The channel order is RGB.
    :param num_classes: number of predicted classes.
    :param is_training: whether is training or not.
    :param data_format: data format NCHW or NHWC.
    :param reuse: whether or not the network and its variables should be reused.
    :return: whatever yolo_v3() returns for the same arguments with SPP enabled.
    """
    return yolo_v3(inputs, num_classes, is_training, data_format, reuse,
                   with_spp=True)
293 | 


--------------------------------------------------------------------------------
/yolo_v3_tiny.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | from yolo_v3 import _conv2d_fixed_padding, _fixed_padding, _get_size, \
  6 |     _detection_layer, _upsample
  7 | 
# Short alias for the TF-Slim layers API used throughout this module.
slim = tf.contrib.slim

# Batch-norm hyper-parameters; passed to slim.batch_norm as 'decay' and
# 'epsilon' through batch_norm_params in yolo_v3_tiny.
_BATCH_NORM_DECAY = 0.9
_BATCH_NORM_EPSILON = 1e-05
# Passed as `alpha` to tf.nn.leaky_relu, the conv activation in yolo_v3_tiny.
_LEAKY_RELU = 0.1

# Six anchor pairs. yolo_v3_tiny hands the last three (_ANCHORS[3:6]) to its
# first detection layer and the first three (_ANCHORS[0:3]) to its second.
# NOTE(review): presumably (width, height) in input-image pixels -- confirm
# against _detection_layer in yolo_v3.py.
_ANCHORS = [(10, 14),  (23, 27),  (37, 58),
            (81, 82),  (135, 169),  (344, 319)]
 16 | 
 17 | 
 18 | def yolo_v3_tiny(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False):
 19 |     """
 20 |     Creates YOLO v3 tiny model.
 21 | 
 22 |     :param inputs: a 4-D tensor of size [batch_size, height, width, channels].
 23 |         Dimension batch_size may be undefined. The channel order is RGB.
 24 |     :param num_classes: number of predicted classes.
 25 |     :param is_training: whether is training or not.
 26 |     :param data_format: data format NCHW or NHWC.
 27 |     :param reuse: whether or not the network and its variables should be reused.
 28 |     :return:
 29 |     """
 30 |     # it will be needed later on
 31 |     img_size = inputs.get_shape().as_list()[1:3]
 32 | 
 33 |     # transpose the inputs to NCHW
 34 |     if data_format == 'NCHW':
 35 |         inputs = tf.transpose(inputs, [0, 3, 1, 2])
 36 | 
 37 |     # normalize values to range [0..1]
 38 |     inputs = inputs / 255
 39 | 
 40 |     # set batch norm params
 41 |     batch_norm_params = {
 42 |         'decay': _BATCH_NORM_DECAY,
 43 |         'epsilon': _BATCH_NORM_EPSILON,
 44 |         'scale': True,
 45 |         'is_training': is_training,
 46 |         'fused': None,  # Use fused batch norm if possible.
 47 |     }
 48 | 
 49 |     # Set activation_fn and parameters for conv2d, batch_norm.
 50 |     with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding, slim.max_pool2d], data_format=data_format):
 51 |         with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], reuse=reuse):
 52 |             with slim.arg_scope([slim.conv2d],
 53 |                                 normalizer_fn=slim.batch_norm,
 54 |                                 normalizer_params=batch_norm_params,
 55 |                                 biases_initializer=None,
 56 |                                 activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)):
 57 | 
 58 |                 with tf.variable_scope('yolo-v3-tiny'):
 59 |                     for i in range(6):
 60 |                         inputs = _conv2d_fixed_padding(
 61 |                             inputs, 16 * pow(2, i), 3)
 62 | 
 63 |                         if i == 4:
 64 |                             route_1 = inputs
 65 | 
 66 |                         if i == 5:
 67 |                             inputs = slim.max_pool2d(
 68 |                                 inputs, [2, 2], stride=1, padding="SAME", scope='pool2')
 69 |                         else:
 70 |                             inputs = slim.max_pool2d(
 71 |                                 inputs, [2, 2], scope='pool2')
 72 | 
 73 |                     inputs = _conv2d_fixed_padding(inputs, 1024, 3)
 74 |                     inputs = _conv2d_fixed_padding(inputs, 256, 1)
 75 |                     route_2 = inputs
 76 | 
 77 |                     inputs = _conv2d_fixed_padding(inputs, 512, 3)
 78 |                     # inputs = _conv2d_fixed_padding(inputs, 255, 1)
 79 | 
 80 |                     detect_1 = _detection_layer(
 81 |                         inputs, num_classes, _ANCHORS[3:6], img_size, data_format)
 82 |                     detect_1 = tf.identity(detect_1, name='detect_1')
 83 | 
 84 |                     inputs = _conv2d_fixed_padding(route_2, 128, 1)
 85 |                     upsample_size = route_1.get_shape().as_list()
 86 |                     inputs = _upsample(inputs, upsample_size, data_format)
 87 | 
 88 |                     inputs = tf.concat([inputs, route_1],
 89 |                                        axis=1 if data_format == 'NCHW' else 3)
 90 | 
 91 |                     inputs = _conv2d_fixed_padding(inputs, 256, 3)
 92 |                     # inputs = _conv2d_fixed_padding(inputs, 255, 1)
 93 | 
 94 |                     detect_2 = _detection_layer(
 95 |                         inputs, num_classes, _ANCHORS[0:3], img_size, data_format)
 96 |                     detect_2 = tf.identity(detect_2, name='detect_2')
 97 | 
 98 |                     detections = tf.concat([detect_1, detect_2], axis=1)
 99 |                     detections = tf.identity(detections, name='detections')
100 |                     return detections
101 | 


--------------------------------------------------------------------------------