├── CMakeLists.txt
├── README.md
├── config
└── object_detection.yaml
├── launch
└── object_detection.launch
├── package.xml
└── scripts
├── classifier
├── __init__.py
├── __init__.pyc
├── model
│ ├── classes.txt
│ ├── tiny_yolo_anchors.txt
│ └── yolo.h5
├── test.py
├── yolo.py
├── yolo.pyc
└── yolo3
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── model.py
│ ├── model.pyc
│ ├── utils.py
│ └── utils.pyc
└── object_detection.py
/CMakeLists.txt:
--------------------------------------------------------------------------------
# Build description for the object_detection catkin package.
#
# The package contains no compiled targets: it only ships Python scripts,
# a launch file and a YAML configuration, so this file declares the catkin
# package and installs those resources.

# 3.0.2 is the minimum recommended by the ROS Noetic migration guide; older
# values trigger policy/deprecation warnings with current CMake releases.
cmake_minimum_required(VERSION 3.0.2)
project(object_detection)

## Find catkin macros and libraries.
## The component list mirrors the dependencies declared in package.xml.
find_package(catkin REQUIRED COMPONENTS
  roscpp
  rospy
  std_msgs
)

###################################
## catkin specific configuration ##
###################################
## Generates the cmake config files for this package. Nothing (headers,
## libraries, dependencies) is exported to dependent projects.
catkin_package()

#############
## Install ##
#############

# All install targets use catkin DESTINATION variables.
# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html

## Launch files and YAML configuration.
install(DIRECTORY
  launch
  config
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
)

## Python node, the classifier package and the model files.
## config/object_detection.yaml addresses the model as
## /scripts/classifier/model/..., so the scripts tree is installed with its
## layout intact. NOTE(review): this presumes those paths are joined to the
## package root at runtime - confirm against scripts/object_detection.py.
## Stale byte-code (*.pyc) is not installed.
install(DIRECTORY
  scripts
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
  USE_SOURCE_PERMISSIONS
  PATTERN "*.pyc" EXCLUDE
)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # YOLOv3-tiny on Jetson tx2
2 |
3 | This is a ROS node that runs YOLOv3-tiny; it has been tested on the NVIDIA Jetson TX2.
4 |
5 | For background on how this repo works, please see the accompanying Medium post: https://medium.com/intro-to-artificial-intelligence/run-yolo-v3-as-ros-node-on-jetson-tx2-without-tensorrt-43f562aadc68
6 |
7 | ### Credit
8 |
9 | This ROS node is built from components of the resources below. Many thanks to their authors for the great effort.
10 |
11 | * https://medium.com/@manivannan_data/how-to-train-yolov3-to-detect-custom-objects-ccbcafeb13d2
12 | * https://medium.com/@manivannan_data/how-to-train-yolov2-to-detect-custom-objects-9010df784f36
13 | * https://github.com/qqwweee/keras-yolo3
14 |
--------------------------------------------------------------------------------
/config/object_detection.yaml:
--------------------------------------------------------------------------------
1 | classification:
2 | model: '/scripts/classifier/model/yolo.h5'
3 | anchors: '/scripts/classifier/model/tiny_yolo_anchors.txt'
4 | classes: '/scripts/classifier/model/classes.txt'
5 |
--------------------------------------------------------------------------------
/launch/object_detection.launch:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/package.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | object_detection
4 | 0.0.0
5 | The object_detection package
6 |
7 |
8 |
9 |
10 | nvidia
11 |
12 |
13 |
14 |
15 |
16 | TODO
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 | catkin
52 | roscpp
53 | rospy
54 | std_msgs
55 | roscpp
56 | rospy
57 | std_msgs
58 | roscpp
59 | rospy
60 | std_msgs
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
--------------------------------------------------------------------------------
/scripts/classifier/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dkarunakaran/ROS-node-YOLO-v3-tiny/03aab28756984b8ac3fd5a0f3b9676323a6bf0dc/scripts/classifier/__init__.py
--------------------------------------------------------------------------------
/scripts/classifier/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dkarunakaran/ROS-node-YOLO-v3-tiny/03aab28756984b8ac3fd5a0f3b9676323a6bf0dc/scripts/classifier/__init__.pyc
--------------------------------------------------------------------------------
/scripts/classifier/model/classes.txt:
--------------------------------------------------------------------------------
1 | Red
2 | Green
3 | Yellow
4 |
--------------------------------------------------------------------------------
/scripts/classifier/model/tiny_yolo_anchors.txt:
--------------------------------------------------------------------------------
1 | 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
2 |
--------------------------------------------------------------------------------
/scripts/classifier/model/yolo.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dkarunakaran/ROS-node-YOLO-v3-tiny/03aab28756984b8ac3fd5a0f3b9676323a6bf0dc/scripts/classifier/model/yolo.h5
--------------------------------------------------------------------------------
/scripts/classifier/test.py:
--------------------------------------------------------------------------------
"""Smoke check: ask rospkg where the object_detection package lives."""
import rospkg

# Get an instance of RosPack with the default search paths.
rospack = rospkg.RosPack()

# Resolve the directory of the object_detection package and show it, so that
# running this script actually reports what was found.
print(rospack.get_path('object_detection'))
8 |
--------------------------------------------------------------------------------
/scripts/classifier/yolo.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | Run a YOLO_v3 style detection model on test images.
5 | """
6 |
7 | import colorsys
8 | import os
9 | from timeit import default_timer as timer
10 | import tensorflow as tf
11 | import numpy as np
12 | from keras import backend as K
13 | from keras.models import load_model
14 | from keras.layers import Input
15 | from keras.backend.tensorflow_backend import set_session
16 | from PIL import Image, ImageFont, ImageDraw
17 |
18 | from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body
19 | from yolo3.utils import letterbox_image
20 | import os
21 | import rospkg
22 |
# Restrict CUDA to device 0 for this process.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Multi-GPU support is disabled; the import below is kept for reference and
# gpu_num is only referenced by the commented-out multi_gpu_model code.
# from keras.utils import multi_gpu_model
gpu_num = 1
26 |
class YOLO(object):
    """YOLOv3 / tiny-YOLOv3 detector running a Keras model in a TF session.

    Construction reads the class names and anchors, configures the
    TensorFlow session, loads the model and builds the filtered output
    tensors; ``detect_image`` then runs one image through the session.
    """

    def __init__(self, model, anchors, classes):
        """Load class names, anchors and the model, and build the graph.

        Parameters
        ----------
        model: path to the Keras ``.h5`` model or weights file
        anchors: path to a text file whose first line holds the
            comma-separated anchor sizes
        classes: path to a text file with one class name per line
        """
        self.model_path = model
        self.anchors_path = anchors
        self.classes_path = classes
        self.score = 0.3   # passed to yolo_eval as score_threshold
        self.iou = 0.45    # passed to yolo_eval as iou_threshold
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()

        # Cap this process at 30% of the GPU memory and make that session
        # the one Keras uses.
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.3
        set_session(tf.Session(config=config))
        self.sess = K.get_session()
        self.model_image_size = (416, 416)  # fixed size or (None, None), hw
        self.boxes, self.scores, self.classes = self.generate()
        # Remember the graph so detect_image can re-enter it explicitly.
        self.graph = tf.get_default_graph()

    def _get_class(self):
        """Return the stripped lines of the classes file as a list."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        return [c.strip() for c in class_names]

    def _get_anchors(self):
        """Return the anchors from the first line of the anchors file.

        The comma-separated values are parsed as floats and returned as a
        numpy array reshaped to two columns.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape(-1, 2)

    def _make_colors(self):
        """Return one RGB tuple (0-255 ints) per class, in shuffled order.

        Hues are spread evenly over the class list. The shuffle uses a fixed
        seed so the colours are the same on every run; the global numpy seed
        is reset afterwards.
        """
        num_classes = len(self.class_names)
        # float() keeps this a true division under Python 2 as well; with
        # integer division every class would receive hue 0.
        hsv_tuples = [(float(x) / num_classes, 1., 1.)
                      for x in range(num_classes)]
        colors = [tuple(int(channel * 255)
                        for channel in colorsys.hsv_to_rgb(*hsv))
                  for hsv in hsv_tuples]
        np.random.seed(10101)  # Fixed seed for consistent colors across runs.
        np.random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
        np.random.seed(None)  # Reset seed to default.
        return colors

    def generate(self):
        """Load the model and build the filtered detection tensors.

        Sets ``self.yolo_model``, ``self.colors`` and
        ``self.input_image_shape`` as side effects.

        Returns
        -------
        boxes, scores, classes: the tensors produced by ``yolo_eval``
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

        # Load model, or construct model and load weights.
        num_anchors = len(self.anchors)
        num_classes = len(self.class_names)
        is_tiny_version = num_anchors == 6  # default setting
        try:
            self.yolo_model = load_model(model_path, compile=False)
        except Exception:
            # Loading as a full model failed: build the architecture and
            # load the file as weights instead.
            if is_tiny_version:
                self.yolo_model = tiny_yolo_body(
                    Input(shape=(None, None, 3)), num_anchors // 2, num_classes)
            else:
                self.yolo_model = yolo_body(
                    Input(shape=(None, None, 3)), num_anchors // 3, num_classes)
            # Use the expanded path, the same one load_model() was given.
            # Make sure model, anchors and classes match.
            self.yolo_model.load_weights(model_path)
        else:
            assert self.yolo_model.layers[-1].output_shape[-1] == \
                num_anchors / len(self.yolo_model.output) * (num_classes + 5), \
                'Mismatch between model and given anchor and class sizes'

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Generate colors for drawing bounding boxes.
        self.colors = self._make_colors()

        # Generate output tensor targets for filtered bounding boxes.
        self.input_image_shape = K.placeholder(shape=(2, ))
        boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
                                           len(self.class_names), self.input_image_shape,
                                           score_threshold=self.score, iou_threshold=self.iou)
        return boxes, scores, classes

    def detect_image(self, image):
        """Run the detector on one image.

        Parameters
        ----------
        image: the image to classify; it is letterboxed to the model input
            size before inference.
            NOTE(review): the fixed-size path reads ``image.shape`` while the
            ``(None, None)`` path reads ``image.width`` / ``image.height`` -
            confirm which image type callers pass.

        Returns
        -------
        (out_scores, out_classes, image): the scores and class indices of the
        detections and the input image, unchanged. The boxes are computed
        but not returned.
        """
        if self.model_image_size != (None, None):
            assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
            boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
        else:
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
        with self.graph.as_default():
            out_boxes, out_scores, out_classes = self.sess.run(
                [self.boxes, self.scores, self.classes],
                feed_dict={
                    self.yolo_model.input: image_data,
                    self.input_image_shape: [image.shape[1], image.shape[0]],
                    # NOTE(review): 1 selects the Keras training phase; plain
                    # inference normally feeds 0. Left unchanged because the
                    # shipped weights were tested with this value - confirm.
                    K.learning_phase(): 1
                })

        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

        return out_scores, out_classes, image

    def close_session(self):
        """Close the TensorFlow session used for inference."""
        self.sess.close()
176 |
def detect_img(yolo):
    """Interactively run ``yolo`` on image files named on standard input.

    Prompts for a filename, opens it with PIL, runs ``yolo.detect_image`` and
    shows the resulting image. Files that cannot be opened are reported and
    the prompt is repeated. The loop ends at end-of-input, and the detector's
    session is closed on the way out, including when an error or Ctrl-C
    interrupts the loop.

    Parameters
    ----------
    yolo: object providing ``detect_image(image)`` and ``close_session()``
    """
    # Python 2's input() evaluates what is typed; read the raw text instead.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    try:
        while True:
            try:
                img = read_line('Input image filename:')
            except EOFError:
                break
            # Tolerate filenames typed with surrounding quotes, which the
            # evaluating Python 2 input() used to require.
            img = img.strip().strip('\'"')
            try:
                image = Image.open(img)
            except Exception:
                print('Open Error! Try again!')
                continue
            else:
                result = yolo.detect_image(image)
                # detect_image returns (scores, classes, image); the image is
                # the last element. A bare image is accepted as well.
                r_image = result[-1] if isinstance(result, tuple) else result
                r_image.show()
    finally:
        yolo.close_session()
189 |
190 |
--------------------------------------------------------------------------------
/scripts/classifier/yolo.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dkarunakaran/ROS-node-YOLO-v3-tiny/03aab28756984b8ac3fd5a0f3b9676323a6bf0dc/scripts/classifier/yolo.pyc
--------------------------------------------------------------------------------
/scripts/classifier/yolo3/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dkarunakaran/ROS-node-YOLO-v3-tiny/03aab28756984b8ac3fd5a0f3b9676323a6bf0dc/scripts/classifier/yolo3/__init__.py
--------------------------------------------------------------------------------
/scripts/classifier/yolo3/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dkarunakaran/ROS-node-YOLO-v3-tiny/03aab28756984b8ac3fd5a0f3b9676323a6bf0dc/scripts/classifier/yolo3/__init__.pyc
--------------------------------------------------------------------------------
/scripts/classifier/yolo3/model.py:
--------------------------------------------------------------------------------
1 | """YOLO_v3 Model Defined in Keras."""
2 |
3 | from functools import wraps
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from keras import backend as K
8 | from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D
9 | from keras.layers.advanced_activations import LeakyReLU
10 | from keras.layers.normalization import BatchNormalization
11 | from keras.models import Model
12 | from keras.regularizers import l2
13 |
14 | from utils import compose
15 |
16 |
@wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
    """Create a Conv2D layer with Darknet defaults.

    Defaults are an l2(5e-4) kernel regularizer and 'valid' padding for
    strides of (2,2), 'same' padding otherwise. Any keyword given by the
    caller overrides these defaults.
    """
    is_strided = kwargs.get('strides') == (2, 2)
    conv_options = dict(
        kernel_regularizer=l2(5e-4),
        padding='valid' if is_strided else 'same',
    )
    conv_options.update(kwargs)
    return Conv2D(*args, **conv_options)
24 |
def DarknetConv2D_BN_Leaky(*args, **kwargs):
    """Compose a DarknetConv2D, BatchNormalization and LeakyReLU(0.1).

    The convolution is created without bias unless the caller passes
    ``use_bias`` explicitly.
    """
    conv_kwargs = dict({'use_bias': False}, **kwargs)
    # Layers are instantiated in the same order they are applied.
    convolution = DarknetConv2D(*args, **conv_kwargs)
    batch_norm = BatchNormalization()
    activation = LeakyReLU(alpha=0.1)
    return compose(convolution, batch_norm, activation)
33 |
def resblock_body(x, num_filters, num_blocks):
    """Apply a downsampling convolution followed by ``num_blocks`` resblocks.

    Each resblock is a 1x1 convolution with half the filters and a 3x3
    convolution, added back onto its input.
    """
    # Darknet uses left and top padding instead of 'same' mode
    padded = ZeroPadding2D(((1, 0), (1, 0)))(x)
    features = DarknetConv2D_BN_Leaky(num_filters, (3, 3), strides=(2, 2))(padded)
    for _ in range(num_blocks):
        residual = compose(
            DarknetConv2D_BN_Leaky(num_filters // 2, (1, 1)),
            DarknetConv2D_BN_Leaky(num_filters, (3, 3)))(features)
        features = Add()([features, residual])
    return features
45 |
def darknet_body(x):
    """Darknet body having 52 Convolution2D layers.

    A 32-filter 3x3 convolution is followed by five resblock stages whose
    (filters, block count) settings are listed below.
    """
    stages = ((64, 1), (128, 2), (256, 8), (512, 8), (1024, 4))
    x = DarknetConv2D_BN_Leaky(32, (3, 3))(x)
    for num_filters, num_blocks in stages:
        x = resblock_body(x, num_filters, num_blocks)
    return x
55 |
def make_last_layers(x, num_filters, out_filters):
    """Build 6 Conv2D_BN_Leaky layers followed by a linear Conv2D layer.

    Returns
    -------
    (trunk, head): ``trunk`` is the output of the first five layers and
    ``head`` is the output of the sixth layer plus the final
    ``out_filters``-channel 1x1 convolution applied to ``trunk``.
    """
    wide_filters = num_filters * 2
    trunk = compose(
        DarknetConv2D_BN_Leaky(num_filters, (1, 1)),
        DarknetConv2D_BN_Leaky(wide_filters, (3, 3)),
        DarknetConv2D_BN_Leaky(num_filters, (1, 1)),
        DarknetConv2D_BN_Leaky(wide_filters, (3, 3)),
        DarknetConv2D_BN_Leaky(num_filters, (1, 1)))(x)
    head = compose(
        DarknetConv2D_BN_Leaky(wide_filters, (3, 3)),
        DarknetConv2D(out_filters, (1, 1)))(trunk)
    return trunk, head
68 |
69 |
def yolo_body(inputs, num_anchors, num_classes):
    """Create the YOLO_V3 model CNN body in Keras.

    Returns a Model mapping ``inputs`` to three outputs, each with
    ``num_anchors*(num_classes+5)`` channels.
    """
    out_filters = num_anchors * (num_classes + 5)
    darknet = Model(inputs, darknet_body(inputs))
    x, first_output = make_last_layers(darknet.output, 512, out_filters)
    outputs = [first_output]

    # Each further scale upsamples the trunk and concatenates it with an
    # earlier darknet layer, addressed by its index in darknet.layers.
    for num_filters, skip_index in ((256, 152), (128, 92)):
        x = compose(
            DarknetConv2D_BN_Leaky(num_filters, (1, 1)),
            UpSampling2D(2))(x)
        x = Concatenate()([x, darknet.layers[skip_index].output])
        x, scale_output = make_last_layers(x, num_filters, out_filters)
        outputs.append(scale_output)

    return Model(inputs, outputs)
88 |
def tiny_yolo_body(inputs, num_anchors, num_classes):
    """Create the Tiny YOLO_v3 model CNN body in Keras.

    Returns a Model mapping ``inputs`` to two outputs, each with
    ``num_anchors*(num_classes+5)`` channels.
    """
    out_filters = num_anchors * (num_classes + 5)

    def _pool(strides=(2, 2)):
        # 2x2 max-pooling with 'same' padding; only the stride varies.
        return MaxPooling2D(pool_size=(2, 2), strides=strides, padding='same')

    backbone = compose(
        DarknetConv2D_BN_Leaky(16, (3, 3)),
        _pool(),
        DarknetConv2D_BN_Leaky(32, (3, 3)),
        _pool(),
        DarknetConv2D_BN_Leaky(64, (3, 3)),
        _pool(),
        DarknetConv2D_BN_Leaky(128, (3, 3)),
        _pool(),
        DarknetConv2D_BN_Leaky(256, (3, 3)))(inputs)
    deep = compose(
        _pool(),
        DarknetConv2D_BN_Leaky(512, (3, 3)),
        _pool(strides=(1, 1)),
        DarknetConv2D_BN_Leaky(1024, (3, 3)),
        DarknetConv2D_BN_Leaky(256, (1, 1)))(backbone)
    coarse_output = compose(
        DarknetConv2D_BN_Leaky(512, (3, 3)),
        DarknetConv2D(out_filters, (1, 1)))(deep)

    upsampled = compose(
        DarknetConv2D_BN_Leaky(128, (1, 1)),
        UpSampling2D(2))(deep)
    fine_output = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(256, (3, 3)),
        DarknetConv2D(out_filters, (1, 1)))([upsampled, backbone])

    return Model(inputs, [coarse_output, fine_output])
120 |
121 |
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Returns
    -------
    ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True``, otherwise
    ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_h = grid_shape[0]
    grid_w = grid_shape[1]
    row_index = K.reshape(K.arange(0, stop=grid_h), [-1, 1, 1, 1])
    col_index = K.reshape(K.arange(0, stop=grid_w), [1, -1, 1, 1])
    grid_y = K.tile(row_index, [1, grid_w, 1, 1])
    grid_x = K.tile(col_index, [grid_h, 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_h, grid_w, num_anchors, num_classes + 5])
    feats_dtype = K.dtype(feats)

    # Adjust predictions to each spatial grid point and anchor size.
    grid_wh = K.cast(grid_shape[::-1], feats_dtype)
    input_wh = K.cast(input_shape[::-1], feats_dtype)
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / grid_wh
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / input_wh

    if calc_loss == True:
        return grid, feats, box_xy, box_wh

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
148 |
149 |
def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    """Get corrected boxes.

    Undoes the offset and scale between ``input_shape`` and the aspect-
    preserving resize of ``image_shape``, then returns boxes as
    (y_min, x_min, y_max, x_max) scaled by ``image_shape``.
    """
    box_yx = box_xy[..., ::-1]
    box_hw = box_wh[..., ::-1]
    box_dtype = K.dtype(box_yx)
    input_shape = K.cast(input_shape, box_dtype)
    image_shape = K.cast(image_shape, box_dtype)

    new_shape = K.round(image_shape * K.min(input_shape / image_shape))
    offset = (input_shape - new_shape) / 2. / input_shape
    scale = input_shape / new_shape
    box_yx = (box_yx - offset) * scale
    box_hw = box_hw * scale

    half_hw = box_hw / 2.
    box_mins = box_yx - half_hw
    box_maxes = box_yx + half_hw
    corners = K.concatenate([
        box_mins[..., 0:1],   # y_min
        box_mins[..., 1:2],   # x_min
        box_maxes[..., 0:1],  # y_max
        box_maxes[..., 1:2]   # x_max
    ])

    # Scale boxes back to original image shape.
    return corners * K.concatenate([image_shape, image_shape])
174 |
175 |
def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
    """Process one Conv layer output into flat boxes and per-class scores.

    Returns
    -------
    (boxes, box_scores): ``boxes`` reshaped to ``[-1, 4]`` and ``box_scores``
    (confidence times class probability) reshaped to ``[-1, num_classes]``.
    """
    head = yolo_head(feats, anchors, num_classes, input_shape)
    box_xy, box_wh, box_confidence, box_class_probs = head
    corrected = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
    flat_boxes = K.reshape(corrected, [-1, 4])
    flat_scores = K.reshape(box_confidence * box_class_probs, [-1, num_classes])
    return flat_boxes, flat_scores
185 |
186 |
def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=20,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input and return filtered boxes.

    Boxes from every output layer are pooled, thresholded on
    ``score_threshold`` and reduced per class with non-max suppression
    (at most ``max_boxes`` per class, using ``iou_threshold``).

    Returns
    -------
    (boxes, scores, classes): the concatenated per-class results.
    """
    num_layers = len(yolo_outputs)
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]  # default setting
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    layer_boxes = []
    layer_scores = []
    for layer in range(num_layers):
        found_boxes, found_scores = yolo_boxes_and_scores(
            yolo_outputs[layer], anchors[anchor_mask[layer]],
            num_classes, input_shape, image_shape)
        layer_boxes.append(found_boxes)
        layer_scores.append(found_scores)
    boxes = K.concatenate(layer_boxes, axis=0)
    box_scores = K.concatenate(layer_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    kept_boxes = []
    kept_scores = []
    kept_classes = []
    for c in range(num_classes):
        # TODO: use keras backend instead of tf.
        class_mask = mask[:, c]
        class_boxes = tf.boolean_mask(boxes, class_mask)
        class_box_scores = tf.boolean_mask(box_scores[:, c], class_mask)
        nms_index = tf.image.non_max_suppression(
            class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        kept_boxes.append(class_boxes)
        kept_scores.append(class_box_scores)
        # Label every surviving box with this class index.
        kept_classes.append(K.ones_like(class_box_scores, 'int32') * c)

    return (K.concatenate(kept_boxes, axis=0),
            K.concatenate(kept_scores, axis=0),
            K.concatenate(kept_classes, axis=0))
230 |
231 |
232 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
233 | '''Preprocess true boxes to training input format
234 |
235 | Parameters
236 | ----------
237 | true_boxes: array, shape=(m, T, 5)
238 | Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape.
239 | input_shape: array-like, hw, multiples of 32
240 | anchors: array, shape=(N, 2), wh
241 | num_classes: integer
242 |
243 | Returns
244 | -------
245 | y_true: list of array, shape like yolo_outputs, xywh are reletive value
246 |
247 | '''
248 | assert (true_boxes[..., 4]0
269 |
270 | for b in range(m):
271 | # Discard zero rows.
272 | wh = boxes_wh[b, valid_mask[b]]
273 | if len(wh)==0: continue
274 | # Expand dim to apply broadcasting.
275 | wh = np.expand_dims(wh, -2)
276 | box_maxes = wh / 2.
277 | box_mins = -box_maxes
278 |
279 | intersect_mins = np.maximum(box_mins, anchor_mins)
280 | intersect_maxes = np.minimum(box_maxes, anchor_maxes)
281 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
282 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
283 | box_area = wh[..., 0] * wh[..., 1]
284 | anchor_area = anchors[..., 0] * anchors[..., 1]
285 | iou = intersect_area / (box_area + anchor_area - intersect_area)
286 |
287 | # Find best anchor for each true box
288 | best_anchor = np.argmax(iou, axis=-1)
289 |
290 | for t, n in enumerate(best_anchor):
291 | for l in range(num_layers):
292 | if n in anchor_mask[l]:
293 | i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32')
294 | j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32')
295 | k = anchor_mask[l].index(n)
296 | c = true_boxes[b,t, 4].astype('int32')
297 | y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4]
298 | y_true[l][b, j, i, k, 4] = 1
299 | y_true[l][b, j, i, k, 5+c] = 1
300 |
301 | return y_true
302 |
303 |
def box_iou(b1, b2):
    '''Return iou tensor

    Parameters
    ----------
    b1: tensor, shape=(i1,...,iN, 4), xywh
    b2: tensor, shape=(j, 4), xywh

    Returns
    -------
    iou: tensor, shape=(i1,...,iN, j)

    '''

    def _extent(boxes):
        # Split xywh boxes into their size and min/max corners.
        centers = boxes[..., :2]
        sizes = boxes[..., 2:4]
        half_sizes = sizes / 2.
        return sizes, centers - half_sizes, centers + half_sizes

    # Expand dims so every b1 box broadcasts against every b2 box.
    b1_wh, b1_mins, b1_maxes = _extent(K.expand_dims(b1, -2))
    b2_wh, b2_mins, b2_maxes = _extent(K.expand_dims(b2, 0))

    overlap_mins = K.maximum(b1_mins, b2_mins)
    overlap_maxes = K.minimum(b1_maxes, b2_maxes)
    overlap_wh = K.maximum(overlap_maxes - overlap_mins, 0.)
    intersect_area = overlap_wh[..., 0] * overlap_wh[..., 1]

    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    return intersect_area / (b1_area + b2_area - intersect_area)
343 |
344 |
345 | def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
346 | '''Return yolo_loss tensor
347 |
348 | Parameters
349 | ----------
350 | yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
351 | y_true: list of array, the output of preprocess_true_boxes
352 | anchors: array, shape=(N, 2), wh
353 | num_classes: integer
354 | ignore_thresh: float, the iou threshold whether to ignore object confidence loss
355 |
356 | Returns
357 | -------
358 | loss: tensor, shape=(1,)
359 |
360 | '''
361 | num_layers = len(anchors)//3 # default setting
362 | yolo_outputs = args[:num_layers]
363 | y_true = args[num_layers:]
364 | anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
365 | input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
366 | grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
367 | loss = 0
368 | m = K.shape(yolo_outputs[0])[0] # batch size, tensor
369 | mf = K.cast(m, K.dtype(yolo_outputs[0]))
370 |
371 | for l in range(num_layers):
372 | object_mask = y_true[l][..., 4:5]
373 | true_class_probs = y_true[l][..., 5:]
374 |
375 | grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
376 | anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
377 | pred_box = K.concatenate([pred_xy, pred_wh])
378 |
379 | # Darknet raw box to calculate loss.
380 | raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid
381 | raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
382 | raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf
383 | box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]
384 |
385 | # Find ignore mask, iterate over each of batch.
386 | ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
387 | object_mask_bool = K.cast(object_mask, 'bool')
388 | def loop_body(b, ignore_mask):
389 | true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
390 | iou = box_iou(pred_box[b], true_box)
391 | best_iou = K.max(iou, axis=-1)
392 | ignore_mask = ignore_mask.write(b, K.cast(best_iou