├── CMakeLists.txt ├── README.md ├── config └── object_detection.yaml ├── launch └── object_detection.launch ├── package.xml └── scripts ├── classifier ├── __init__.py ├── __init__.pyc ├── model │ ├── classes.txt │ ├── tiny_yolo_anchors.txt │ └── yolo.h5 ├── test.py ├── yolo.py ├── yolo.pyc └── yolo3 │ ├── __init__.py │ ├── __init__.pyc │ ├── model.py │ ├── model.pyc │ ├── utils.py │ └── utils.pyc └── object_detection.py /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.3) 2 | project(object_detection) 3 | 4 | ## Compile as C++11, supported in ROS Kinetic and newer 5 | # add_compile_options(-std=c++11) 6 | 7 | ## Find catkin macros and libraries 8 | ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) 9 | ## is used, also find other catkin packages 10 | find_package(catkin REQUIRED COMPONENTS 11 | roscpp 12 | rospy 13 | std_msgs 14 | ) 15 | 16 | ## System dependencies are found with CMake's conventions 17 | # find_package(Boost REQUIRED COMPONENTS system) 18 | 19 | 20 | ## Uncomment this if the package has a setup.py. This macro ensures 21 | ## modules and global scripts declared therein get installed 22 | ## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html 23 | # catkin_python_setup() 24 | 25 | ################################################ 26 | ## Declare ROS messages, services and actions ## 27 | ################################################ 28 | 29 | ## To declare and build messages, services or actions from within this 30 | ## package, follow these steps: 31 | ## * Let MSG_DEP_SET be the set of packages whose message types you use in 32 | ## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). 
33 | ## * In the file package.xml: 34 | ## * add a build_depend tag for "message_generation" 35 | ## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET 36 | ## * If MSG_DEP_SET isn't empty the following dependency has been pulled in 37 | ## but can be declared for certainty nonetheless: 38 | ## * add a exec_depend tag for "message_runtime" 39 | ## * In this file (CMakeLists.txt): 40 | ## * add "message_generation" and every package in MSG_DEP_SET to 41 | ## find_package(catkin REQUIRED COMPONENTS ...) 42 | ## * add "message_runtime" and every package in MSG_DEP_SET to 43 | ## catkin_package(CATKIN_DEPENDS ...) 44 | ## * uncomment the add_*_files sections below as needed 45 | ## and list every .msg/.srv/.action file to be processed 46 | ## * uncomment the generate_messages entry below 47 | ## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) 48 | 49 | ## Generate messages in the 'msg' folder 50 | # add_message_files( 51 | # FILES 52 | # Message1.msg 53 | # Message2.msg 54 | # ) 55 | 56 | ## Generate services in the 'srv' folder 57 | # add_service_files( 58 | # FILES 59 | # Service1.srv 60 | # Service2.srv 61 | # ) 62 | 63 | ## Generate actions in the 'action' folder 64 | # add_action_files( 65 | # FILES 66 | # Action1.action 67 | # Action2.action 68 | # ) 69 | 70 | ## Generate added messages and services with any dependencies listed here 71 | # generate_messages( 72 | # DEPENDENCIES 73 | # std_msgs 74 | # ) 75 | 76 | ################################################ 77 | ## Declare ROS dynamic reconfigure parameters ## 78 | ################################################ 79 | 80 | ## To declare and build dynamic reconfigure parameters within this 81 | ## package, follow these steps: 82 | ## * In the file package.xml: 83 | ## * add a build_depend and a exec_depend tag for "dynamic_reconfigure" 84 | ## * In this file (CMakeLists.txt): 85 | ## * add "dynamic_reconfigure" to 86 | ## find_package(catkin REQUIRED COMPONENTS 
...) 87 | ## * uncomment the "generate_dynamic_reconfigure_options" section below 88 | ## and list every .cfg file to be processed 89 | 90 | ## Generate dynamic reconfigure parameters in the 'cfg' folder 91 | # generate_dynamic_reconfigure_options( 92 | # cfg/DynReconf1.cfg 93 | # cfg/DynReconf2.cfg 94 | # ) 95 | 96 | ################################### 97 | ## catkin specific configuration ## 98 | ################################### 99 | ## The catkin_package macro generates cmake config files for your package 100 | ## Declare things to be passed to dependent projects 101 | ## INCLUDE_DIRS: uncomment this if your package contains header files 102 | ## LIBRARIES: libraries you create in this project that dependent projects also need 103 | ## CATKIN_DEPENDS: catkin_packages dependent projects also need 104 | ## DEPENDS: system dependencies of this project that dependent projects also need 105 | catkin_package( 106 | # INCLUDE_DIRS include 107 | # LIBRARIES object_detection 108 | # CATKIN_DEPENDS roscpp rospy std_msgs 109 | # DEPENDS system_lib 110 | ) 111 | 112 | ########### 113 | ## Build ## 114 | ########### 115 | 116 | ## Specify additional locations of header files 117 | ## Your package locations should be listed before other locations 118 | include_directories( 119 | # include 120 | ${catkin_INCLUDE_DIRS} 121 | ) 122 | 123 | ## Declare a C++ library 124 | # add_library(${PROJECT_NAME} 125 | # src/${PROJECT_NAME}/object_detection.cpp 126 | # ) 127 | 128 | ## Add cmake target dependencies of the library 129 | ## as an example, code may need to be generated before libraries 130 | ## either from message generation or dynamic reconfigure 131 | # add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) 132 | 133 | ## Declare a C++ executable 134 | ## With catkin_make all packages are built within a single CMake context 135 | ## The recommended prefix ensures that target names across packages don't collide 136 | # 
add_executable(${PROJECT_NAME}_node src/object_detection_node.cpp) 137 | 138 | ## Rename C++ executable without prefix 139 | ## The above recommended prefix causes long target names, the following renames the 140 | ## target back to the shorter version for ease of user use 141 | ## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" 142 | # set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") 143 | 144 | ## Add cmake target dependencies of the executable 145 | ## same as for the library above 146 | # add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) 147 | 148 | ## Specify libraries to link a library or executable target against 149 | # target_link_libraries(${PROJECT_NAME}_node 150 | # ${catkin_LIBRARIES} 151 | # ) 152 | 153 | ############# 154 | ## Install ## 155 | ############# 156 | 157 | # all install targets should use catkin DESTINATION variables 158 | # See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html 159 | 160 | ## Mark executable scripts (Python etc.) for installation 161 | ## in contrast to setup.py, you can choose the destination 162 | # install(PROGRAMS 163 | # scripts/my_python_script 164 | # DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 165 | # ) 166 | 167 | ## Mark executables and/or libraries for installation 168 | # install(TARGETS ${PROJECT_NAME} ${PROJECT_NAME}_node 169 | # ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} 170 | # LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} 171 | # RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 172 | # ) 173 | 174 | ## Mark cpp header files for installation 175 | # install(DIRECTORY include/${PROJECT_NAME}/ 176 | # DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} 177 | # FILES_MATCHING PATTERN "*.h" 178 | # PATTERN ".svn" EXCLUDE 179 | # ) 180 | 181 | ## Mark other files for installation (e.g. launch and bag files, etc.) 
182 | # install(FILES 183 | # # myfile1 184 | # # myfile2 185 | # DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} 186 | # ) 187 | 188 | ############# 189 | ## Testing ## 190 | ############# 191 | 192 | ## Add gtest based cpp test target and link libraries 193 | # catkin_add_gtest(${PROJECT_NAME}-test test/test_object_detection.cpp) 194 | # if(TARGET ${PROJECT_NAME}-test) 195 | # target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) 196 | # endif() 197 | 198 | ## Add folders to be run by python nosetests 199 | # catkin_add_nosetests(test) 200 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLOv3-tiny on Jetson TX2 2 | 3 | This is a tested ROS node for YOLOv3-tiny on the Jetson TX2. 4 | 5 | Please see this Medium post for background on this repo: https://medium.com/intro-to-artificial-intelligence/run-yolo-v3-as-ros-node-on-jetson-tx2-without-tensorrt-43f562aadc68 6 | 7 | ### Credit 8 | 9 | This ROS node reuses components from the resources below. Thanks to their authors for the great effort. 
10 | 11 | * https://medium.com/@manivannan_data/how-to-train-yolov3-to-detect-custom-objects-ccbcafeb13d2 12 | * https://medium.com/@manivannan_data/how-to-train-yolov2-to-detect-custom-objects-9010df784f36 13 | * https://github.com/qqwweee/keras-yolo3 14 | -------------------------------------------------------------------------------- /config/object_detection.yaml: -------------------------------------------------------------------------------- 1 | classification: 2 | model: '/scripts/classifier/model/yolo.h5' 3 | anchors: '/scripts/classifier/model/tiny_yolo_anchors.txt' 4 | classes: '/scripts/classifier/model/classes.txt' 5 | -------------------------------------------------------------------------------- /launch/object_detection.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | object_detection 4 | 0.0.0 5 | The object_detection package 6 | 7 | 8 | 9 | 10 | nvidia 11 | 12 | 13 | 14 | 15 | 16 | TODO 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | catkin 52 | roscpp 53 | rospy 54 | std_msgs 55 | roscpp 56 | rospy 57 | std_msgs 58 | roscpp 59 | rospy 60 | std_msgs 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /scripts/classifier/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkarunakaran/ROS-node-YOLO-v3-tiny/03aab28756984b8ac3fd5a0f3b9676323a6bf0dc/scripts/classifier/__init__.py -------------------------------------------------------------------------------- /scripts/classifier/__init__.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkarunakaran/ROS-node-YOLO-v3-tiny/03aab28756984b8ac3fd5a0f3b9676323a6bf0dc/scripts/classifier/__init__.pyc -------------------------------------------------------------------------------- /scripts/classifier/model/classes.txt: -------------------------------------------------------------------------------- 1 | Red 2 | Green 3 | Yellow 4 | -------------------------------------------------------------------------------- /scripts/classifier/model/tiny_yolo_anchors.txt: -------------------------------------------------------------------------------- 1 | 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 2 | -------------------------------------------------------------------------------- /scripts/classifier/model/yolo.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkarunakaran/ROS-node-YOLO-v3-tiny/03aab28756984b8ac3fd5a0f3b9676323a6bf0dc/scripts/classifier/model/yolo.h5 -------------------------------------------------------------------------------- /scripts/classifier/test.py: -------------------------------------------------------------------------------- 1 | import rospkg 2 | 3 | # get an instance of RosPack with the default search paths 4 | rospack = rospkg.RosPack() 5 | 6 | # get the file path for rospy_tutorials 7 | rospack.get_path('object_detection') 8 | -------------------------------------------------------------------------------- /scripts/classifier/yolo.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Run a YOLO_v3 style detection model on test images. 
5 | """ 6 | 7 | import colorsys 8 | import os 9 | from timeit import default_timer as timer 10 | import tensorflow as tf 11 | import numpy as np 12 | from keras import backend as K 13 | from keras.models import load_model 14 | from keras.layers import Input 15 | from keras.backend.tensorflow_backend import set_session 16 | from PIL import Image, ImageFont, ImageDraw 17 | 18 | from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body 19 | from yolo3.utils import letterbox_image 20 | import os 21 | import rospkg 22 | 23 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 24 | #from keras.utils import multi_gpu_model 25 | gpu_num=1 26 | 27 | class YOLO(object): 28 | def __init__(self, model, anchors, classes): 29 | 30 | self.model_path = model 31 | self.anchors_path = anchors 32 | self.classes_path = classes 33 | self.score = 0.3 34 | self.iou = 0.45 35 | self.class_names = self._get_class() 36 | self.anchors = self._get_anchors() 37 | #config = tf.ConfigProto() 38 | #config.gpu_options.allow_growth = True 39 | 40 | config = tf.ConfigProto() 41 | config.gpu_options.per_process_gpu_memory_fraction = 0.3 42 | set_session(tf.Session(config=config)) 43 | #self.sess = tf.Session(config=config) 44 | self.sess = K.get_session() 45 | #K.set_session(self.sess) 46 | self.model_image_size = (416, 416) # fixed size or (None, None), hw 47 | self.boxes, self.scores, self.classes = self.generate() 48 | self.graph = tf.get_default_graph() 49 | 50 | 51 | def _get_class(self): 52 | classes_path = os.path.expanduser(self.classes_path) 53 | with open(classes_path) as f: 54 | class_names = f.readlines() 55 | class_names = [c.strip() for c in class_names] 56 | return class_names 57 | 58 | def _get_anchors(self): 59 | anchors_path = os.path.expanduser(self.anchors_path) 60 | with open(anchors_path) as f: 61 | anchors = f.readline() 62 | anchors = [float(x) for x in anchors.split(',')] 63 | return np.array(anchors).reshape(-1, 2) 64 | 65 | def generate(self): 66 | model_path = 
os.path.expanduser(self.model_path) 67 | assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.' 68 | 69 | # Load model, or construct model and load weights. 70 | num_anchors = len(self.anchors) 71 | num_classes = len(self.class_names) 72 | is_tiny_version = num_anchors==6 # default setting 73 | try: 74 | self.yolo_model = load_model(model_path, compile=False) 75 | except: 76 | self.yolo_model = tiny_yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) \ 77 | if is_tiny_version else yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes) 78 | self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes match 79 | else: 80 | assert self.yolo_model.layers[-1].output_shape[-1] == \ 81 | num_anchors/len(self.yolo_model.output) * (num_classes + 5), \ 82 | 'Mismatch between model and given anchor and class sizes' 83 | 84 | print('{} model, anchors, and classes loaded.'.format(model_path)) 85 | 86 | # Generate colors for drawing bounding boxes. 87 | hsv_tuples = [(x / len(self.class_names), 1., 1.) 88 | for x in range(len(self.class_names))] 89 | self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 90 | self.colors = list( 91 | map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), 92 | self.colors)) 93 | np.random.seed(10101) # Fixed seed for consistent colors across runs. 94 | np.random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes. 95 | np.random.seed(None) # Reset seed to default. 96 | 97 | # Generate output tensor targets for filtered bounding boxes. 
98 | self.input_image_shape = K.placeholder(shape=(2, )) 99 | #if gpu_num>=2: 100 | # self.yolo_model = multi_gpu_model(self.yolo_model, gpus=gpu_num) 101 | boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors, 102 | len(self.class_names), self.input_image_shape, 103 | score_threshold=self.score, iou_threshold=self.iou) 104 | return boxes, scores, classes 105 | 106 | def detect_image(self, image): 107 | start = timer() 108 | if self.model_image_size != (None, None): 109 | assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required' 110 | assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required' 111 | boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size))) 112 | else: 113 | new_image_size = (image.width - (image.width % 32), 114 | image.height - (image.height % 32)) 115 | boxed_image = letterbox_image(image, new_image_size) 116 | image_data = np.array(boxed_image, dtype='float32') 117 | 118 | #print(image_data.shape) 119 | image_data /= 255. 120 | image_data = np.expand_dims(image_data, 0) # Add batch dimension. 
121 | with self.graph.as_default(): 122 | out_boxes, out_scores, out_classes = self.sess.run( 123 | [self.boxes, self.scores, self.classes], 124 | feed_dict={ 125 | self.yolo_model.input: image_data, 126 | self.input_image_shape: [image.shape[1], image.shape[0]], 127 | K.learning_phase(): 1 128 | }) 129 | 130 | print('Found {} boxes for {}'.format(len(out_boxes), 'img')) 131 | 132 | return out_scores, out_classes, image 133 | ''' 134 | font = ImageFont.truetype(font='font/FiraMono-Medium.otf', 135 | size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) 136 | thickness = (image.size[0] + image.size[1]) // 300 137 | 138 | for i, c in reversed(list(enumerate(out_classes))): 139 | predicted_class = self.class_names[c] 140 | box = out_boxes[i] 141 | score = out_scores[i] 142 | 143 | label = '{} {:.2f}'.format(predicted_class, score) 144 | draw = ImageDraw.Draw(image) 145 | label_size = draw.textsize(label, font) 146 | 147 | top, left, bottom, right = box 148 | top = max(0, np.floor(top + 0.5).astype('int32')) 149 | left = max(0, np.floor(left + 0.5).astype('int32')) 150 | bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32')) 151 | right = min(image.size[0], np.floor(right + 0.5).astype('int32')) 152 | print(label, (left, top), (right, bottom)) 153 | 154 | if top - label_size[1] >= 0: 155 | text_origin = np.array([left, top - label_size[1]]) 156 | else: 157 | text_origin = np.array([left, top + 1]) 158 | 159 | # My kingdom for a good redistributable image drawing library. 
160 | for i in range(thickness): 161 | draw.rectangle( 162 | [left + i, top + i, right - i, bottom - i], 163 | outline=self.colors[c]) 164 | draw.rectangle( 165 | [tuple(text_origin), tuple(text_origin + label_size)], 166 | fill=self.colors[c]) 167 | draw.text(text_origin, label, fill=(0, 0, 0), font=font) 168 | del draw 169 | 170 | end = timer() 171 | print(end - start) 172 | return image''' 173 | 174 | def close_session(self): 175 | self.sess.close() 176 | 177 | def detect_img(yolo): 178 | while True: 179 | img = input('Input image filename:') 180 | try: 181 | image = Image.open(img) 182 | except: 183 | print('Open Error! Try again!') 184 | continue 185 | else: 186 | r_image = yolo.detect_image(image) 187 | r_image.show() 188 | yolo.close_session() 189 | 190 | -------------------------------------------------------------------------------- /scripts/classifier/yolo.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkarunakaran/ROS-node-YOLO-v3-tiny/03aab28756984b8ac3fd5a0f3b9676323a6bf0dc/scripts/classifier/yolo.pyc -------------------------------------------------------------------------------- /scripts/classifier/yolo3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkarunakaran/ROS-node-YOLO-v3-tiny/03aab28756984b8ac3fd5a0f3b9676323a6bf0dc/scripts/classifier/yolo3/__init__.py -------------------------------------------------------------------------------- /scripts/classifier/yolo3/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dkarunakaran/ROS-node-YOLO-v3-tiny/03aab28756984b8ac3fd5a0f3b9676323a6bf0dc/scripts/classifier/yolo3/__init__.pyc -------------------------------------------------------------------------------- /scripts/classifier/yolo3/model.py: -------------------------------------------------------------------------------- 
1 | """YOLO_v3 Model Defined in Keras.""" 2 | 3 | from functools import wraps 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D 9 | from keras.layers.advanced_activations import LeakyReLU 10 | from keras.layers.normalization import BatchNormalization 11 | from keras.models import Model 12 | from keras.regularizers import l2 13 | 14 | from utils import compose 15 | 16 | 17 | @wraps(Conv2D) 18 | def DarknetConv2D(*args, **kwargs): 19 | """Wrapper to set Darknet parameters for Convolution2D.""" 20 | darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)} 21 | darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same' 22 | darknet_conv_kwargs.update(kwargs) 23 | return Conv2D(*args, **darknet_conv_kwargs) 24 | 25 | def DarknetConv2D_BN_Leaky(*args, **kwargs): 26 | """Darknet Convolution2D followed by BatchNormalization and LeakyReLU.""" 27 | no_bias_kwargs = {'use_bias': False} 28 | no_bias_kwargs.update(kwargs) 29 | return compose( 30 | DarknetConv2D(*args, **no_bias_kwargs), 31 | BatchNormalization(), 32 | LeakyReLU(alpha=0.1)) 33 | 34 | def resblock_body(x, num_filters, num_blocks): 35 | '''A series of resblocks starting with a downsampling Convolution2D''' 36 | # Darknet uses left and top padding instead of 'same' mode 37 | x = ZeroPadding2D(((1,0),(1,0)))(x) 38 | x = DarknetConv2D_BN_Leaky(num_filters, (3,3), strides=(2,2))(x) 39 | for i in range(num_blocks): 40 | y = compose( 41 | DarknetConv2D_BN_Leaky(num_filters//2, (1,1)), 42 | DarknetConv2D_BN_Leaky(num_filters, (3,3)))(x) 43 | x = Add()([x,y]) 44 | return x 45 | 46 | def darknet_body(x): 47 | '''Darknent body having 52 Convolution2D layers''' 48 | x = DarknetConv2D_BN_Leaky(32, (3,3))(x) 49 | x = resblock_body(x, 64, 1) 50 | x = resblock_body(x, 128, 2) 51 | x = resblock_body(x, 256, 8) 52 | x = resblock_body(x, 512, 8) 53 | x = resblock_body(x, 
1024, 4) 54 | return x 55 | 56 | def make_last_layers(x, num_filters, out_filters): 57 | '''6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer''' 58 | x = compose( 59 | DarknetConv2D_BN_Leaky(num_filters, (1,1)), 60 | DarknetConv2D_BN_Leaky(num_filters*2, (3,3)), 61 | DarknetConv2D_BN_Leaky(num_filters, (1,1)), 62 | DarknetConv2D_BN_Leaky(num_filters*2, (3,3)), 63 | DarknetConv2D_BN_Leaky(num_filters, (1,1)))(x) 64 | y = compose( 65 | DarknetConv2D_BN_Leaky(num_filters*2, (3,3)), 66 | DarknetConv2D(out_filters, (1,1)))(x) 67 | return x, y 68 | 69 | 70 | def yolo_body(inputs, num_anchors, num_classes): 71 | """Create YOLO_V3 model CNN body in Keras.""" 72 | darknet = Model(inputs, darknet_body(inputs)) 73 | x, y1 = make_last_layers(darknet.output, 512, num_anchors*(num_classes+5)) 74 | 75 | x = compose( 76 | DarknetConv2D_BN_Leaky(256, (1,1)), 77 | UpSampling2D(2))(x) 78 | x = Concatenate()([x,darknet.layers[152].output]) 79 | x, y2 = make_last_layers(x, 256, num_anchors*(num_classes+5)) 80 | 81 | x = compose( 82 | DarknetConv2D_BN_Leaky(128, (1,1)), 83 | UpSampling2D(2))(x) 84 | x = Concatenate()([x,darknet.layers[92].output]) 85 | x, y3 = make_last_layers(x, 128, num_anchors*(num_classes+5)) 86 | 87 | return Model(inputs, [y1,y2,y3]) 88 | 89 | def tiny_yolo_body(inputs, num_anchors, num_classes): 90 | '''Create Tiny YOLO_v3 model CNN body in keras.''' 91 | x1 = compose( 92 | DarknetConv2D_BN_Leaky(16, (3,3)), 93 | MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), 94 | DarknetConv2D_BN_Leaky(32, (3,3)), 95 | MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), 96 | DarknetConv2D_BN_Leaky(64, (3,3)), 97 | MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), 98 | DarknetConv2D_BN_Leaky(128, (3,3)), 99 | MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), 100 | DarknetConv2D_BN_Leaky(256, (3,3)))(inputs) 101 | x2 = compose( 102 | MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), 103 | 
DarknetConv2D_BN_Leaky(512, (3,3)), 104 | MaxPooling2D(pool_size=(2,2), strides=(1,1), padding='same'), 105 | DarknetConv2D_BN_Leaky(1024, (3,3)), 106 | DarknetConv2D_BN_Leaky(256, (1,1)))(x1) 107 | y1 = compose( 108 | DarknetConv2D_BN_Leaky(512, (3,3)), 109 | DarknetConv2D(num_anchors*(num_classes+5), (1,1)))(x2) 110 | 111 | x2 = compose( 112 | DarknetConv2D_BN_Leaky(128, (1,1)), 113 | UpSampling2D(2))(x2) 114 | y2 = compose( 115 | Concatenate(), 116 | DarknetConv2D_BN_Leaky(256, (3,3)), 117 | DarknetConv2D(num_anchors*(num_classes+5), (1,1)))([x2,x1]) 118 | 119 | return Model(inputs, [y1,y2]) 120 | 121 | 122 | def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False): 123 | """Convert final layer features to bounding box parameters.""" 124 | num_anchors = len(anchors) 125 | # Reshape to batch, height, width, num_anchors, box_params. 126 | anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2]) 127 | 128 | grid_shape = K.shape(feats)[1:3] # height, width 129 | grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]), 130 | [1, grid_shape[1], 1, 1]) 131 | grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]), 132 | [grid_shape[0], 1, 1, 1]) 133 | grid = K.concatenate([grid_x, grid_y]) 134 | grid = K.cast(grid, K.dtype(feats)) 135 | 136 | feats = K.reshape( 137 | feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5]) 138 | 139 | # Adjust preditions to each spatial grid point and anchor size. 
140 | box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats)) 141 | box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats)) 142 | box_confidence = K.sigmoid(feats[..., 4:5]) 143 | box_class_probs = K.sigmoid(feats[..., 5:]) 144 | 145 | if calc_loss == True: 146 | return grid, feats, box_xy, box_wh 147 | return box_xy, box_wh, box_confidence, box_class_probs 148 | 149 | 150 | def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape): 151 | '''Get corrected boxes''' 152 | box_yx = box_xy[..., ::-1] 153 | box_hw = box_wh[..., ::-1] 154 | input_shape = K.cast(input_shape, K.dtype(box_yx)) 155 | image_shape = K.cast(image_shape, K.dtype(box_yx)) 156 | new_shape = K.round(image_shape * K.min(input_shape/image_shape)) 157 | offset = (input_shape-new_shape)/2./input_shape 158 | scale = input_shape/new_shape 159 | box_yx = (box_yx - offset) * scale 160 | box_hw *= scale 161 | 162 | box_mins = box_yx - (box_hw / 2.) 163 | box_maxes = box_yx + (box_hw / 2.) 164 | boxes = K.concatenate([ 165 | box_mins[..., 0:1], # y_min 166 | box_mins[..., 1:2], # x_min 167 | box_maxes[..., 0:1], # y_max 168 | box_maxes[..., 1:2] # x_max 169 | ]) 170 | 171 | # Scale boxes back to original image shape. 
172 | boxes *= K.concatenate([image_shape, image_shape]) 173 | return boxes 174 | 175 | 176 | def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape): 177 | '''Process Conv layer output''' 178 | box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, 179 | anchors, num_classes, input_shape) 180 | boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape) 181 | boxes = K.reshape(boxes, [-1, 4]) 182 | box_scores = box_confidence * box_class_probs 183 | box_scores = K.reshape(box_scores, [-1, num_classes]) 184 | return boxes, box_scores 185 | 186 | 187 | def yolo_eval(yolo_outputs, 188 | anchors, 189 | num_classes, 190 | image_shape, 191 | max_boxes=20, 192 | score_threshold=.6, 193 | iou_threshold=.5): 194 | """Evaluate YOLO model on given input and return filtered boxes.""" 195 | num_layers = len(yolo_outputs) 196 | anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] # default setting 197 | input_shape = K.shape(yolo_outputs[0])[1:3] * 32 198 | boxes = [] 199 | box_scores = [] 200 | for l in range(num_layers): 201 | _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l], 202 | anchors[anchor_mask[l]], num_classes, input_shape, image_shape) 203 | boxes.append(_boxes) 204 | box_scores.append(_box_scores) 205 | boxes = K.concatenate(boxes, axis=0) 206 | box_scores = K.concatenate(box_scores, axis=0) 207 | 208 | mask = box_scores >= score_threshold 209 | max_boxes_tensor = K.constant(max_boxes, dtype='int32') 210 | boxes_ = [] 211 | scores_ = [] 212 | classes_ = [] 213 | for c in range(num_classes): 214 | # TODO: use keras backend instead of tf. 
215 | class_boxes = tf.boolean_mask(boxes, mask[:, c]) 216 | class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c]) 217 | nms_index = tf.image.non_max_suppression( 218 | class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold) 219 | class_boxes = K.gather(class_boxes, nms_index) 220 | class_box_scores = K.gather(class_box_scores, nms_index) 221 | classes = K.ones_like(class_box_scores, 'int32') * c 222 | boxes_.append(class_boxes) 223 | scores_.append(class_box_scores) 224 | classes_.append(classes) 225 | boxes_ = K.concatenate(boxes_, axis=0) 226 | scores_ = K.concatenate(scores_, axis=0) 227 | classes_ = K.concatenate(classes_, axis=0) 228 | 229 | return boxes_, scores_, classes_ 230 | 231 | 232 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes): 233 | '''Preprocess true boxes to training input format 234 | 235 | Parameters 236 | ---------- 237 | true_boxes: array, shape=(m, T, 5) 238 | Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape. 239 | input_shape: array-like, hw, multiples of 32 240 | anchors: array, shape=(N, 2), wh 241 | num_classes: integer 242 | 243 | Returns 244 | ------- 245 | y_true: list of array, shape like yolo_outputs, xywh are reletive value 246 | 247 | ''' 248 | assert (true_boxes[..., 4]0 269 | 270 | for b in range(m): 271 | # Discard zero rows. 272 | wh = boxes_wh[b, valid_mask[b]] 273 | if len(wh)==0: continue 274 | # Expand dim to apply broadcasting. 275 | wh = np.expand_dims(wh, -2) 276 | box_maxes = wh / 2. 277 | box_mins = -box_maxes 278 | 279 | intersect_mins = np.maximum(box_mins, anchor_mins) 280 | intersect_maxes = np.minimum(box_maxes, anchor_maxes) 281 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.) 
282 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 283 | box_area = wh[..., 0] * wh[..., 1] 284 | anchor_area = anchors[..., 0] * anchors[..., 1] 285 | iou = intersect_area / (box_area + anchor_area - intersect_area) 286 | 287 | # Find best anchor for each true box 288 | best_anchor = np.argmax(iou, axis=-1) 289 | 290 | for t, n in enumerate(best_anchor): 291 | for l in range(num_layers): 292 | if n in anchor_mask[l]: 293 | i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32') 294 | j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32') 295 | k = anchor_mask[l].index(n) 296 | c = true_boxes[b,t, 4].astype('int32') 297 | y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4] 298 | y_true[l][b, j, i, k, 4] = 1 299 | y_true[l][b, j, i, k, 5+c] = 1 300 | 301 | return y_true 302 | 303 | 304 | def box_iou(b1, b2): 305 | '''Return iou tensor 306 | 307 | Parameters 308 | ---------- 309 | b1: tensor, shape=(i1,...,iN, 4), xywh 310 | b2: tensor, shape=(j, 4), xywh 311 | 312 | Returns 313 | ------- 314 | iou: tensor, shape=(i1,...,iN, j) 315 | 316 | ''' 317 | 318 | # Expand dim to apply broadcasting. 319 | b1 = K.expand_dims(b1, -2) 320 | b1_xy = b1[..., :2] 321 | b1_wh = b1[..., 2:4] 322 | b1_wh_half = b1_wh/2. 323 | b1_mins = b1_xy - b1_wh_half 324 | b1_maxes = b1_xy + b1_wh_half 325 | 326 | # Expand dim to apply broadcasting. 327 | b2 = K.expand_dims(b2, 0) 328 | b2_xy = b2[..., :2] 329 | b2_wh = b2[..., 2:4] 330 | b2_wh_half = b2_wh/2. 331 | b2_mins = b2_xy - b2_wh_half 332 | b2_maxes = b2_xy + b2_wh_half 333 | 334 | intersect_mins = K.maximum(b1_mins, b2_mins) 335 | intersect_maxes = K.minimum(b1_maxes, b2_maxes) 336 | intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.) 
337 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 338 | b1_area = b1_wh[..., 0] * b1_wh[..., 1] 339 | b2_area = b2_wh[..., 0] * b2_wh[..., 1] 340 | iou = intersect_area / (b1_area + b2_area - intersect_area) 341 | 342 | return iou 343 | 344 | 345 | def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False): 346 | '''Return yolo_loss tensor 347 | 348 | Parameters 349 | ---------- 350 | yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body 351 | y_true: list of array, the output of preprocess_true_boxes 352 | anchors: array, shape=(N, 2), wh 353 | num_classes: integer 354 | ignore_thresh: float, the iou threshold whether to ignore object confidence loss 355 | 356 | Returns 357 | ------- 358 | loss: tensor, shape=(1,) 359 | 360 | ''' 361 | num_layers = len(anchors)//3 # default setting 362 | yolo_outputs = args[:num_layers] 363 | y_true = args[num_layers:] 364 | anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] 365 | input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0])) 366 | grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)] 367 | loss = 0 368 | m = K.shape(yolo_outputs[0])[0] # batch size, tensor 369 | mf = K.cast(m, K.dtype(yolo_outputs[0])) 370 | 371 | for l in range(num_layers): 372 | object_mask = y_true[l][..., 4:5] 373 | true_class_probs = y_true[l][..., 5:] 374 | 375 | grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l], 376 | anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True) 377 | pred_box = K.concatenate([pred_xy, pred_wh]) 378 | 379 | # Darknet raw box to calculate loss. 
380 | raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid 381 | raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1]) 382 | raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf 383 | box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4] 384 | 385 | # Find ignore mask, iterate over each of batch. 386 | ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True) 387 | object_mask_bool = K.cast(object_mask, 'bool') 388 | def loop_body(b, ignore_mask): 389 | true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0]) 390 | iou = box_iou(pred_box[b], true_box) 391 | best_iou = K.max(iou, axis=-1) 392 | ignore_mask = ignore_mask.write(b, K.cast(best_iou