├── .gitignore ├── CHANGELOG.rst ├── CMakeLists.txt ├── README.md ├── launch └── dnn_detect.launch ├── model ├── MobileNetSSD_deploy.caffemodel └── MobileNetSSD_deploy.prototxt.txt ├── msg ├── DetectedObject.msg └── DetectedObjectArray.msg ├── package.xml ├── src └── dnn_detect.cpp ├── srv └── Detect.srv └── test ├── dnn_images.test ├── dnn_images_test.cpp └── test_images └── cat.jpg /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 2 | Changelog for package dnn_detect 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | 5 | 0.1.0 (2020-09-21) 6 | ------------------ 7 | * dnn_images_test.cpp - support opencv version 4 8 | * Noetic support 9 | * Contributors: Jim Vaughan, Rohan Agrawal, Tim 10 | 11 | 0.0.3 (2018-02-16) 12 | ------------------ 13 | * Add optional rotation of image 14 | * Added one shot mode, which requires a service call to trigger detection. 15 | * Update README.md 16 | * Contributors: Jim Vaughan 17 | 18 | 0.0.2 (2017-12-03) 19 | ------------------ 20 | * Initial commit 21 | * Contributors: Jim Vaughan 22 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 2.8.3) 3 | project(dnn_detect) 4 | 5 | find_package(catkin REQUIRED COMPONENTS 6 | roscpp 7 | tf2_geometry_msgs 8 | tf2_ros 9 | tf2 10 | visualization_msgs 11 | image_transport 12 | cv_bridge 13 | std_msgs 14 | ) 15 | 16 | find_package(OpenCV REQUIRED) 17 | 18 | 19 | add_message_files( 20 | FILES 21 | DetectedObject.msg 22 | DetectedObjectArray.msg 23 | ) 24 | 25 | 26 | add_service_files( 27 | FILES 28 | Detect.srv 29 | ) 30 | 31 | generate_messages( 32 | DEPENDENCIES 33 | std_msgs 34 | ) 35 | catkin_package(INCLUDE_DIRS DEPENDS OpenCV) 36 | 37 | ########### 38 | ## Build ## 39 | ########### 40 | 41 | 42 | add_definitions(-std=c++11) 43 | 44 | include_directories(${catkin_INCLUDE_DIRS}) 45 | include_directories(${OpenCV_INCLUDE_DIRS}) 46 | 47 | add_executable(dnn_detect src/dnn_detect.cpp) 48 | 49 | add_dependencies(dnn_detect ${${PROJECT_NAME}_EXPORTED_TARGETS} 50 | ${catkin_EXPORTED_TARGETS}) 51 | 52 | target_link_libraries(dnn_detect ${catkin_LIBRARIES} ${OpenCV_LIBS}) 53 | 54 | ############# 55 | ## Install ## 56 | ############# 57 | 58 | ## Mark executables and/or libraries for installation 59 | install(TARGETS dnn_detect 60 | ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} 61 | LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} 62 | RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 63 | ) 64 | 65 | install(DIRECTORY launch/ 66 | DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/launch 67 | ) 68 | 69 | install(DIRECTORY model/ 70 | DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/model 71 | ) 72 | 73 | ########### 74 | ## Tests ## 75 | ########### 76 | 77 | if(CATKIN_ENABLE_TESTING) 78 | 
find_package(rostest REQUIRED) 79 | 80 | # Tests need c++11 81 | add_definitions(-std=c++11) 82 | 83 | add_rostest_gtest(dnn_images_test 84 | test/dnn_images.test 85 | test/dnn_images_test.cpp) 86 | add_dependencies(dnn_images_test ${PROJECT_NAME}_generate_messages) 87 | target_link_libraries(dnn_images_test ${catkin_LIBRARIES} ${OpenCV_LIBS}) 88 | endif() 89 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # dnn_detect 3 | 4 | This package provides object detection using OpenCV's Deep Neural Network module. 5 | 6 | Documentation is at [http://wiki.ros.org/dnn_detect](http://wiki.ros.org/dnn_detect). 7 | 8 | The model used by default is from [chuanqi305's MobileNet-SSD](https://github.com/chuanqi305/MobileNet-SSD). 9 | -------------------------------------------------------------------------------- /launch/dnn_detect.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 10 | 11 | 12 | 13 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /model/MobileNetSSD_deploy.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UbiquityRobotics/dnn_detect/c23161c9c1c2a2bd15618b6b3450522ac8aad2cb/model/MobileNetSSD_deploy.caffemodel -------------------------------------------------------------------------------- /model/MobileNetSSD_deploy.prototxt.txt: -------------------------------------------------------------------------------- 1 | name: "MobileNet-SSD" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 300 7 | dim: 300 8 | } 9 | layer { 10 | name: "conv0" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv0" 14 | param { 15 | lr_mult: 1.0 16 | decay_mult: 1.0 17 | } 18 | param { 19 | lr_mult: 2.0 20 | decay_mult: 0.0 21 | } 22 | convolution_param { 23 | num_output: 32 24 | pad: 1 25 | kernel_size: 3 26 | stride: 2 27 | weight_filler { 28 | type: "msra" 29 | } 30 | bias_filler { 31 | type: "constant" 32 | value: 0.0 33 | } 34 | } 35 | } 36 | layer { 37 | name: "conv0/relu" 38 | type: "ReLU" 39 | bottom: "conv0" 40 | top: "conv0" 41 | } 42 | layer { 43 | name: "conv1/dw" 44 | type: "Convolution" 45 | bottom: "conv0" 46 | top: "conv1/dw" 47 | param { 48 | lr_mult: 1.0 49 | decay_mult: 1.0 50 | } 51 | param { 52 | lr_mult: 2.0 53 | decay_mult: 0.0 54 | } 55 | convolution_param { 56 | num_output: 32 57 | pad: 1 58 | kernel_size: 3 59 | group: 32 60 | engine: CAFFE 61 | weight_filler { 62 | type: "msra" 63 | } 64 | bias_filler { 65 | type: "constant" 66 | value: 0.0 67 | } 68 | } 69 | } 70 | layer { 71 | name: "conv1/dw/relu" 72 | type: "ReLU" 73 | bottom: "conv1/dw" 74 | top: "conv1/dw" 75 | } 76 | layer { 77 | name: "conv1" 78 | type: "Convolution" 79 | bottom: "conv1/dw" 80 | top: "conv1" 81 | param { 82 | lr_mult: 1.0 83 | decay_mult: 1.0 84 | } 85 | param { 86 | lr_mult: 2.0 87 | decay_mult: 0.0 88 | } 89 | convolution_param { 90 | num_output: 64 91 | kernel_size: 1 92 | weight_filler { 93 | type: "msra" 94 | } 95 | bias_filler { 96 | type: "constant" 97 | value: 0.0 98 | } 99 | } 100 | } 101 | layer { 102 | name: "conv1/relu" 103 | type: "ReLU" 104 | bottom: "conv1" 105 | top: "conv1" 106 | } 107 | layer { 108 | name: "conv2/dw" 109 | type: "Convolution" 110 | bottom: "conv1" 111 | top: "conv2/dw" 112 | param { 113 | lr_mult: 1.0 114 | decay_mult: 1.0 
115 | } 116 | param { 117 | lr_mult: 2.0 118 | decay_mult: 0.0 119 | } 120 | convolution_param { 121 | num_output: 64 122 | pad: 1 123 | kernel_size: 3 124 | stride: 2 125 | group: 64 126 | engine: CAFFE 127 | weight_filler { 128 | type: "msra" 129 | } 130 | bias_filler { 131 | type: "constant" 132 | value: 0.0 133 | } 134 | } 135 | } 136 | layer { 137 | name: "conv2/dw/relu" 138 | type: "ReLU" 139 | bottom: "conv2/dw" 140 | top: "conv2/dw" 141 | } 142 | layer { 143 | name: "conv2" 144 | type: "Convolution" 145 | bottom: "conv2/dw" 146 | top: "conv2" 147 | param { 148 | lr_mult: 1.0 149 | decay_mult: 1.0 150 | } 151 | param { 152 | lr_mult: 2.0 153 | decay_mult: 0.0 154 | } 155 | convolution_param { 156 | num_output: 128 157 | kernel_size: 1 158 | weight_filler { 159 | type: "msra" 160 | } 161 | bias_filler { 162 | type: "constant" 163 | value: 0.0 164 | } 165 | } 166 | } 167 | layer { 168 | name: "conv2/relu" 169 | type: "ReLU" 170 | bottom: "conv2" 171 | top: "conv2" 172 | } 173 | layer { 174 | name: "conv3/dw" 175 | type: "Convolution" 176 | bottom: "conv2" 177 | top: "conv3/dw" 178 | param { 179 | lr_mult: 1.0 180 | decay_mult: 1.0 181 | } 182 | param { 183 | lr_mult: 2.0 184 | decay_mult: 0.0 185 | } 186 | convolution_param { 187 | num_output: 128 188 | pad: 1 189 | kernel_size: 3 190 | group: 128 191 | engine: CAFFE 192 | weight_filler { 193 | type: "msra" 194 | } 195 | bias_filler { 196 | type: "constant" 197 | value: 0.0 198 | } 199 | } 200 | } 201 | layer { 202 | name: "conv3/dw/relu" 203 | type: "ReLU" 204 | bottom: "conv3/dw" 205 | top: "conv3/dw" 206 | } 207 | layer { 208 | name: "conv3" 209 | type: "Convolution" 210 | bottom: "conv3/dw" 211 | top: "conv3" 212 | param { 213 | lr_mult: 1.0 214 | decay_mult: 1.0 215 | } 216 | param { 217 | lr_mult: 2.0 218 | decay_mult: 0.0 219 | } 220 | convolution_param { 221 | num_output: 128 222 | kernel_size: 1 223 | weight_filler { 224 | type: "msra" 225 | } 226 | bias_filler { 227 | type: "constant" 228 | value: 0.0 229 | } 230 | } 231 | } 232 | layer { 233 | name: "conv3/relu" 234 | type: "ReLU" 235 | bottom: "conv3" 236 | top: "conv3" 237 | } 238 | layer { 239 | name: "conv4/dw" 240 | type: "Convolution" 241 | bottom: "conv3" 242 | top: "conv4/dw" 243 | param { 244 | lr_mult: 1.0 245 | decay_mult: 1.0 246 | } 247 | param { 248 | lr_mult: 2.0 249 | decay_mult: 0.0 250 | } 251 | convolution_param { 252 | num_output: 128 253 | pad: 1 254 | kernel_size: 3 255 | stride: 2 256 | group: 128 257 | engine: CAFFE 258 | weight_filler { 259 | type: "msra" 260 | } 261 | bias_filler { 262 | type: "constant" 263 | value: 0.0 264 | } 265 | } 266 | } 267 | layer { 268 | name: "conv4/dw/relu" 269 | type: "ReLU" 270 | bottom: "conv4/dw" 271 | top: "conv4/dw" 272 | } 273 | layer { 274 | name: "conv4" 275 | type: "Convolution" 276 | bottom: "conv4/dw" 277 | top: "conv4" 278 | param { 279 | lr_mult: 1.0 280 | decay_mult: 1.0 281 | } 282 | param { 283 | lr_mult: 2.0 284 | decay_mult: 0.0 285 | } 286 | convolution_param { 287 | num_output: 256 288 | kernel_size: 1 289 | weight_filler { 290 | type: "msra" 291 | } 292 | bias_filler { 293 | type: "constant" 294 | value: 0.0 295 | } 296 | } 297 | } 298 | layer { 299 | name: "conv4/relu" 300 | type: "ReLU" 301 | bottom: "conv4" 302 | top: "conv4" 303 | } 304 | layer { 305 | name: "conv5/dw" 306 | type: "Convolution" 307 | bottom: "conv4" 308 | top: "conv5/dw" 309 | param { 310 | lr_mult: 1.0 311 | decay_mult: 1.0 312 | } 313 | param { 314 | lr_mult: 2.0 315 | decay_mult: 0.0 316 | } 317 | convolution_param { 318 | 
num_output: 256 319 | pad: 1 320 | kernel_size: 3 321 | group: 256 322 | engine: CAFFE 323 | weight_filler { 324 | type: "msra" 325 | } 326 | bias_filler { 327 | type: "constant" 328 | value: 0.0 329 | } 330 | } 331 | } 332 | layer { 333 | name: "conv5/dw/relu" 334 | type: "ReLU" 335 | bottom: "conv5/dw" 336 | top: "conv5/dw" 337 | } 338 | layer { 339 | name: "conv5" 340 | type: "Convolution" 341 | bottom: "conv5/dw" 342 | top: "conv5" 343 | param { 344 | lr_mult: 1.0 345 | decay_mult: 1.0 346 | } 347 | param { 348 | lr_mult: 2.0 349 | decay_mult: 0.0 350 | } 351 | convolution_param { 352 | num_output: 256 353 | kernel_size: 1 354 | weight_filler { 355 | type: "msra" 356 | } 357 | bias_filler { 358 | type: "constant" 359 | value: 0.0 360 | } 361 | } 362 | } 363 | layer { 364 | name: "conv5/relu" 365 | type: "ReLU" 366 | bottom: "conv5" 367 | top: "conv5" 368 | } 369 | layer { 370 | name: "conv6/dw" 371 | type: "Convolution" 372 | bottom: "conv5" 373 | top: "conv6/dw" 374 | param { 375 | lr_mult: 1.0 376 | decay_mult: 1.0 377 | } 378 | param { 379 | lr_mult: 2.0 380 | decay_mult: 0.0 381 | } 382 | convolution_param { 383 | num_output: 256 384 | pad: 1 385 | kernel_size: 3 386 | stride: 2 387 | group: 256 388 | engine: CAFFE 389 | weight_filler { 390 | type: "msra" 391 | } 392 | bias_filler { 393 | type: "constant" 394 | value: 0.0 395 | } 396 | } 397 | } 398 | layer { 399 | name: "conv6/dw/relu" 400 | type: "ReLU" 401 | bottom: "conv6/dw" 402 | top: "conv6/dw" 403 | } 404 | layer { 405 | name: "conv6" 406 | type: "Convolution" 407 | bottom: "conv6/dw" 408 | top: "conv6" 409 | param { 410 | lr_mult: 1.0 411 | decay_mult: 1.0 412 | } 413 | param { 414 | lr_mult: 2.0 415 | decay_mult: 0.0 416 | } 417 | convolution_param { 418 | num_output: 512 419 | kernel_size: 1 420 | weight_filler { 421 | type: "msra" 422 | } 423 | bias_filler { 424 | type: "constant" 425 | value: 0.0 426 | } 427 | } 428 | } 429 | layer { 430 | name: "conv6/relu" 431 | type: "ReLU" 432 | bottom: "conv6" 433 | top: "conv6" 434 | } 435 | layer { 436 | name: "conv7/dw" 437 | type: "Convolution" 438 | bottom: "conv6" 439 | top: "conv7/dw" 440 | param { 441 | lr_mult: 1.0 442 | decay_mult: 1.0 443 | } 444 | param { 445 | lr_mult: 2.0 446 | decay_mult: 0.0 447 | } 448 | convolution_param { 449 | num_output: 512 450 | pad: 1 451 | kernel_size: 3 452 | group: 512 453 | engine: CAFFE 454 | weight_filler { 455 | type: "msra" 456 | } 457 | bias_filler { 458 | type: "constant" 459 | value: 0.0 460 | } 461 | } 462 | } 463 | layer { 464 | name: "conv7/dw/relu" 465 | type: "ReLU" 466 | bottom: "conv7/dw" 467 | top: "conv7/dw" 468 | } 469 | layer { 470 | name: "conv7" 471 | type: "Convolution" 472 | bottom: "conv7/dw" 473 | top: "conv7" 474 | param { 475 | lr_mult: 1.0 476 | decay_mult: 1.0 477 | } 478 | param { 479 | lr_mult: 2.0 480 | decay_mult: 0.0 481 | } 482 | convolution_param { 483 | num_output: 512 484 | kernel_size: 1 485 | weight_filler { 486 | type: "msra" 487 | } 488 | bias_filler { 489 | type: "constant" 490 | value: 0.0 491 | } 492 | } 493 | } 494 | layer { 495 | name: "conv7/relu" 496 | type: "ReLU" 497 | bottom: "conv7" 498 | top: "conv7" 499 | } 500 | layer { 501 | name: "conv8/dw" 502 | type: "Convolution" 503 | bottom: "conv7" 504 | top: "conv8/dw" 505 | param { 506 | lr_mult: 1.0 507 | decay_mult: 1.0 508 | } 509 | param { 510 | lr_mult: 2.0 511 | decay_mult: 0.0 512 | } 513 | convolution_param { 514 | num_output: 512 515 | pad: 1 516 | kernel_size: 3 517 | group: 512 518 | engine: CAFFE 519 | weight_filler { 520 | 
type: "msra" 521 | } 522 | bias_filler { 523 | type: "constant" 524 | value: 0.0 525 | } 526 | } 527 | } 528 | layer { 529 | name: "conv8/dw/relu" 530 | type: "ReLU" 531 | bottom: "conv8/dw" 532 | top: "conv8/dw" 533 | } 534 | layer { 535 | name: "conv8" 536 | type: "Convolution" 537 | bottom: "conv8/dw" 538 | top: "conv8" 539 | param { 540 | lr_mult: 1.0 541 | decay_mult: 1.0 542 | } 543 | param { 544 | lr_mult: 2.0 545 | decay_mult: 0.0 546 | } 547 | convolution_param { 548 | num_output: 512 549 | kernel_size: 1 550 | weight_filler { 551 | type: "msra" 552 | } 553 | bias_filler { 554 | type: "constant" 555 | value: 0.0 556 | } 557 | } 558 | } 559 | layer { 560 | name: "conv8/relu" 561 | type: "ReLU" 562 | bottom: "conv8" 563 | top: "conv8" 564 | } 565 | layer { 566 | name: "conv9/dw" 567 | type: "Convolution" 568 | bottom: "conv8" 569 | top: "conv9/dw" 570 | param { 571 | lr_mult: 1.0 572 | decay_mult: 1.0 573 | } 574 | param { 575 | lr_mult: 2.0 576 | decay_mult: 0.0 577 | } 578 | convolution_param { 579 | num_output: 512 580 | pad: 1 581 | kernel_size: 3 582 | group: 512 583 | engine: CAFFE 584 | weight_filler { 585 | type: "msra" 586 | } 587 | bias_filler { 588 | type: "constant" 589 | value: 0.0 590 | } 591 | } 592 | } 593 | layer { 594 | name: "conv9/dw/relu" 595 | type: "ReLU" 596 | bottom: "conv9/dw" 597 | top: "conv9/dw" 598 | } 599 | layer { 600 | name: "conv9" 601 | type: "Convolution" 602 | bottom: "conv9/dw" 603 | top: "conv9" 604 | param { 605 | lr_mult: 1.0 606 | decay_mult: 1.0 607 | } 608 | param { 609 | lr_mult: 2.0 610 | decay_mult: 0.0 611 | } 612 | convolution_param { 613 | num_output: 512 614 | kernel_size: 1 615 | weight_filler { 616 | type: "msra" 617 | } 618 | bias_filler { 619 | type: "constant" 620 | value: 0.0 621 | } 622 | } 623 | } 624 | layer { 625 | name: "conv9/relu" 626 | type: "ReLU" 627 | bottom: "conv9" 628 | top: "conv9" 629 | } 630 | layer { 631 | name: "conv10/dw" 632 | type: "Convolution" 633 | bottom: "conv9" 634 | top: "conv10/dw" 635 | param { 636 | lr_mult: 1.0 637 | decay_mult: 1.0 638 | } 639 | param { 640 | lr_mult: 2.0 641 | decay_mult: 0.0 642 | } 643 | convolution_param { 644 | num_output: 512 645 | pad: 1 646 | kernel_size: 3 647 | group: 512 648 | engine: CAFFE 649 | weight_filler { 650 | type: "msra" 651 | } 652 | bias_filler { 653 | type: "constant" 654 | value: 0.0 655 | } 656 | } 657 | } 658 | layer { 659 | name: "conv10/dw/relu" 660 | type: "ReLU" 661 | bottom: "conv10/dw" 662 | top: "conv10/dw" 663 | } 664 | layer { 665 | name: "conv10" 666 | type: "Convolution" 667 | bottom: "conv10/dw" 668 | top: "conv10" 669 | param { 670 | lr_mult: 1.0 671 | decay_mult: 1.0 672 | } 673 | param { 674 | lr_mult: 2.0 675 | decay_mult: 0.0 676 | } 677 | convolution_param { 678 | num_output: 512 679 | kernel_size: 1 680 | weight_filler { 681 | type: "msra" 682 | } 683 | bias_filler { 684 | type: "constant" 685 | value: 0.0 686 | } 687 | } 688 | } 689 | layer { 690 | name: "conv10/relu" 691 | type: "ReLU" 692 | bottom: "conv10" 693 | top: "conv10" 694 | } 695 | layer { 696 | name: "conv11/dw" 697 | type: "Convolution" 698 | bottom: "conv10" 699 | top: "conv11/dw" 700 | param { 701 | lr_mult: 1.0 702 | decay_mult: 1.0 703 | } 704 | param { 705 | lr_mult: 2.0 706 | decay_mult: 0.0 707 | } 708 | convolution_param { 709 | num_output: 512 710 | pad: 1 711 | kernel_size: 3 712 | group: 512 713 | engine: CAFFE 714 | weight_filler { 715 | type: "msra" 716 | } 717 | bias_filler { 718 | type: "constant" 719 | value: 0.0 720 | } 721 | } 722 | } 723 | layer { 
724 | name: "conv11/dw/relu" 725 | type: "ReLU" 726 | bottom: "conv11/dw" 727 | top: "conv11/dw" 728 | } 729 | layer { 730 | name: "conv11" 731 | type: "Convolution" 732 | bottom: "conv11/dw" 733 | top: "conv11" 734 | param { 735 | lr_mult: 1.0 736 | decay_mult: 1.0 737 | } 738 | param { 739 | lr_mult: 2.0 740 | decay_mult: 0.0 741 | } 742 | convolution_param { 743 | num_output: 512 744 | kernel_size: 1 745 | weight_filler { 746 | type: "msra" 747 | } 748 | bias_filler { 749 | type: "constant" 750 | value: 0.0 751 | } 752 | } 753 | } 754 | layer { 755 | name: "conv11/relu" 756 | type: "ReLU" 757 | bottom: "conv11" 758 | top: "conv11" 759 | } 760 | layer { 761 | name: "conv12/dw" 762 | type: "Convolution" 763 | bottom: "conv11" 764 | top: "conv12/dw" 765 | param { 766 | lr_mult: 1.0 767 | decay_mult: 1.0 768 | } 769 | param { 770 | lr_mult: 2.0 771 | decay_mult: 0.0 772 | } 773 | convolution_param { 774 | num_output: 512 775 | pad: 1 776 | kernel_size: 3 777 | stride: 2 778 | group: 512 779 | engine: CAFFE 780 | weight_filler { 781 | type: "msra" 782 | } 783 | bias_filler { 784 | type: "constant" 785 | value: 0.0 786 | } 787 | } 788 | } 789 | layer { 790 | name: "conv12/dw/relu" 791 | type: "ReLU" 792 | bottom: "conv12/dw" 793 | top: "conv12/dw" 794 | } 795 | layer { 796 | name: "conv12" 797 | type: "Convolution" 798 | bottom: "conv12/dw" 799 | top: "conv12" 800 | param { 801 | lr_mult: 1.0 802 | decay_mult: 1.0 803 | } 804 | param { 805 | lr_mult: 2.0 806 | decay_mult: 0.0 807 | } 808 | convolution_param { 809 | num_output: 1024 810 | kernel_size: 1 811 | weight_filler { 812 | type: "msra" 813 | } 814 | bias_filler { 815 | type: "constant" 816 | value: 0.0 817 | } 818 | } 819 | } 820 | layer { 821 | name: "conv12/relu" 822 | type: "ReLU" 823 | bottom: "conv12" 824 | top: "conv12" 825 | } 826 | layer { 827 | name: "conv13/dw" 828 | type: "Convolution" 829 | bottom: "conv12" 830 | top: "conv13/dw" 831 | param { 832 | lr_mult: 1.0 833 | decay_mult: 1.0 834 | } 835 | param { 836 | lr_mult: 2.0 837 | decay_mult: 0.0 838 | } 839 | convolution_param { 840 | num_output: 1024 841 | pad: 1 842 | kernel_size: 3 843 | group: 1024 844 | engine: CAFFE 845 | weight_filler { 846 | type: "msra" 847 | } 848 | bias_filler { 849 | type: "constant" 850 | value: 0.0 851 | } 852 | } 853 | } 854 | layer { 855 | name: "conv13/dw/relu" 856 | type: "ReLU" 857 | bottom: "conv13/dw" 858 | top: "conv13/dw" 859 | } 860 | layer { 861 | name: "conv13" 862 | type: "Convolution" 863 | bottom: "conv13/dw" 864 | top: "conv13" 865 | param { 866 | lr_mult: 1.0 867 | decay_mult: 1.0 868 | } 869 | param { 870 | lr_mult: 2.0 871 | decay_mult: 0.0 872 | } 873 | convolution_param { 874 | num_output: 1024 875 | kernel_size: 1 876 | weight_filler { 877 | type: "msra" 878 | } 879 | bias_filler { 880 | type: "constant" 881 | value: 0.0 882 | } 883 | } 884 | } 885 | layer { 886 | name: "conv13/relu" 887 | type: "ReLU" 888 | bottom: "conv13" 889 | top: "conv13" 890 | } 891 | layer { 892 | name: "conv14_1" 893 | type: "Convolution" 894 | bottom: "conv13" 895 | top: "conv14_1" 896 | param { 897 | lr_mult: 1.0 898 | decay_mult: 1.0 899 | } 900 | param { 901 | lr_mult: 2.0 902 | decay_mult: 0.0 903 | } 904 | convolution_param { 905 | num_output: 256 906 | kernel_size: 1 907 | weight_filler { 908 | type: "msra" 909 | } 910 | bias_filler { 911 | type: "constant" 912 | value: 0.0 913 | } 914 | } 915 | } 916 | layer { 917 | name: "conv14_1/relu" 918 | type: "ReLU" 919 | bottom: "conv14_1" 920 | top: "conv14_1" 921 | } 922 | layer { 923 | name: 
"conv14_2" 924 | type: "Convolution" 925 | bottom: "conv14_1" 926 | top: "conv14_2" 927 | param { 928 | lr_mult: 1.0 929 | decay_mult: 1.0 930 | } 931 | param { 932 | lr_mult: 2.0 933 | decay_mult: 0.0 934 | } 935 | convolution_param { 936 | num_output: 512 937 | pad: 1 938 | kernel_size: 3 939 | stride: 2 940 | weight_filler { 941 | type: "msra" 942 | } 943 | bias_filler { 944 | type: "constant" 945 | value: 0.0 946 | } 947 | } 948 | } 949 | layer { 950 | name: "conv14_2/relu" 951 | type: "ReLU" 952 | bottom: "conv14_2" 953 | top: "conv14_2" 954 | } 955 | layer { 956 | name: "conv15_1" 957 | type: "Convolution" 958 | bottom: "conv14_2" 959 | top: "conv15_1" 960 | param { 961 | lr_mult: 1.0 962 | decay_mult: 1.0 963 | } 964 | param { 965 | lr_mult: 2.0 966 | decay_mult: 0.0 967 | } 968 | convolution_param { 969 | num_output: 128 970 | kernel_size: 1 971 | weight_filler { 972 | type: "msra" 973 | } 974 | bias_filler { 975 | type: "constant" 976 | value: 0.0 977 | } 978 | } 979 | } 980 | layer { 981 | name: "conv15_1/relu" 982 | type: "ReLU" 983 | bottom: "conv15_1" 984 | top: "conv15_1" 985 | } 986 | layer { 987 | name: "conv15_2" 988 | type: "Convolution" 989 | bottom: "conv15_1" 990 | top: "conv15_2" 991 | param { 992 | lr_mult: 1.0 993 | decay_mult: 1.0 994 | } 995 | param { 996 | lr_mult: 2.0 997 | decay_mult: 0.0 998 | } 999 | convolution_param { 1000 | num_output: 256 1001 | pad: 1 1002 | kernel_size: 3 1003 | stride: 2 1004 | weight_filler { 1005 | type: "msra" 1006 | } 1007 | bias_filler { 1008 | type: "constant" 1009 | value: 0.0 1010 | } 1011 | } 1012 | } 1013 | layer { 1014 | name: "conv15_2/relu" 1015 | type: "ReLU" 1016 | bottom: "conv15_2" 1017 | top: "conv15_2" 1018 | } 1019 | layer { 1020 | name: "conv16_1" 1021 | type: "Convolution" 1022 | bottom: "conv15_2" 1023 | top: "conv16_1" 1024 | param { 1025 | lr_mult: 1.0 1026 | decay_mult: 1.0 1027 | } 1028 | param { 1029 | lr_mult: 2.0 1030 | decay_mult: 0.0 1031 | } 1032 | convolution_param { 1033 | num_output: 128 1034 | kernel_size: 1 1035 | weight_filler { 1036 | type: "msra" 1037 | } 1038 | bias_filler { 1039 | type: "constant" 1040 | value: 0.0 1041 | } 1042 | } 1043 | } 1044 | layer { 1045 | name: "conv16_1/relu" 1046 | type: "ReLU" 1047 | bottom: "conv16_1" 1048 | top: "conv16_1" 1049 | } 1050 | layer { 1051 | name: "conv16_2" 1052 | type: "Convolution" 1053 | bottom: "conv16_1" 1054 | top: "conv16_2" 1055 | param { 1056 | lr_mult: 1.0 1057 | decay_mult: 1.0 1058 | } 1059 | param { 1060 | lr_mult: 2.0 1061 | decay_mult: 0.0 1062 | } 1063 | convolution_param { 1064 | num_output: 256 1065 | pad: 1 1066 | kernel_size: 3 1067 | stride: 2 1068 | weight_filler { 1069 | type: "msra" 1070 | } 1071 | bias_filler { 1072 | type: "constant" 1073 | value: 0.0 1074 | } 1075 | } 1076 | } 1077 | layer { 1078 | name: "conv16_2/relu" 1079 | type: "ReLU" 1080 | bottom: "conv16_2" 1081 | top: "conv16_2" 1082 | } 1083 | layer { 1084 | name: "conv17_1" 1085 | type: "Convolution" 1086 | bottom: "conv16_2" 1087 | top: "conv17_1" 1088 | param { 1089 | lr_mult: 1.0 1090 | decay_mult: 1.0 1091 | } 1092 | param { 1093 | lr_mult: 2.0 1094 | decay_mult: 0.0 1095 | } 1096 | convolution_param { 1097 | num_output: 64 1098 | kernel_size: 1 1099 | weight_filler { 1100 | type: "msra" 1101 | } 1102 | bias_filler { 1103 | type: "constant" 1104 | value: 0.0 1105 | } 1106 | } 1107 | } 1108 | layer { 1109 | name: "conv17_1/relu" 1110 | type: "ReLU" 1111 | bottom: "conv17_1" 1112 | top: "conv17_1" 1113 | } 1114 | layer { 1115 | name: "conv17_2" 1116 | type: 
"Convolution" 1117 | bottom: "conv17_1" 1118 | top: "conv17_2" 1119 | param { 1120 | lr_mult: 1.0 1121 | decay_mult: 1.0 1122 | } 1123 | param { 1124 | lr_mult: 2.0 1125 | decay_mult: 0.0 1126 | } 1127 | convolution_param { 1128 | num_output: 128 1129 | pad: 1 1130 | kernel_size: 3 1131 | stride: 2 1132 | weight_filler { 1133 | type: "msra" 1134 | } 1135 | bias_filler { 1136 | type: "constant" 1137 | value: 0.0 1138 | } 1139 | } 1140 | } 1141 | layer { 1142 | name: "conv17_2/relu" 1143 | type: "ReLU" 1144 | bottom: "conv17_2" 1145 | top: "conv17_2" 1146 | } 1147 | layer { 1148 | name: "conv11_mbox_loc" 1149 | type: "Convolution" 1150 | bottom: "conv11" 1151 | top: "conv11_mbox_loc" 1152 | param { 1153 | lr_mult: 1.0 1154 | decay_mult: 1.0 1155 | } 1156 | param { 1157 | lr_mult: 2.0 1158 | decay_mult: 0.0 1159 | } 1160 | convolution_param { 1161 | num_output: 12 1162 | kernel_size: 1 1163 | weight_filler { 1164 | type: "msra" 1165 | } 1166 | bias_filler { 1167 | type: "constant" 1168 | value: 0.0 1169 | } 1170 | } 1171 | } 1172 | layer { 1173 | name: "conv11_mbox_loc_perm" 1174 | type: "Permute" 1175 | bottom: "conv11_mbox_loc" 1176 | top: "conv11_mbox_loc_perm" 1177 | permute_param { 1178 | order: 0 1179 | order: 2 1180 | order: 3 1181 | order: 1 1182 | } 1183 | } 1184 | layer { 1185 | name: "conv11_mbox_loc_flat" 1186 | type: "Flatten" 1187 | bottom: "conv11_mbox_loc_perm" 1188 | top: "conv11_mbox_loc_flat" 1189 | flatten_param { 1190 | axis: 1 1191 | } 1192 | } 1193 | layer { 1194 | name: "conv11_mbox_conf" 1195 | type: "Convolution" 1196 | bottom: "conv11" 1197 | top: "conv11_mbox_conf" 1198 | param { 1199 | lr_mult: 1.0 1200 | decay_mult: 1.0 1201 | } 1202 | param { 1203 | lr_mult: 2.0 1204 | decay_mult: 0.0 1205 | } 1206 | convolution_param { 1207 | num_output: 63 1208 | kernel_size: 1 1209 | weight_filler { 1210 | type: "msra" 1211 | } 1212 | bias_filler { 1213 | type: "constant" 1214 | value: 0.0 1215 | } 1216 | } 1217 | } 1218 | layer { 1219 | name: "conv11_mbox_conf_perm" 1220 | type: "Permute" 1221 | bottom: "conv11_mbox_conf" 1222 | top: "conv11_mbox_conf_perm" 1223 | permute_param { 1224 | order: 0 1225 | order: 2 1226 | order: 3 1227 | order: 1 1228 | } 1229 | } 1230 | layer { 1231 | name: "conv11_mbox_conf_flat" 1232 | type: "Flatten" 1233 | bottom: "conv11_mbox_conf_perm" 1234 | top: "conv11_mbox_conf_flat" 1235 | flatten_param { 1236 | axis: 1 1237 | } 1238 | } 1239 | layer { 1240 | name: "conv11_mbox_priorbox" 1241 | type: "PriorBox" 1242 | bottom: "conv11" 1243 | bottom: "data" 1244 | top: "conv11_mbox_priorbox" 1245 | prior_box_param { 1246 | min_size: 60.0 1247 | aspect_ratio: 2.0 1248 | flip: true 1249 | clip: false 1250 | variance: 0.1 1251 | variance: 0.1 1252 | variance: 0.2 1253 | variance: 0.2 1254 | offset: 0.5 1255 | } 1256 | } 1257 | layer { 1258 | name: "conv13_mbox_loc" 1259 | type: "Convolution" 1260 | bottom: "conv13" 1261 | top: "conv13_mbox_loc" 1262 | param { 1263 | lr_mult: 1.0 1264 | decay_mult: 1.0 1265 | } 1266 | param { 1267 | lr_mult: 2.0 1268 | decay_mult: 0.0 1269 | } 1270 | convolution_param { 1271 | num_output: 24 1272 | kernel_size: 1 1273 | weight_filler { 1274 | type: "msra" 1275 | } 1276 | bias_filler { 1277 | type: "constant" 1278 | value: 0.0 1279 | } 1280 | } 1281 | } 1282 | layer { 1283 | name: "conv13_mbox_loc_perm" 1284 | type: "Permute" 1285 | bottom: "conv13_mbox_loc" 1286 | top: "conv13_mbox_loc_perm" 1287 | permute_param { 1288 | order: 0 1289 | order: 2 1290 | order: 3 1291 | order: 1 1292 | } 1293 | } 1294 | layer { 1295 | name: 
"conv13_mbox_loc_flat" 1296 | type: "Flatten" 1297 | bottom: "conv13_mbox_loc_perm" 1298 | top: "conv13_mbox_loc_flat" 1299 | flatten_param { 1300 | axis: 1 1301 | } 1302 | } 1303 | layer { 1304 | name: "conv13_mbox_conf" 1305 | type: "Convolution" 1306 | bottom: "conv13" 1307 | top: "conv13_mbox_conf" 1308 | param { 1309 | lr_mult: 1.0 1310 | decay_mult: 1.0 1311 | } 1312 | param { 1313 | lr_mult: 2.0 1314 | decay_mult: 0.0 1315 | } 1316 | convolution_param { 1317 | num_output: 126 1318 | kernel_size: 1 1319 | weight_filler { 1320 | type: "msra" 1321 | } 1322 | bias_filler { 1323 | type: "constant" 1324 | value: 0.0 1325 | } 1326 | } 1327 | } 1328 | layer { 1329 | name: "conv13_mbox_conf_perm" 1330 | type: "Permute" 1331 | bottom: "conv13_mbox_conf" 1332 | top: "conv13_mbox_conf_perm" 1333 | permute_param { 1334 | order: 0 1335 | order: 2 1336 | order: 3 1337 | order: 1 1338 | } 1339 | } 1340 | layer { 1341 | name: "conv13_mbox_conf_flat" 1342 | type: "Flatten" 1343 | bottom: "conv13_mbox_conf_perm" 1344 | top: "conv13_mbox_conf_flat" 1345 | flatten_param { 1346 | axis: 1 1347 | } 1348 | } 1349 | layer { 1350 | name: "conv13_mbox_priorbox" 1351 | type: "PriorBox" 1352 | bottom: "conv13" 1353 | bottom: "data" 1354 | top: "conv13_mbox_priorbox" 1355 | prior_box_param { 1356 | min_size: 105.0 1357 | max_size: 150.0 1358 | aspect_ratio: 2.0 1359 | aspect_ratio: 3.0 1360 | flip: true 1361 | clip: false 1362 | variance: 0.1 1363 | variance: 0.1 1364 | variance: 0.2 1365 | variance: 0.2 1366 | offset: 0.5 1367 | } 1368 | } 1369 | layer { 1370 | name: "conv14_2_mbox_loc" 1371 | type: "Convolution" 1372 | bottom: "conv14_2" 1373 | top: "conv14_2_mbox_loc" 1374 | param { 1375 | lr_mult: 1.0 1376 | decay_mult: 1.0 1377 | } 1378 | param { 1379 | lr_mult: 2.0 1380 | decay_mult: 0.0 1381 | } 1382 | convolution_param { 1383 | num_output: 24 1384 | kernel_size: 1 1385 | weight_filler { 1386 | type: "msra" 1387 | } 1388 | bias_filler { 1389 | type: "constant" 1390 | value: 0.0 1391 | } 1392 | } 1393 | } 1394 | layer { 1395 | name: "conv14_2_mbox_loc_perm" 1396 | type: "Permute" 1397 | bottom: "conv14_2_mbox_loc" 1398 | top: "conv14_2_mbox_loc_perm" 1399 | permute_param { 1400 | order: 0 1401 | order: 2 1402 | order: 3 1403 | order: 1 1404 | } 1405 | } 1406 | layer { 1407 | name: "conv14_2_mbox_loc_flat" 1408 | type: "Flatten" 1409 | bottom: "conv14_2_mbox_loc_perm" 1410 | top: "conv14_2_mbox_loc_flat" 1411 | flatten_param { 1412 | axis: 1 1413 | } 1414 | } 1415 | layer { 1416 | name: "conv14_2_mbox_conf" 1417 | type: "Convolution" 1418 | bottom: "conv14_2" 1419 | top: "conv14_2_mbox_conf" 1420 | param { 1421 | lr_mult: 1.0 1422 | decay_mult: 1.0 1423 | } 1424 | param { 1425 | lr_mult: 2.0 1426 | decay_mult: 0.0 1427 | } 1428 | convolution_param { 1429 | num_output: 126 1430 | kernel_size: 1 1431 | weight_filler { 1432 | type: "msra" 1433 | } 1434 | bias_filler { 1435 | type: "constant" 1436 | value: 0.0 1437 | } 1438 | } 1439 | } 1440 | layer { 1441 | name: "conv14_2_mbox_conf_perm" 1442 | type: "Permute" 1443 | bottom: "conv14_2_mbox_conf" 1444 | top: "conv14_2_mbox_conf_perm" 1445 | permute_param { 1446 | order: 0 1447 | order: 2 1448 | order: 3 1449 | order: 1 1450 | } 1451 | } 1452 | layer { 1453 | name: "conv14_2_mbox_conf_flat" 1454 | type: "Flatten" 1455 | bottom: "conv14_2_mbox_conf_perm" 1456 | top: "conv14_2_mbox_conf_flat" 1457 | flatten_param { 1458 | axis: 1 1459 | } 1460 | } 1461 | layer { 1462 | name: "conv14_2_mbox_priorbox" 1463 | type: "PriorBox" 1464 | bottom: "conv14_2" 1465 | bottom: 
"data" 1466 | top: "conv14_2_mbox_priorbox" 1467 | prior_box_param { 1468 | min_size: 150.0 1469 | max_size: 195.0 1470 | aspect_ratio: 2.0 1471 | aspect_ratio: 3.0 1472 | flip: true 1473 | clip: false 1474 | variance: 0.1 1475 | variance: 0.1 1476 | variance: 0.2 1477 | variance: 0.2 1478 | offset: 0.5 1479 | } 1480 | } 1481 | layer { 1482 | name: "conv15_2_mbox_loc" 1483 | type: "Convolution" 1484 | bottom: "conv15_2" 1485 | top: "conv15_2_mbox_loc" 1486 | param { 1487 | lr_mult: 1.0 1488 | decay_mult: 1.0 1489 | } 1490 | param { 1491 | lr_mult: 2.0 1492 | decay_mult: 0.0 1493 | } 1494 | convolution_param { 1495 | num_output: 24 1496 | kernel_size: 1 1497 | weight_filler { 1498 | type: "msra" 1499 | } 1500 | bias_filler { 1501 | type: "constant" 1502 | value: 0.0 1503 | } 1504 | } 1505 | } 1506 | layer { 1507 | name: "conv15_2_mbox_loc_perm" 1508 | type: "Permute" 1509 | bottom: "conv15_2_mbox_loc" 1510 | top: "conv15_2_mbox_loc_perm" 1511 | permute_param { 1512 | order: 0 1513 | order: 2 1514 | order: 3 1515 | order: 1 1516 | } 1517 | } 1518 | layer { 1519 | name: "conv15_2_mbox_loc_flat" 1520 | type: "Flatten" 1521 | bottom: "conv15_2_mbox_loc_perm" 1522 | top: "conv15_2_mbox_loc_flat" 1523 | flatten_param { 1524 | axis: 1 1525 | } 1526 | } 1527 | layer { 1528 | name: "conv15_2_mbox_conf" 1529 | type: "Convolution" 1530 | bottom: "conv15_2" 1531 | top: "conv15_2_mbox_conf" 1532 | param { 1533 | lr_mult: 1.0 1534 | decay_mult: 1.0 1535 | } 1536 | param { 1537 | lr_mult: 2.0 1538 | decay_mult: 0.0 1539 | } 1540 | convolution_param { 1541 | num_output: 126 1542 | kernel_size: 1 1543 | weight_filler { 1544 | type: "msra" 1545 | } 1546 | bias_filler { 1547 | type: "constant" 1548 | value: 0.0 1549 | } 1550 | } 1551 | } 1552 | layer { 1553 | name: "conv15_2_mbox_conf_perm" 1554 | type: "Permute" 1555 | bottom: "conv15_2_mbox_conf" 1556 | top: "conv15_2_mbox_conf_perm" 1557 | permute_param { 1558 | order: 0 1559 | order: 2 1560 | order: 3 1561 | order: 1 1562 | } 1563 | } 1564 | layer { 1565 | name: "conv15_2_mbox_conf_flat" 1566 | type: "Flatten" 1567 | bottom: "conv15_2_mbox_conf_perm" 1568 | top: "conv15_2_mbox_conf_flat" 1569 | flatten_param { 1570 | axis: 1 1571 | } 1572 | } 1573 | layer { 1574 | name: "conv15_2_mbox_priorbox" 1575 | type: "PriorBox" 1576 | bottom: "conv15_2" 1577 | bottom: "data" 1578 | top: "conv15_2_mbox_priorbox" 1579 | prior_box_param { 1580 | min_size: 195.0 1581 | max_size: 240.0 1582 | aspect_ratio: 2.0 1583 | aspect_ratio: 3.0 1584 | flip: true 1585 | clip: false 1586 | variance: 0.1 1587 | variance: 0.1 1588 | variance: 0.2 1589 | variance: 0.2 1590 | offset: 0.5 1591 | } 1592 | } 1593 | layer { 1594 | name: "conv16_2_mbox_loc" 1595 | type: "Convolution" 1596 | bottom: "conv16_2" 1597 | top: "conv16_2_mbox_loc" 1598 | param { 1599 | lr_mult: 1.0 1600 | decay_mult: 1.0 1601 | } 1602 | param { 1603 | lr_mult: 2.0 1604 | decay_mult: 0.0 1605 | } 1606 | convolution_param { 1607 | num_output: 24 1608 | kernel_size: 1 1609 | weight_filler { 1610 | type: "msra" 1611 | } 1612 | bias_filler { 1613 | type: "constant" 1614 | value: 0.0 1615 | } 1616 | } 1617 | } 1618 | layer { 1619 | name: "conv16_2_mbox_loc_perm" 1620 | type: "Permute" 1621 | bottom: "conv16_2_mbox_loc" 1622 | top: "conv16_2_mbox_loc_perm" 1623 | permute_param { 1624 | order: 0 1625 | order: 2 1626 | order: 3 1627 | order: 1 1628 | } 1629 | } 1630 | layer { 1631 | name: "conv16_2_mbox_loc_flat" 1632 | type: "Flatten" 1633 | bottom: "conv16_2_mbox_loc_perm" 1634 | top: "conv16_2_mbox_loc_flat" 1635 | 
flatten_param { 1636 | axis: 1 1637 | } 1638 | } 1639 | layer { 1640 | name: "conv16_2_mbox_conf" 1641 | type: "Convolution" 1642 | bottom: "conv16_2" 1643 | top: "conv16_2_mbox_conf" 1644 | param { 1645 | lr_mult: 1.0 1646 | decay_mult: 1.0 1647 | } 1648 | param { 1649 | lr_mult: 2.0 1650 | decay_mult: 0.0 1651 | } 1652 | convolution_param { 1653 | num_output: 126 1654 | kernel_size: 1 1655 | weight_filler { 1656 | type: "msra" 1657 | } 1658 | bias_filler { 1659 | type: "constant" 1660 | value: 0.0 1661 | } 1662 | } 1663 | } 1664 | layer { 1665 | name: "conv16_2_mbox_conf_perm" 1666 | type: "Permute" 1667 | bottom: "conv16_2_mbox_conf" 1668 | top: "conv16_2_mbox_conf_perm" 1669 | permute_param { 1670 | order: 0 1671 | order: 2 1672 | order: 3 1673 | order: 1 1674 | } 1675 | } 1676 | layer { 1677 | name: "conv16_2_mbox_conf_flat" 1678 | type: "Flatten" 1679 | bottom: "conv16_2_mbox_conf_perm" 1680 | top: "conv16_2_mbox_conf_flat" 1681 | flatten_param { 1682 | axis: 1 1683 | } 1684 | } 1685 | layer { 1686 | name: "conv16_2_mbox_priorbox" 1687 | type: "PriorBox" 1688 | bottom: "conv16_2" 1689 | bottom: "data" 1690 | top: "conv16_2_mbox_priorbox" 1691 | prior_box_param { 1692 | min_size: 240.0 1693 | max_size: 285.0 1694 | aspect_ratio: 2.0 1695 | aspect_ratio: 3.0 1696 | flip: true 1697 | clip: false 1698 | variance: 0.1 1699 | variance: 0.1 1700 | variance: 0.2 1701 | variance: 0.2 1702 | offset: 0.5 1703 | } 1704 | } 1705 | layer { 1706 | name: "conv17_2_mbox_loc" 1707 | type: "Convolution" 1708 | bottom: "conv17_2" 1709 | top: "conv17_2_mbox_loc" 1710 | param { 1711 | lr_mult: 1.0 1712 | decay_mult: 1.0 1713 | } 1714 | param { 1715 | lr_mult: 2.0 1716 | decay_mult: 0.0 1717 | } 1718 | convolution_param { 1719 | num_output: 24 1720 | kernel_size: 1 1721 | weight_filler { 1722 | type: "msra" 1723 | } 1724 | bias_filler { 1725 | type: "constant" 1726 | value: 0.0 1727 | } 1728 | } 1729 | } 1730 | layer { 1731 | name: "conv17_2_mbox_loc_perm" 1732 | type: "Permute" 1733 | bottom: "conv17_2_mbox_loc" 1734 | top: "conv17_2_mbox_loc_perm" 1735 | permute_param { 1736 | order: 0 1737 | order: 2 1738 | order: 3 1739 | order: 1 1740 | } 1741 | } 1742 | layer { 1743 | name: "conv17_2_mbox_loc_flat" 1744 | type: "Flatten" 1745 | bottom: "conv17_2_mbox_loc_perm" 1746 | top: "conv17_2_mbox_loc_flat" 1747 | flatten_param { 1748 | axis: 1 1749 | } 1750 | } 1751 | layer { 1752 | name: "conv17_2_mbox_conf" 1753 | type: "Convolution" 1754 | bottom: "conv17_2" 1755 | top: "conv17_2_mbox_conf" 1756 | param { 1757 | lr_mult: 1.0 1758 | decay_mult: 1.0 1759 | } 1760 | param { 1761 | lr_mult: 2.0 1762 | decay_mult: 0.0 1763 | } 1764 | convolution_param { 1765 | num_output: 126 1766 | kernel_size: 1 1767 | weight_filler { 1768 | type: "msra" 1769 | } 1770 | bias_filler { 1771 | type: "constant" 1772 | value: 0.0 1773 | } 1774 | } 1775 | } 1776 | layer { 1777 | name: "conv17_2_mbox_conf_perm" 1778 | type: "Permute" 1779 | bottom: "conv17_2_mbox_conf" 1780 | top: "conv17_2_mbox_conf_perm" 1781 | permute_param { 1782 | order: 0 1783 | order: 2 1784 | order: 3 1785 | order: 1 1786 | } 1787 | } 1788 | layer { 1789 | name: "conv17_2_mbox_conf_flat" 1790 | type: "Flatten" 1791 | bottom: "conv17_2_mbox_conf_perm" 1792 | top: "conv17_2_mbox_conf_flat" 1793 | flatten_param { 1794 | axis: 1 1795 | } 1796 | } 1797 | layer { 1798 | name: "conv17_2_mbox_priorbox" 1799 | type: "PriorBox" 1800 | bottom: "conv17_2" 1801 | bottom: "data" 1802 | top: "conv17_2_mbox_priorbox" 1803 | prior_box_param { 1804 | min_size: 285.0 1805 | 
max_size: 300.0 1806 | aspect_ratio: 2.0 1807 | aspect_ratio: 3.0 1808 | flip: true 1809 | clip: false 1810 | variance: 0.1 1811 | variance: 0.1 1812 | variance: 0.2 1813 | variance: 0.2 1814 | offset: 0.5 1815 | } 1816 | } 1817 | layer { 1818 | name: "mbox_loc" 1819 | type: "Concat" 1820 | bottom: "conv11_mbox_loc_flat" 1821 | bottom: "conv13_mbox_loc_flat" 1822 | bottom: "conv14_2_mbox_loc_flat" 1823 | bottom: "conv15_2_mbox_loc_flat" 1824 | bottom: "conv16_2_mbox_loc_flat" 1825 | bottom: "conv17_2_mbox_loc_flat" 1826 | top: "mbox_loc" 1827 | concat_param { 1828 | axis: 1 1829 | } 1830 | } 1831 | layer { 1832 | name: "mbox_conf" 1833 | type: "Concat" 1834 | bottom: "conv11_mbox_conf_flat" 1835 | bottom: "conv13_mbox_conf_flat" 1836 | bottom: "conv14_2_mbox_conf_flat" 1837 | bottom: "conv15_2_mbox_conf_flat" 1838 | bottom: "conv16_2_mbox_conf_flat" 1839 | bottom: "conv17_2_mbox_conf_flat" 1840 | top: "mbox_conf" 1841 | concat_param { 1842 | axis: 1 1843 | } 1844 | } 1845 | layer { 1846 | name: "mbox_priorbox" 1847 | type: "Concat" 1848 | bottom: "conv11_mbox_priorbox" 1849 | bottom: "conv13_mbox_priorbox" 1850 | bottom: "conv14_2_mbox_priorbox" 1851 | bottom: "conv15_2_mbox_priorbox" 1852 | bottom: "conv16_2_mbox_priorbox" 1853 | bottom: "conv17_2_mbox_priorbox" 1854 | top: "mbox_priorbox" 1855 | concat_param { 1856 | axis: 2 1857 | } 1858 | } 1859 | layer { 1860 | name: "mbox_conf_reshape" 1861 | type: "Reshape" 1862 | bottom: "mbox_conf" 1863 | top: "mbox_conf_reshape" 1864 | reshape_param { 1865 | shape { 1866 | dim: 0 1867 | dim: -1 1868 | dim: 21 1869 | } 1870 | } 1871 | } 1872 | layer { 1873 | name: "mbox_conf_softmax" 1874 | type: "Softmax" 1875 | bottom: "mbox_conf_reshape" 1876 | top: "mbox_conf_softmax" 1877 | softmax_param { 1878 | axis: 2 1879 | } 1880 | } 1881 | layer { 1882 | name: "mbox_conf_flatten" 1883 | type: "Flatten" 1884 | bottom: "mbox_conf_softmax" 1885 | top: "mbox_conf_flatten" 1886 | flatten_param { 1887 | axis: 1 1888 | } 1889 | } 1890 | layer { 1891 | name: "detection_out" 1892 | type: "DetectionOutput" 1893 | bottom: "mbox_loc" 1894 | bottom: "mbox_conf_flatten" 1895 | bottom: "mbox_priorbox" 1896 | top: "detection_out" 1897 | include { 1898 | phase: TEST 1899 | } 1900 | detection_output_param { 1901 | num_classes: 21 1902 | share_location: true 1903 | background_label_id: 0 1904 | nms_param { 1905 | nms_threshold: 0.45 1906 | top_k: 100 1907 | } 1908 | code_type: CENTER_SIZE 1909 | keep_top_k: 100 1910 | confidence_threshold: 0.25 1911 | } 1912 | } 1913 | -------------------------------------------------------------------------------- /msg/DetectedObject.msg: -------------------------------------------------------------------------------- 1 | string class_name 2 | float32 confidence 3 | float32 x_min 4 | float32 x_max 5 | float32 y_min 6 | float32 y_max 7 | -------------------------------------------------------------------------------- /msg/DetectedObjectArray.msg: -------------------------------------------------------------------------------- 1 | Header header 2 | DetectedObject[] objects 3 | -------------------------------------------------------------------------------- /package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | dnn_detect 4 | 0.1.0 5 | DNN based detection 6 | 7 | Jim Vaughan 8 | Rohan Agrawal 9 | 10 | BSD 11 | 12 | Jim Vaughan 13 | 14 | catkin 15 | 16 | roscpp 17 | tf2_geometry_msgs 18 | tf2_ros 19 | tf2 20 | visualization_msgs 21 | image_transport 22 | image_transport_plugins 23 | 
sensor_msgs 24 | cv_bridge 25 | dynamic_reconfigure 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/dnn_detect.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017, Ubiquity Robotics 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 25 | * 26 | * The views and conclusions contained in the software and documentation are 27 | * those of the authors and should not be interpreted as representing official 28 | * policies, either expressed or implied, of the FreeBSD Project. 
 *
 */

// NOTE: the header names inside the angle brackets were stripped when this
// repository was exported to a single file; the include list below is
// reconstructed from what the code actually uses.
#include <string>
#include <vector>
#include <mutex>
#include <condition_variable>

#include <ros/ros.h>
#include <image_transport/image_transport.h>
#include <cv_bridge/cv_bridge.h>
#include <sensor_msgs/image_encodings.h>

#include "dnn_detect/DetectedObject.h"
#include "dnn_detect/DetectedObjectArray.h"
#include "dnn_detect/Detect.h"

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/dnn.hpp>

#include <boost/algorithm/string.hpp>
#include <boost/format.hpp>

using namespace std;
using namespace cv;

std::condition_variable cond;
std::mutex mutx;

class DnnNode {
  private:
    ros::Publisher results_pub;

    image_transport::ImageTransport it;
    image_transport::Subscriber img_sub;

    // if set, we publish the images that contain objects
    bool publish_images;

    int frame_num;
    float min_confidence;
    int im_size;
    int rotate_flag;
    float scale_factor;
    float mean_val;
    std::vector<std::string> class_names;

    image_transport::Publisher image_pub;

    cv::dnn::Net net;
    cv::Mat resized_image;
    cv::Mat rotated_image;

    bool single_shot;
    volatile bool triggered;
    volatile bool processed;

    dnn_detect::DetectedObjectArray results;

    ros::ServiceServer detect_srv;

    bool trigger_callback(dnn_detect::Detect::Request &req,
                          dnn_detect::Detect::Response &res);

    void image_callback(const sensor_msgs::ImageConstPtr &msg);

  public:
    DnnNode(ros::NodeHandle &nh);
};

bool DnnNode::trigger_callback(dnn_detect::Detect::Request &req,
                               dnn_detect::Detect::Response &res)
{
   ROS_INFO("Got service request");
   triggered = true;

   std::unique_lock<std::mutex> lock(mutx);

   while (!processed) {
     cond.wait(lock);
   }
   res.result = results;
   processed = false;
   return true;
}


void DnnNode::image_callback(const sensor_msgs::ImageConstPtr &msg)
{
    if (single_shot && !triggered) {
       return;
    }
    triggered = false;

    ROS_INFO("Got image %d", msg->header.seq);
    frame_num++;

    cv_bridge::CvImagePtr cv_ptr;

    try {
        cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8);

        int w = cv_ptr->image.cols;
        int h = cv_ptr->image.rows;

        if (rotate_flag >= 0) {
            cv::rotate(cv_ptr->image, rotated_image, rotate_flag);
            rotated_image.copyTo(cv_ptr->image);
        }

        cv::resize(cv_ptr->image, resized_image, cv::Size(im_size, im_size));
        cv::Mat blob = cv::dnn::blobFromImage(resized_image, scale_factor,
                           cv::Size(im_size, im_size), mean_val, false);

        net.setInput(blob, "data");
        cv::Mat objs = net.forward("detection_out");

        // Each row of the detection matrix is
        // [image_id, class_id, confidence, x_min, y_min, x_max, y_max],
        // with box coordinates normalized to [0, 1].
        cv::Mat detectionMat(objs.size[2], objs.size[3], CV_32F,
                             objs.ptr<float>());

        std::unique_lock<std::mutex> lock(mutx);
        results.header.frame_id = msg->header.frame_id;
        results.objects.clear();

        for (int i = 0; i < detectionMat.rows; i++) {

            float confidence = detectionMat.at<float>(i, 2);
            if (confidence > min_confidence) {
                int object_class = (int)(detectionMat.at<float>(i, 1));

                int x_min = static_cast<int>(detectionMat.at<float>(i, 3) * w);
                int y_min = static_cast<int>(detectionMat.at<float>(i, 4) * h);
                int x_max = static_cast<int>(detectionMat.at<float>(i, 5) * w);
                int y_max = static_cast<int>(detectionMat.at<float>(i, 6) * h);

                std::string class_name;
                if (object_class >= (int)class_names.size()) {
                    class_name = "unknown";
                    ROS_ERROR("Object class %d out of range of class names",
                              object_class);
                }
                else {
                    class_name = class_names[object_class];
                }
                std::string label = str(boost::format{"%1% %2%"} %
                                        class_name % confidence);

                ROS_INFO("%s", label.c_str());
                dnn_detect::DetectedObject obj;
                obj.class_name = class_name;
                obj.confidence = confidence;
                obj.x_min = x_min;
                obj.x_max = x_max;
                obj.y_min = y_min;
                obj.y_max = y_max;
                results.objects.push_back(obj);

                Rect object(x_min, y_min, x_max-x_min, y_max-y_min);

                rectangle(cv_ptr->image, object, Scalar(0, 255, 0));
                int baseline = 0;
                cv::Size text_size = cv::getTextSize(label,
                    FONT_HERSHEY_SIMPLEX, 0.75, 2, &baseline);
                putText(cv_ptr->image, label, Point(x_min, y_min-text_size.height),
                        FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0));
            }
        }

        results_pub.publish(results);

        image_pub.publish(cv_ptr->toImageMsg());

    }
    catch(cv_bridge::Exception &e) {
        ROS_ERROR("cv_bridge exception: %s", e.what());
    }
    catch(cv::Exception &e) {
        ROS_ERROR("cv exception: %s", e.what());
    }
    ROS_DEBUG("Notifying condition variable");
    processed = true;
    cond.notify_all();
}

DnnNode::DnnNode(ros::NodeHandle &nh) : it(nh)
{
    frame_num = 0;

    std::string dir;
    std::string proto_net_file;
    std::string caffe_model_file;
    std::string classes("background,"
        "aeroplane,bicycle,bird,boat,bottle,bus,car,cat,chair,"
        "cow,diningtable,dog,horse,motorbike,person,pottedplant,"
        "sheep,sofa,train,tvmonitor");

    nh.param("single_shot", single_shot, false);

    nh.param("publish_images", publish_images, false);
    nh.param<std::string>("data_dir", dir, "");
    nh.param<std::string>("protonet_file", proto_net_file,
                          "MobileNetSSD_deploy.prototxt.txt");
    nh.param<std::string>("caffe_model_file", caffe_model_file,
                          "MobileNetSSD_deploy.caffemodel");
    nh.param<float>("min_confidence", min_confidence, 0.2);
    nh.param("im_size", im_size, 300);
    nh.param("rotate_flag", rotate_flag, -1);
    nh.param("scale_factor", scale_factor, 0.007843f);
    nh.param("mean_val", mean_val, 127.5f);
    nh.param("class_names", classes, classes);

    boost::split(class_names, classes, boost::is_any_of(","));
    ROS_INFO("Read %d class names", (int)class_names.size());

    try {
        net = cv::dnn::readNetFromCaffe(dir + "/" + proto_net_file,
                                        dir + "/" + caffe_model_file);
    }
    catch(cv::Exception &e) {
        ROS_ERROR("cv exception: %s", e.what());
        exit(1);
    }

    triggered = false;

    detect_srv = nh.advertiseService("detect", &DnnNode::trigger_callback, this);

    results_pub =
        nh.advertise<dnn_detect::DetectedObjectArray>("/dnn_objects", 20);

    image_pub = it.advertise("/dnn_images", 1);

    img_sub = it.subscribe("/camera", 1,
                           &DnnNode::image_callback, this);

    ROS_INFO("DNN detection ready");
}

int main(int argc, char **argv) {
    ros::init(argc, argv, "dnn_detect");
    ros::NodeHandle nh("~");

    DnnNode node = DnnNode(nh);
    ros::MultiThreadedSpinner spinner(2);
    spinner.spin();

    return 0;
}
--------------------------------------------------------------------------------
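Note: the XML of launch/dnn_detect.launch was lost when this repository was flattened into a single file (only empty line numbers remain in that section above). The sketch below is not the original file; it is a plausible reconstruction based on the parameters and topics read in src/dnn_detect.cpp, and the argument names and defaults are illustrative only.

```xml
<launch>
  <!-- Hypothetical reconstruction of launch/dnn_detect.launch; parameter
       names come from src/dnn_detect.cpp, values are illustrative. -->
  <arg name="camera" default="/camera"/>

  <node pkg="dnn_detect" type="dnn_detect" name="dnn_detect" output="screen">
    <param name="data_dir" value="$(find dnn_detect)/model"/>
    <param name="protonet_file" value="MobileNetSSD_deploy.prototxt.txt"/>
    <param name="caffe_model_file" value="MobileNetSSD_deploy.caffemodel"/>
    <param name="min_confidence" value="0.2"/>
    <param name="single_shot" value="false"/>
    <remap from="/camera" to="$(arg camera)"/>
  </node>
</launch>
```

The node subscribes to the camera topic via image_transport and publishes dnn_detect/DetectedObjectArray messages on /dnn_objects and annotated images on /dnn_images.
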
/srv/Detect.srv:
--------------------------------------------------------------------------------
---
DetectedObjectArray result
--------------------------------------------------------------------------------
/test/dnn_images.test:
--------------------------------------------------------------------------------
(The roslaunch XML of this test file was stripped when the repository was
exported. Judging from dnn_images_test.cpp below, it starts the dnn_detect
node with the camera topic remapped to camera/image and runs the
dnn_images_test rostest with an image_directory parameter pointing at
test/test_images/.)
--------------------------------------------------------------------------------
/test/dnn_images_test.cpp:
--------------------------------------------------------------------------------
#include <gtest/gtest.h>

// NOTE: the header names inside the angle brackets were stripped when this
// repository was exported; the list below is reconstructed from what the
// test actually uses.
#include <ros/ros.h>
#include <image_transport/image_transport.h>
#include <cv_bridge/cv_bridge.h>
#include <std_msgs/Header.h>
#include <boost/thread.hpp>
#include <unistd.h>

#include "dnn_detect/DetectedObject.h"
#include "dnn_detect/DetectedObjectArray.h"
#include "dnn_detect/Detect.h"

#include <opencv2/opencv.hpp>

#if CV_MAJOR_VERSION < 4
#define IMREAD_COLOR_MODE CV_LOAD_IMAGE_COLOR
#else
#define IMREAD_COLOR_MODE cv::IMREAD_COLOR
#endif

class DnnImagesTest : public ::testing::Test {
protected:
  virtual void SetUp() {
    it = new image_transport::ImageTransport(nh);
    image_pub = it->advertise("camera/image", 1);

    ros::NodeHandle nh_priv("~");
    nh_priv.getParam("image_directory", image_directory);
    object_sub = nh.subscribe("/dnn_objects", 1, &DnnImagesTest::object_callback, this);
    got_object = false;
    got_cat = false;
  }

  // Make a service request to trigger detection
  void trigger() {
    ros::NodeHandle node;
    ros::ServiceClient client =
        node.serviceClient<dnn_detect::Detect>("/dnn_detect/detect");
    dnn_detect::Detect d;
    client.call(d);
  }

  virtual void TearDown() { delete it; }

  void publish_image(std::string file) {
    boost::thread trig(&DnnImagesTest::trigger, this);

    sleep(1);
    cv::Mat image = cv::imread(image_directory+file, IMREAD_COLOR_MODE);
    sensor_msgs::ImagePtr msg = cv_bridge::CvImage(std_msgs::Header(), "bgr8",
                                                   image).toImageMsg();
    image_pub.publish(msg);
  }

  ros::NodeHandle nh;

  // Set up publishing of static images
  image_transport::ImageTransport* it;
  image_transport::Publisher image_pub;

  bool got_object;
  bool got_cat;
  ros::Subscriber object_sub;

  std::string image_directory;

  // Set up subscribing
  void object_callback(const dnn_detect::DetectedObjectArray& results) {
    got_object = true;
    for (const auto& obj : results.objects) {
      if (obj.class_name == "cat") {
        got_cat = true;
      }
    }
  }
};


TEST_F(DnnImagesTest, cat) {
  ros::Rate loop_rate(5);
  while (nh.ok() && !got_object && !got_cat) {
    publish_image("cat.jpg");
    ros::spinOnce();
    loop_rate.sleep();
  }

  ASSERT_TRUE(got_cat);
}

int main(int argc, char** argv)
{
  testing::InitGoogleTest(&argc, argv);
  ros::init(argc, argv, "DnnImagesTest");
  return RUN_ALL_TESTS();
}
--------------------------------------------------------------------------------
/test/test_images/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UbiquityRobotics/dnn_detect/c23161c9c1c2a2bd15618b6b3450522ac8aad2cb/test/test_images/cat.jpg
--------------------------------------------------------------------------------
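Example usage (not part of the package): a minimal one-shot client sketch. The service name /dnn_detect/detect and the message fields come from srv/Detect.srv, msg/DetectedObject.msg and src/dnn_detect.cpp above; the node and file names are made up for illustration.

```cpp
// one_shot_client.cpp -- hypothetical example, not part of this package.
// Calls the dnn_detect "detect" service once and prints the detections.
#include <ros/ros.h>
#include "dnn_detect/Detect.h"

int main(int argc, char **argv)
{
  ros::init(argc, argv, "one_shot_client");
  ros::NodeHandle nh;

  // Service advertised by the dnn_detect node (same name the test above uses).
  ros::ServiceClient client =
      nh.serviceClient<dnn_detect::Detect>("/dnn_detect/detect");

  dnn_detect::Detect srv;   // request is empty; response carries the results
  if (!client.call(srv)) {
    ROS_ERROR("Detect service call failed");
    return 1;
  }

  for (const auto &obj : srv.response.result.objects) {
    ROS_INFO("%s (%.2f): x [%.0f, %.0f] y [%.0f, %.0f]",
             obj.class_name.c_str(), obj.confidence,
             obj.x_min, obj.x_max, obj.y_min, obj.y_max);
  }
  return 0;
}
```

When the node is started with single_shot set to true, the service call blocks until the next image on the camera topic has been received and processed; with single_shot false the node publishes results continuously on /dnn_objects and the service simply returns the next frame's detections.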