├── .gitignore
├── CHANGELOG.rst
├── CMakeLists.txt
├── README.md
├── launch
│   └── dnn_detect.launch
├── model
│   ├── MobileNetSSD_deploy.caffemodel
│   └── MobileNetSSD_deploy.prototxt.txt
├── msg
│   ├── DetectedObject.msg
│   └── DetectedObjectArray.msg
├── package.xml
├── src
│   └── dnn_detect.cpp
├── srv
│   └── Detect.srv
└── test
    ├── dnn_images.test
    ├── dnn_images_test.cpp
    └── test_images
        └── cat.jpg
/.gitignore:
--------------------------------------------------------------------------------
1 | # Prerequisites
2 | *.d
3 |
4 | # Compiled Object files
5 | *.slo
6 | *.lo
7 | *.o
8 | *.obj
9 |
10 | # Precompiled Headers
11 | *.gch
12 | *.pch
13 |
14 | # Compiled Dynamic libraries
15 | *.so
16 | *.dylib
17 | *.dll
18 |
19 | # Fortran module files
20 | *.mod
21 | *.smod
22 |
23 | # Compiled Static libraries
24 | *.lai
25 | *.la
26 | *.a
27 | *.lib
28 |
29 | # Executables
30 | *.exe
31 | *.out
32 | *.app
33 |
--------------------------------------------------------------------------------
/CHANGELOG.rst:
--------------------------------------------------------------------------------
1 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2 | Changelog for package dnn_detect
3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4 |
5 | 0.1.0 (2020-09-21)
6 | ------------------
7 | * dnn_images_test.cpp - support opencv version 4
8 | * Noetic support
9 | * Contributors: Jim Vaughan, Rohan Agrawal, Tim
10 |
11 | 0.0.3 (2018-02-16)
12 | ------------------
13 | * Add optional rotation of image
14 | * Added one shot mode, which requires a service call to trigger detection.
15 | * Update README.md
16 | * Contributors: Jim Vaughan
17 |
18 | 0.0.2 (2017-12-03)
19 | ------------------
20 | * Initial commit
21 | * Contributors: Jim Vaughan
22 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
2 | cmake_minimum_required(VERSION 2.8.3)
3 | project(dnn_detect)
4 |
5 | find_package(catkin REQUIRED COMPONENTS
6 | roscpp
7 | tf2_geometry_msgs
8 | tf2_ros
9 | tf2
10 | visualization_msgs
11 | image_transport
12 | cv_bridge
13 | std_msgs
14 | )
15 |
16 | find_package(OpenCV REQUIRED)
17 |
18 |
19 | add_message_files(
20 | FILES
21 | DetectedObject.msg
22 | DetectedObjectArray.msg
23 | )
24 |
25 |
26 | add_service_files(
27 | FILES
28 | Detect.srv
29 | )
30 |
31 | generate_messages(
32 | DEPENDENCIES
33 | std_msgs
34 | )
35 | catkin_package(INCLUDE_DIRS DEPENDS OpenCV)
36 |
37 | ###########
38 | ## Build ##
39 | ###########
40 |
41 |
42 | add_definitions(-std=c++11)
43 |
44 | include_directories(${catkin_INCLUDE_DIRS})
45 | include_directories(${OpenCV_INCLUDE_DIRS})
46 |
47 | add_executable(dnn_detect src/dnn_detect.cpp)
48 |
49 | add_dependencies(dnn_detect ${${PROJECT_NAME}_EXPORTED_TARGETS}
50 | ${catkin_EXPORTED_TARGETS})
51 |
52 | target_link_libraries(dnn_detect ${catkin_LIBRARIES} ${OpenCV_LIBS})
53 |
54 | #############
55 | ## Install ##
56 | #############
57 |
58 | ## Mark executables and/or libraries for installation
59 | install(TARGETS dnn_detect
60 | ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
61 | LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
62 | RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
63 | )
64 |
65 | install(DIRECTORY launch/
66 | DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/launch
67 | )
68 |
69 | install(DIRECTORY model/
70 | DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/model
71 | )
72 |
73 | ###########
74 | ## Tests ##
75 | ###########
76 |
77 | if(CATKIN_ENABLE_TESTING)
78 | find_package(rostest REQUIRED)
79 |
80 | # Tests need c++11
81 | add_definitions(-std=c++11)
82 |
83 | add_rostest_gtest(dnn_images_test
84 | test/dnn_images.test
85 | test/dnn_images_test.cpp)
86 | add_dependencies(dnn_images_test ${PROJECT_NAME}_generate_messages)
87 | target_link_libraries(dnn_images_test ${catkin_LIBRARIES} ${OpenCV_LIBS})
88 | endif()
89 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # dnn_detect
3 |
4 | This package provides object detection using OpenCV's Deep Neural Network module.
5 |
6 | Documentation is at [http://wiki.ros.org/dnn_detect](http://wiki.ros.org/dnn_detect).
7 |
8 | The model used by default is from [chuanqi305's MobileNet-SSD](https://github.com/chuanqi305/MobileNet-SSD).
9 |
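A quick way to try it, using the files in this repository: run `roslaunch dnn_detect dnn_detect.launch` (remapping the camera topic as needed) and watch the `/dnn_objects` topic for `dnn_detect/DetectedObjectArray` messages.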
--------------------------------------------------------------------------------
/launch/dnn_detect.launch:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
10 |
11 |
12 |
13 |
15 |
16 |
17 |
18 |
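The roslaunch XML of this file did not survive extraction (only empty line markers remain). Below is a minimal sketch of what a launch file for this node could look like, based solely on the parameters and topic names read in src/dnn_detect.cpp; the argument names, default camera topic, and parameter values shown are assumptions, not the original file.

<launch>
  <!-- Assumed arguments; only the parameter names themselves come from src/dnn_detect.cpp -->
  <arg name="camera" default="/camera/image_raw"/>
  <arg name="data_dir" default="$(find dnn_detect)/model"/>

  <node pkg="dnn_detect" type="dnn_detect" name="dnn_detect" output="log">
    <!-- Parameters read in the DnnNode constructor (values shown match the C++ defaults) -->
    <param name="data_dir" value="$(arg data_dir)"/>
    <param name="protonet_file" value="MobileNetSSD_deploy.prototxt.txt"/>
    <param name="caffe_model_file" value="MobileNetSSD_deploy.caffemodel"/>
    <param name="min_confidence" value="0.2"/>
    <param name="im_size" value="300"/>
    <param name="single_shot" value="false"/>

    <!-- The node subscribes to /camera and publishes /dnn_objects and /dnn_images -->
    <remap from="/camera" to="$(arg camera)"/>
  </node>
</launch>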
--------------------------------------------------------------------------------
/model/MobileNetSSD_deploy.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UbiquityRobotics/dnn_detect/c23161c9c1c2a2bd15618b6b3450522ac8aad2cb/model/MobileNetSSD_deploy.caffemodel
--------------------------------------------------------------------------------
/model/MobileNetSSD_deploy.prototxt.txt:
--------------------------------------------------------------------------------
1 | name: "MobileNet-SSD"
2 | input: "data"
3 | input_shape {
4 | dim: 1
5 | dim: 3
6 | dim: 300
7 | dim: 300
8 | }
9 | layer {
10 | name: "conv0"
11 | type: "Convolution"
12 | bottom: "data"
13 | top: "conv0"
14 | param {
15 | lr_mult: 1.0
16 | decay_mult: 1.0
17 | }
18 | param {
19 | lr_mult: 2.0
20 | decay_mult: 0.0
21 | }
22 | convolution_param {
23 | num_output: 32
24 | pad: 1
25 | kernel_size: 3
26 | stride: 2
27 | weight_filler {
28 | type: "msra"
29 | }
30 | bias_filler {
31 | type: "constant"
32 | value: 0.0
33 | }
34 | }
35 | }
36 | layer {
37 | name: "conv0/relu"
38 | type: "ReLU"
39 | bottom: "conv0"
40 | top: "conv0"
41 | }
42 | layer {
43 | name: "conv1/dw"
44 | type: "Convolution"
45 | bottom: "conv0"
46 | top: "conv1/dw"
47 | param {
48 | lr_mult: 1.0
49 | decay_mult: 1.0
50 | }
51 | param {
52 | lr_mult: 2.0
53 | decay_mult: 0.0
54 | }
55 | convolution_param {
56 | num_output: 32
57 | pad: 1
58 | kernel_size: 3
59 | group: 32
60 | engine: CAFFE
61 | weight_filler {
62 | type: "msra"
63 | }
64 | bias_filler {
65 | type: "constant"
66 | value: 0.0
67 | }
68 | }
69 | }
70 | layer {
71 | name: "conv1/dw/relu"
72 | type: "ReLU"
73 | bottom: "conv1/dw"
74 | top: "conv1/dw"
75 | }
76 | layer {
77 | name: "conv1"
78 | type: "Convolution"
79 | bottom: "conv1/dw"
80 | top: "conv1"
81 | param {
82 | lr_mult: 1.0
83 | decay_mult: 1.0
84 | }
85 | param {
86 | lr_mult: 2.0
87 | decay_mult: 0.0
88 | }
89 | convolution_param {
90 | num_output: 64
91 | kernel_size: 1
92 | weight_filler {
93 | type: "msra"
94 | }
95 | bias_filler {
96 | type: "constant"
97 | value: 0.0
98 | }
99 | }
100 | }
101 | layer {
102 | name: "conv1/relu"
103 | type: "ReLU"
104 | bottom: "conv1"
105 | top: "conv1"
106 | }
107 | layer {
108 | name: "conv2/dw"
109 | type: "Convolution"
110 | bottom: "conv1"
111 | top: "conv2/dw"
112 | param {
113 | lr_mult: 1.0
114 | decay_mult: 1.0
115 | }
116 | param {
117 | lr_mult: 2.0
118 | decay_mult: 0.0
119 | }
120 | convolution_param {
121 | num_output: 64
122 | pad: 1
123 | kernel_size: 3
124 | stride: 2
125 | group: 64
126 | engine: CAFFE
127 | weight_filler {
128 | type: "msra"
129 | }
130 | bias_filler {
131 | type: "constant"
132 | value: 0.0
133 | }
134 | }
135 | }
136 | layer {
137 | name: "conv2/dw/relu"
138 | type: "ReLU"
139 | bottom: "conv2/dw"
140 | top: "conv2/dw"
141 | }
142 | layer {
143 | name: "conv2"
144 | type: "Convolution"
145 | bottom: "conv2/dw"
146 | top: "conv2"
147 | param {
148 | lr_mult: 1.0
149 | decay_mult: 1.0
150 | }
151 | param {
152 | lr_mult: 2.0
153 | decay_mult: 0.0
154 | }
155 | convolution_param {
156 | num_output: 128
157 | kernel_size: 1
158 | weight_filler {
159 | type: "msra"
160 | }
161 | bias_filler {
162 | type: "constant"
163 | value: 0.0
164 | }
165 | }
166 | }
167 | layer {
168 | name: "conv2/relu"
169 | type: "ReLU"
170 | bottom: "conv2"
171 | top: "conv2"
172 | }
173 | layer {
174 | name: "conv3/dw"
175 | type: "Convolution"
176 | bottom: "conv2"
177 | top: "conv3/dw"
178 | param {
179 | lr_mult: 1.0
180 | decay_mult: 1.0
181 | }
182 | param {
183 | lr_mult: 2.0
184 | decay_mult: 0.0
185 | }
186 | convolution_param {
187 | num_output: 128
188 | pad: 1
189 | kernel_size: 3
190 | group: 128
191 | engine: CAFFE
192 | weight_filler {
193 | type: "msra"
194 | }
195 | bias_filler {
196 | type: "constant"
197 | value: 0.0
198 | }
199 | }
200 | }
201 | layer {
202 | name: "conv3/dw/relu"
203 | type: "ReLU"
204 | bottom: "conv3/dw"
205 | top: "conv3/dw"
206 | }
207 | layer {
208 | name: "conv3"
209 | type: "Convolution"
210 | bottom: "conv3/dw"
211 | top: "conv3"
212 | param {
213 | lr_mult: 1.0
214 | decay_mult: 1.0
215 | }
216 | param {
217 | lr_mult: 2.0
218 | decay_mult: 0.0
219 | }
220 | convolution_param {
221 | num_output: 128
222 | kernel_size: 1
223 | weight_filler {
224 | type: "msra"
225 | }
226 | bias_filler {
227 | type: "constant"
228 | value: 0.0
229 | }
230 | }
231 | }
232 | layer {
233 | name: "conv3/relu"
234 | type: "ReLU"
235 | bottom: "conv3"
236 | top: "conv3"
237 | }
238 | layer {
239 | name: "conv4/dw"
240 | type: "Convolution"
241 | bottom: "conv3"
242 | top: "conv4/dw"
243 | param {
244 | lr_mult: 1.0
245 | decay_mult: 1.0
246 | }
247 | param {
248 | lr_mult: 2.0
249 | decay_mult: 0.0
250 | }
251 | convolution_param {
252 | num_output: 128
253 | pad: 1
254 | kernel_size: 3
255 | stride: 2
256 | group: 128
257 | engine: CAFFE
258 | weight_filler {
259 | type: "msra"
260 | }
261 | bias_filler {
262 | type: "constant"
263 | value: 0.0
264 | }
265 | }
266 | }
267 | layer {
268 | name: "conv4/dw/relu"
269 | type: "ReLU"
270 | bottom: "conv4/dw"
271 | top: "conv4/dw"
272 | }
273 | layer {
274 | name: "conv4"
275 | type: "Convolution"
276 | bottom: "conv4/dw"
277 | top: "conv4"
278 | param {
279 | lr_mult: 1.0
280 | decay_mult: 1.0
281 | }
282 | param {
283 | lr_mult: 2.0
284 | decay_mult: 0.0
285 | }
286 | convolution_param {
287 | num_output: 256
288 | kernel_size: 1
289 | weight_filler {
290 | type: "msra"
291 | }
292 | bias_filler {
293 | type: "constant"
294 | value: 0.0
295 | }
296 | }
297 | }
298 | layer {
299 | name: "conv4/relu"
300 | type: "ReLU"
301 | bottom: "conv4"
302 | top: "conv4"
303 | }
304 | layer {
305 | name: "conv5/dw"
306 | type: "Convolution"
307 | bottom: "conv4"
308 | top: "conv5/dw"
309 | param {
310 | lr_mult: 1.0
311 | decay_mult: 1.0
312 | }
313 | param {
314 | lr_mult: 2.0
315 | decay_mult: 0.0
316 | }
317 | convolution_param {
318 | num_output: 256
319 | pad: 1
320 | kernel_size: 3
321 | group: 256
322 | engine: CAFFE
323 | weight_filler {
324 | type: "msra"
325 | }
326 | bias_filler {
327 | type: "constant"
328 | value: 0.0
329 | }
330 | }
331 | }
332 | layer {
333 | name: "conv5/dw/relu"
334 | type: "ReLU"
335 | bottom: "conv5/dw"
336 | top: "conv5/dw"
337 | }
338 | layer {
339 | name: "conv5"
340 | type: "Convolution"
341 | bottom: "conv5/dw"
342 | top: "conv5"
343 | param {
344 | lr_mult: 1.0
345 | decay_mult: 1.0
346 | }
347 | param {
348 | lr_mult: 2.0
349 | decay_mult: 0.0
350 | }
351 | convolution_param {
352 | num_output: 256
353 | kernel_size: 1
354 | weight_filler {
355 | type: "msra"
356 | }
357 | bias_filler {
358 | type: "constant"
359 | value: 0.0
360 | }
361 | }
362 | }
363 | layer {
364 | name: "conv5/relu"
365 | type: "ReLU"
366 | bottom: "conv5"
367 | top: "conv5"
368 | }
369 | layer {
370 | name: "conv6/dw"
371 | type: "Convolution"
372 | bottom: "conv5"
373 | top: "conv6/dw"
374 | param {
375 | lr_mult: 1.0
376 | decay_mult: 1.0
377 | }
378 | param {
379 | lr_mult: 2.0
380 | decay_mult: 0.0
381 | }
382 | convolution_param {
383 | num_output: 256
384 | pad: 1
385 | kernel_size: 3
386 | stride: 2
387 | group: 256
388 | engine: CAFFE
389 | weight_filler {
390 | type: "msra"
391 | }
392 | bias_filler {
393 | type: "constant"
394 | value: 0.0
395 | }
396 | }
397 | }
398 | layer {
399 | name: "conv6/dw/relu"
400 | type: "ReLU"
401 | bottom: "conv6/dw"
402 | top: "conv6/dw"
403 | }
404 | layer {
405 | name: "conv6"
406 | type: "Convolution"
407 | bottom: "conv6/dw"
408 | top: "conv6"
409 | param {
410 | lr_mult: 1.0
411 | decay_mult: 1.0
412 | }
413 | param {
414 | lr_mult: 2.0
415 | decay_mult: 0.0
416 | }
417 | convolution_param {
418 | num_output: 512
419 | kernel_size: 1
420 | weight_filler {
421 | type: "msra"
422 | }
423 | bias_filler {
424 | type: "constant"
425 | value: 0.0
426 | }
427 | }
428 | }
429 | layer {
430 | name: "conv6/relu"
431 | type: "ReLU"
432 | bottom: "conv6"
433 | top: "conv6"
434 | }
435 | layer {
436 | name: "conv7/dw"
437 | type: "Convolution"
438 | bottom: "conv6"
439 | top: "conv7/dw"
440 | param {
441 | lr_mult: 1.0
442 | decay_mult: 1.0
443 | }
444 | param {
445 | lr_mult: 2.0
446 | decay_mult: 0.0
447 | }
448 | convolution_param {
449 | num_output: 512
450 | pad: 1
451 | kernel_size: 3
452 | group: 512
453 | engine: CAFFE
454 | weight_filler {
455 | type: "msra"
456 | }
457 | bias_filler {
458 | type: "constant"
459 | value: 0.0
460 | }
461 | }
462 | }
463 | layer {
464 | name: "conv7/dw/relu"
465 | type: "ReLU"
466 | bottom: "conv7/dw"
467 | top: "conv7/dw"
468 | }
469 | layer {
470 | name: "conv7"
471 | type: "Convolution"
472 | bottom: "conv7/dw"
473 | top: "conv7"
474 | param {
475 | lr_mult: 1.0
476 | decay_mult: 1.0
477 | }
478 | param {
479 | lr_mult: 2.0
480 | decay_mult: 0.0
481 | }
482 | convolution_param {
483 | num_output: 512
484 | kernel_size: 1
485 | weight_filler {
486 | type: "msra"
487 | }
488 | bias_filler {
489 | type: "constant"
490 | value: 0.0
491 | }
492 | }
493 | }
494 | layer {
495 | name: "conv7/relu"
496 | type: "ReLU"
497 | bottom: "conv7"
498 | top: "conv7"
499 | }
500 | layer {
501 | name: "conv8/dw"
502 | type: "Convolution"
503 | bottom: "conv7"
504 | top: "conv8/dw"
505 | param {
506 | lr_mult: 1.0
507 | decay_mult: 1.0
508 | }
509 | param {
510 | lr_mult: 2.0
511 | decay_mult: 0.0
512 | }
513 | convolution_param {
514 | num_output: 512
515 | pad: 1
516 | kernel_size: 3
517 | group: 512
518 | engine: CAFFE
519 | weight_filler {
520 | type: "msra"
521 | }
522 | bias_filler {
523 | type: "constant"
524 | value: 0.0
525 | }
526 | }
527 | }
528 | layer {
529 | name: "conv8/dw/relu"
530 | type: "ReLU"
531 | bottom: "conv8/dw"
532 | top: "conv8/dw"
533 | }
534 | layer {
535 | name: "conv8"
536 | type: "Convolution"
537 | bottom: "conv8/dw"
538 | top: "conv8"
539 | param {
540 | lr_mult: 1.0
541 | decay_mult: 1.0
542 | }
543 | param {
544 | lr_mult: 2.0
545 | decay_mult: 0.0
546 | }
547 | convolution_param {
548 | num_output: 512
549 | kernel_size: 1
550 | weight_filler {
551 | type: "msra"
552 | }
553 | bias_filler {
554 | type: "constant"
555 | value: 0.0
556 | }
557 | }
558 | }
559 | layer {
560 | name: "conv8/relu"
561 | type: "ReLU"
562 | bottom: "conv8"
563 | top: "conv8"
564 | }
565 | layer {
566 | name: "conv9/dw"
567 | type: "Convolution"
568 | bottom: "conv8"
569 | top: "conv9/dw"
570 | param {
571 | lr_mult: 1.0
572 | decay_mult: 1.0
573 | }
574 | param {
575 | lr_mult: 2.0
576 | decay_mult: 0.0
577 | }
578 | convolution_param {
579 | num_output: 512
580 | pad: 1
581 | kernel_size: 3
582 | group: 512
583 | engine: CAFFE
584 | weight_filler {
585 | type: "msra"
586 | }
587 | bias_filler {
588 | type: "constant"
589 | value: 0.0
590 | }
591 | }
592 | }
593 | layer {
594 | name: "conv9/dw/relu"
595 | type: "ReLU"
596 | bottom: "conv9/dw"
597 | top: "conv9/dw"
598 | }
599 | layer {
600 | name: "conv9"
601 | type: "Convolution"
602 | bottom: "conv9/dw"
603 | top: "conv9"
604 | param {
605 | lr_mult: 1.0
606 | decay_mult: 1.0
607 | }
608 | param {
609 | lr_mult: 2.0
610 | decay_mult: 0.0
611 | }
612 | convolution_param {
613 | num_output: 512
614 | kernel_size: 1
615 | weight_filler {
616 | type: "msra"
617 | }
618 | bias_filler {
619 | type: "constant"
620 | value: 0.0
621 | }
622 | }
623 | }
624 | layer {
625 | name: "conv9/relu"
626 | type: "ReLU"
627 | bottom: "conv9"
628 | top: "conv9"
629 | }
630 | layer {
631 | name: "conv10/dw"
632 | type: "Convolution"
633 | bottom: "conv9"
634 | top: "conv10/dw"
635 | param {
636 | lr_mult: 1.0
637 | decay_mult: 1.0
638 | }
639 | param {
640 | lr_mult: 2.0
641 | decay_mult: 0.0
642 | }
643 | convolution_param {
644 | num_output: 512
645 | pad: 1
646 | kernel_size: 3
647 | group: 512
648 | engine: CAFFE
649 | weight_filler {
650 | type: "msra"
651 | }
652 | bias_filler {
653 | type: "constant"
654 | value: 0.0
655 | }
656 | }
657 | }
658 | layer {
659 | name: "conv10/dw/relu"
660 | type: "ReLU"
661 | bottom: "conv10/dw"
662 | top: "conv10/dw"
663 | }
664 | layer {
665 | name: "conv10"
666 | type: "Convolution"
667 | bottom: "conv10/dw"
668 | top: "conv10"
669 | param {
670 | lr_mult: 1.0
671 | decay_mult: 1.0
672 | }
673 | param {
674 | lr_mult: 2.0
675 | decay_mult: 0.0
676 | }
677 | convolution_param {
678 | num_output: 512
679 | kernel_size: 1
680 | weight_filler {
681 | type: "msra"
682 | }
683 | bias_filler {
684 | type: "constant"
685 | value: 0.0
686 | }
687 | }
688 | }
689 | layer {
690 | name: "conv10/relu"
691 | type: "ReLU"
692 | bottom: "conv10"
693 | top: "conv10"
694 | }
695 | layer {
696 | name: "conv11/dw"
697 | type: "Convolution"
698 | bottom: "conv10"
699 | top: "conv11/dw"
700 | param {
701 | lr_mult: 1.0
702 | decay_mult: 1.0
703 | }
704 | param {
705 | lr_mult: 2.0
706 | decay_mult: 0.0
707 | }
708 | convolution_param {
709 | num_output: 512
710 | pad: 1
711 | kernel_size: 3
712 | group: 512
713 | engine: CAFFE
714 | weight_filler {
715 | type: "msra"
716 | }
717 | bias_filler {
718 | type: "constant"
719 | value: 0.0
720 | }
721 | }
722 | }
723 | layer {
724 | name: "conv11/dw/relu"
725 | type: "ReLU"
726 | bottom: "conv11/dw"
727 | top: "conv11/dw"
728 | }
729 | layer {
730 | name: "conv11"
731 | type: "Convolution"
732 | bottom: "conv11/dw"
733 | top: "conv11"
734 | param {
735 | lr_mult: 1.0
736 | decay_mult: 1.0
737 | }
738 | param {
739 | lr_mult: 2.0
740 | decay_mult: 0.0
741 | }
742 | convolution_param {
743 | num_output: 512
744 | kernel_size: 1
745 | weight_filler {
746 | type: "msra"
747 | }
748 | bias_filler {
749 | type: "constant"
750 | value: 0.0
751 | }
752 | }
753 | }
754 | layer {
755 | name: "conv11/relu"
756 | type: "ReLU"
757 | bottom: "conv11"
758 | top: "conv11"
759 | }
760 | layer {
761 | name: "conv12/dw"
762 | type: "Convolution"
763 | bottom: "conv11"
764 | top: "conv12/dw"
765 | param {
766 | lr_mult: 1.0
767 | decay_mult: 1.0
768 | }
769 | param {
770 | lr_mult: 2.0
771 | decay_mult: 0.0
772 | }
773 | convolution_param {
774 | num_output: 512
775 | pad: 1
776 | kernel_size: 3
777 | stride: 2
778 | group: 512
779 | engine: CAFFE
780 | weight_filler {
781 | type: "msra"
782 | }
783 | bias_filler {
784 | type: "constant"
785 | value: 0.0
786 | }
787 | }
788 | }
789 | layer {
790 | name: "conv12/dw/relu"
791 | type: "ReLU"
792 | bottom: "conv12/dw"
793 | top: "conv12/dw"
794 | }
795 | layer {
796 | name: "conv12"
797 | type: "Convolution"
798 | bottom: "conv12/dw"
799 | top: "conv12"
800 | param {
801 | lr_mult: 1.0
802 | decay_mult: 1.0
803 | }
804 | param {
805 | lr_mult: 2.0
806 | decay_mult: 0.0
807 | }
808 | convolution_param {
809 | num_output: 1024
810 | kernel_size: 1
811 | weight_filler {
812 | type: "msra"
813 | }
814 | bias_filler {
815 | type: "constant"
816 | value: 0.0
817 | }
818 | }
819 | }
820 | layer {
821 | name: "conv12/relu"
822 | type: "ReLU"
823 | bottom: "conv12"
824 | top: "conv12"
825 | }
826 | layer {
827 | name: "conv13/dw"
828 | type: "Convolution"
829 | bottom: "conv12"
830 | top: "conv13/dw"
831 | param {
832 | lr_mult: 1.0
833 | decay_mult: 1.0
834 | }
835 | param {
836 | lr_mult: 2.0
837 | decay_mult: 0.0
838 | }
839 | convolution_param {
840 | num_output: 1024
841 | pad: 1
842 | kernel_size: 3
843 | group: 1024
844 | engine: CAFFE
845 | weight_filler {
846 | type: "msra"
847 | }
848 | bias_filler {
849 | type: "constant"
850 | value: 0.0
851 | }
852 | }
853 | }
854 | layer {
855 | name: "conv13/dw/relu"
856 | type: "ReLU"
857 | bottom: "conv13/dw"
858 | top: "conv13/dw"
859 | }
860 | layer {
861 | name: "conv13"
862 | type: "Convolution"
863 | bottom: "conv13/dw"
864 | top: "conv13"
865 | param {
866 | lr_mult: 1.0
867 | decay_mult: 1.0
868 | }
869 | param {
870 | lr_mult: 2.0
871 | decay_mult: 0.0
872 | }
873 | convolution_param {
874 | num_output: 1024
875 | kernel_size: 1
876 | weight_filler {
877 | type: "msra"
878 | }
879 | bias_filler {
880 | type: "constant"
881 | value: 0.0
882 | }
883 | }
884 | }
885 | layer {
886 | name: "conv13/relu"
887 | type: "ReLU"
888 | bottom: "conv13"
889 | top: "conv13"
890 | }
891 | layer {
892 | name: "conv14_1"
893 | type: "Convolution"
894 | bottom: "conv13"
895 | top: "conv14_1"
896 | param {
897 | lr_mult: 1.0
898 | decay_mult: 1.0
899 | }
900 | param {
901 | lr_mult: 2.0
902 | decay_mult: 0.0
903 | }
904 | convolution_param {
905 | num_output: 256
906 | kernel_size: 1
907 | weight_filler {
908 | type: "msra"
909 | }
910 | bias_filler {
911 | type: "constant"
912 | value: 0.0
913 | }
914 | }
915 | }
916 | layer {
917 | name: "conv14_1/relu"
918 | type: "ReLU"
919 | bottom: "conv14_1"
920 | top: "conv14_1"
921 | }
922 | layer {
923 | name: "conv14_2"
924 | type: "Convolution"
925 | bottom: "conv14_1"
926 | top: "conv14_2"
927 | param {
928 | lr_mult: 1.0
929 | decay_mult: 1.0
930 | }
931 | param {
932 | lr_mult: 2.0
933 | decay_mult: 0.0
934 | }
935 | convolution_param {
936 | num_output: 512
937 | pad: 1
938 | kernel_size: 3
939 | stride: 2
940 | weight_filler {
941 | type: "msra"
942 | }
943 | bias_filler {
944 | type: "constant"
945 | value: 0.0
946 | }
947 | }
948 | }
949 | layer {
950 | name: "conv14_2/relu"
951 | type: "ReLU"
952 | bottom: "conv14_2"
953 | top: "conv14_2"
954 | }
955 | layer {
956 | name: "conv15_1"
957 | type: "Convolution"
958 | bottom: "conv14_2"
959 | top: "conv15_1"
960 | param {
961 | lr_mult: 1.0
962 | decay_mult: 1.0
963 | }
964 | param {
965 | lr_mult: 2.0
966 | decay_mult: 0.0
967 | }
968 | convolution_param {
969 | num_output: 128
970 | kernel_size: 1
971 | weight_filler {
972 | type: "msra"
973 | }
974 | bias_filler {
975 | type: "constant"
976 | value: 0.0
977 | }
978 | }
979 | }
980 | layer {
981 | name: "conv15_1/relu"
982 | type: "ReLU"
983 | bottom: "conv15_1"
984 | top: "conv15_1"
985 | }
986 | layer {
987 | name: "conv15_2"
988 | type: "Convolution"
989 | bottom: "conv15_1"
990 | top: "conv15_2"
991 | param {
992 | lr_mult: 1.0
993 | decay_mult: 1.0
994 | }
995 | param {
996 | lr_mult: 2.0
997 | decay_mult: 0.0
998 | }
999 | convolution_param {
1000 | num_output: 256
1001 | pad: 1
1002 | kernel_size: 3
1003 | stride: 2
1004 | weight_filler {
1005 | type: "msra"
1006 | }
1007 | bias_filler {
1008 | type: "constant"
1009 | value: 0.0
1010 | }
1011 | }
1012 | }
1013 | layer {
1014 | name: "conv15_2/relu"
1015 | type: "ReLU"
1016 | bottom: "conv15_2"
1017 | top: "conv15_2"
1018 | }
1019 | layer {
1020 | name: "conv16_1"
1021 | type: "Convolution"
1022 | bottom: "conv15_2"
1023 | top: "conv16_1"
1024 | param {
1025 | lr_mult: 1.0
1026 | decay_mult: 1.0
1027 | }
1028 | param {
1029 | lr_mult: 2.0
1030 | decay_mult: 0.0
1031 | }
1032 | convolution_param {
1033 | num_output: 128
1034 | kernel_size: 1
1035 | weight_filler {
1036 | type: "msra"
1037 | }
1038 | bias_filler {
1039 | type: "constant"
1040 | value: 0.0
1041 | }
1042 | }
1043 | }
1044 | layer {
1045 | name: "conv16_1/relu"
1046 | type: "ReLU"
1047 | bottom: "conv16_1"
1048 | top: "conv16_1"
1049 | }
1050 | layer {
1051 | name: "conv16_2"
1052 | type: "Convolution"
1053 | bottom: "conv16_1"
1054 | top: "conv16_2"
1055 | param {
1056 | lr_mult: 1.0
1057 | decay_mult: 1.0
1058 | }
1059 | param {
1060 | lr_mult: 2.0
1061 | decay_mult: 0.0
1062 | }
1063 | convolution_param {
1064 | num_output: 256
1065 | pad: 1
1066 | kernel_size: 3
1067 | stride: 2
1068 | weight_filler {
1069 | type: "msra"
1070 | }
1071 | bias_filler {
1072 | type: "constant"
1073 | value: 0.0
1074 | }
1075 | }
1076 | }
1077 | layer {
1078 | name: "conv16_2/relu"
1079 | type: "ReLU"
1080 | bottom: "conv16_2"
1081 | top: "conv16_2"
1082 | }
1083 | layer {
1084 | name: "conv17_1"
1085 | type: "Convolution"
1086 | bottom: "conv16_2"
1087 | top: "conv17_1"
1088 | param {
1089 | lr_mult: 1.0
1090 | decay_mult: 1.0
1091 | }
1092 | param {
1093 | lr_mult: 2.0
1094 | decay_mult: 0.0
1095 | }
1096 | convolution_param {
1097 | num_output: 64
1098 | kernel_size: 1
1099 | weight_filler {
1100 | type: "msra"
1101 | }
1102 | bias_filler {
1103 | type: "constant"
1104 | value: 0.0
1105 | }
1106 | }
1107 | }
1108 | layer {
1109 | name: "conv17_1/relu"
1110 | type: "ReLU"
1111 | bottom: "conv17_1"
1112 | top: "conv17_1"
1113 | }
1114 | layer {
1115 | name: "conv17_2"
1116 | type: "Convolution"
1117 | bottom: "conv17_1"
1118 | top: "conv17_2"
1119 | param {
1120 | lr_mult: 1.0
1121 | decay_mult: 1.0
1122 | }
1123 | param {
1124 | lr_mult: 2.0
1125 | decay_mult: 0.0
1126 | }
1127 | convolution_param {
1128 | num_output: 128
1129 | pad: 1
1130 | kernel_size: 3
1131 | stride: 2
1132 | weight_filler {
1133 | type: "msra"
1134 | }
1135 | bias_filler {
1136 | type: "constant"
1137 | value: 0.0
1138 | }
1139 | }
1140 | }
1141 | layer {
1142 | name: "conv17_2/relu"
1143 | type: "ReLU"
1144 | bottom: "conv17_2"
1145 | top: "conv17_2"
1146 | }
1147 | layer {
1148 | name: "conv11_mbox_loc"
1149 | type: "Convolution"
1150 | bottom: "conv11"
1151 | top: "conv11_mbox_loc"
1152 | param {
1153 | lr_mult: 1.0
1154 | decay_mult: 1.0
1155 | }
1156 | param {
1157 | lr_mult: 2.0
1158 | decay_mult: 0.0
1159 | }
1160 | convolution_param {
1161 | num_output: 12
1162 | kernel_size: 1
1163 | weight_filler {
1164 | type: "msra"
1165 | }
1166 | bias_filler {
1167 | type: "constant"
1168 | value: 0.0
1169 | }
1170 | }
1171 | }
1172 | layer {
1173 | name: "conv11_mbox_loc_perm"
1174 | type: "Permute"
1175 | bottom: "conv11_mbox_loc"
1176 | top: "conv11_mbox_loc_perm"
1177 | permute_param {
1178 | order: 0
1179 | order: 2
1180 | order: 3
1181 | order: 1
1182 | }
1183 | }
1184 | layer {
1185 | name: "conv11_mbox_loc_flat"
1186 | type: "Flatten"
1187 | bottom: "conv11_mbox_loc_perm"
1188 | top: "conv11_mbox_loc_flat"
1189 | flatten_param {
1190 | axis: 1
1191 | }
1192 | }
1193 | layer {
1194 | name: "conv11_mbox_conf"
1195 | type: "Convolution"
1196 | bottom: "conv11"
1197 | top: "conv11_mbox_conf"
1198 | param {
1199 | lr_mult: 1.0
1200 | decay_mult: 1.0
1201 | }
1202 | param {
1203 | lr_mult: 2.0
1204 | decay_mult: 0.0
1205 | }
1206 | convolution_param {
1207 | num_output: 63
1208 | kernel_size: 1
1209 | weight_filler {
1210 | type: "msra"
1211 | }
1212 | bias_filler {
1213 | type: "constant"
1214 | value: 0.0
1215 | }
1216 | }
1217 | }
1218 | layer {
1219 | name: "conv11_mbox_conf_perm"
1220 | type: "Permute"
1221 | bottom: "conv11_mbox_conf"
1222 | top: "conv11_mbox_conf_perm"
1223 | permute_param {
1224 | order: 0
1225 | order: 2
1226 | order: 3
1227 | order: 1
1228 | }
1229 | }
1230 | layer {
1231 | name: "conv11_mbox_conf_flat"
1232 | type: "Flatten"
1233 | bottom: "conv11_mbox_conf_perm"
1234 | top: "conv11_mbox_conf_flat"
1235 | flatten_param {
1236 | axis: 1
1237 | }
1238 | }
1239 | layer {
1240 | name: "conv11_mbox_priorbox"
1241 | type: "PriorBox"
1242 | bottom: "conv11"
1243 | bottom: "data"
1244 | top: "conv11_mbox_priorbox"
1245 | prior_box_param {
1246 | min_size: 60.0
1247 | aspect_ratio: 2.0
1248 | flip: true
1249 | clip: false
1250 | variance: 0.1
1251 | variance: 0.1
1252 | variance: 0.2
1253 | variance: 0.2
1254 | offset: 0.5
1255 | }
1256 | }
1257 | layer {
1258 | name: "conv13_mbox_loc"
1259 | type: "Convolution"
1260 | bottom: "conv13"
1261 | top: "conv13_mbox_loc"
1262 | param {
1263 | lr_mult: 1.0
1264 | decay_mult: 1.0
1265 | }
1266 | param {
1267 | lr_mult: 2.0
1268 | decay_mult: 0.0
1269 | }
1270 | convolution_param {
1271 | num_output: 24
1272 | kernel_size: 1
1273 | weight_filler {
1274 | type: "msra"
1275 | }
1276 | bias_filler {
1277 | type: "constant"
1278 | value: 0.0
1279 | }
1280 | }
1281 | }
1282 | layer {
1283 | name: "conv13_mbox_loc_perm"
1284 | type: "Permute"
1285 | bottom: "conv13_mbox_loc"
1286 | top: "conv13_mbox_loc_perm"
1287 | permute_param {
1288 | order: 0
1289 | order: 2
1290 | order: 3
1291 | order: 1
1292 | }
1293 | }
1294 | layer {
1295 | name: "conv13_mbox_loc_flat"
1296 | type: "Flatten"
1297 | bottom: "conv13_mbox_loc_perm"
1298 | top: "conv13_mbox_loc_flat"
1299 | flatten_param {
1300 | axis: 1
1301 | }
1302 | }
1303 | layer {
1304 | name: "conv13_mbox_conf"
1305 | type: "Convolution"
1306 | bottom: "conv13"
1307 | top: "conv13_mbox_conf"
1308 | param {
1309 | lr_mult: 1.0
1310 | decay_mult: 1.0
1311 | }
1312 | param {
1313 | lr_mult: 2.0
1314 | decay_mult: 0.0
1315 | }
1316 | convolution_param {
1317 | num_output: 126
1318 | kernel_size: 1
1319 | weight_filler {
1320 | type: "msra"
1321 | }
1322 | bias_filler {
1323 | type: "constant"
1324 | value: 0.0
1325 | }
1326 | }
1327 | }
1328 | layer {
1329 | name: "conv13_mbox_conf_perm"
1330 | type: "Permute"
1331 | bottom: "conv13_mbox_conf"
1332 | top: "conv13_mbox_conf_perm"
1333 | permute_param {
1334 | order: 0
1335 | order: 2
1336 | order: 3
1337 | order: 1
1338 | }
1339 | }
1340 | layer {
1341 | name: "conv13_mbox_conf_flat"
1342 | type: "Flatten"
1343 | bottom: "conv13_mbox_conf_perm"
1344 | top: "conv13_mbox_conf_flat"
1345 | flatten_param {
1346 | axis: 1
1347 | }
1348 | }
1349 | layer {
1350 | name: "conv13_mbox_priorbox"
1351 | type: "PriorBox"
1352 | bottom: "conv13"
1353 | bottom: "data"
1354 | top: "conv13_mbox_priorbox"
1355 | prior_box_param {
1356 | min_size: 105.0
1357 | max_size: 150.0
1358 | aspect_ratio: 2.0
1359 | aspect_ratio: 3.0
1360 | flip: true
1361 | clip: false
1362 | variance: 0.1
1363 | variance: 0.1
1364 | variance: 0.2
1365 | variance: 0.2
1366 | offset: 0.5
1367 | }
1368 | }
1369 | layer {
1370 | name: "conv14_2_mbox_loc"
1371 | type: "Convolution"
1372 | bottom: "conv14_2"
1373 | top: "conv14_2_mbox_loc"
1374 | param {
1375 | lr_mult: 1.0
1376 | decay_mult: 1.0
1377 | }
1378 | param {
1379 | lr_mult: 2.0
1380 | decay_mult: 0.0
1381 | }
1382 | convolution_param {
1383 | num_output: 24
1384 | kernel_size: 1
1385 | weight_filler {
1386 | type: "msra"
1387 | }
1388 | bias_filler {
1389 | type: "constant"
1390 | value: 0.0
1391 | }
1392 | }
1393 | }
1394 | layer {
1395 | name: "conv14_2_mbox_loc_perm"
1396 | type: "Permute"
1397 | bottom: "conv14_2_mbox_loc"
1398 | top: "conv14_2_mbox_loc_perm"
1399 | permute_param {
1400 | order: 0
1401 | order: 2
1402 | order: 3
1403 | order: 1
1404 | }
1405 | }
1406 | layer {
1407 | name: "conv14_2_mbox_loc_flat"
1408 | type: "Flatten"
1409 | bottom: "conv14_2_mbox_loc_perm"
1410 | top: "conv14_2_mbox_loc_flat"
1411 | flatten_param {
1412 | axis: 1
1413 | }
1414 | }
1415 | layer {
1416 | name: "conv14_2_mbox_conf"
1417 | type: "Convolution"
1418 | bottom: "conv14_2"
1419 | top: "conv14_2_mbox_conf"
1420 | param {
1421 | lr_mult: 1.0
1422 | decay_mult: 1.0
1423 | }
1424 | param {
1425 | lr_mult: 2.0
1426 | decay_mult: 0.0
1427 | }
1428 | convolution_param {
1429 | num_output: 126
1430 | kernel_size: 1
1431 | weight_filler {
1432 | type: "msra"
1433 | }
1434 | bias_filler {
1435 | type: "constant"
1436 | value: 0.0
1437 | }
1438 | }
1439 | }
1440 | layer {
1441 | name: "conv14_2_mbox_conf_perm"
1442 | type: "Permute"
1443 | bottom: "conv14_2_mbox_conf"
1444 | top: "conv14_2_mbox_conf_perm"
1445 | permute_param {
1446 | order: 0
1447 | order: 2
1448 | order: 3
1449 | order: 1
1450 | }
1451 | }
1452 | layer {
1453 | name: "conv14_2_mbox_conf_flat"
1454 | type: "Flatten"
1455 | bottom: "conv14_2_mbox_conf_perm"
1456 | top: "conv14_2_mbox_conf_flat"
1457 | flatten_param {
1458 | axis: 1
1459 | }
1460 | }
1461 | layer {
1462 | name: "conv14_2_mbox_priorbox"
1463 | type: "PriorBox"
1464 | bottom: "conv14_2"
1465 | bottom: "data"
1466 | top: "conv14_2_mbox_priorbox"
1467 | prior_box_param {
1468 | min_size: 150.0
1469 | max_size: 195.0
1470 | aspect_ratio: 2.0
1471 | aspect_ratio: 3.0
1472 | flip: true
1473 | clip: false
1474 | variance: 0.1
1475 | variance: 0.1
1476 | variance: 0.2
1477 | variance: 0.2
1478 | offset: 0.5
1479 | }
1480 | }
1481 | layer {
1482 | name: "conv15_2_mbox_loc"
1483 | type: "Convolution"
1484 | bottom: "conv15_2"
1485 | top: "conv15_2_mbox_loc"
1486 | param {
1487 | lr_mult: 1.0
1488 | decay_mult: 1.0
1489 | }
1490 | param {
1491 | lr_mult: 2.0
1492 | decay_mult: 0.0
1493 | }
1494 | convolution_param {
1495 | num_output: 24
1496 | kernel_size: 1
1497 | weight_filler {
1498 | type: "msra"
1499 | }
1500 | bias_filler {
1501 | type: "constant"
1502 | value: 0.0
1503 | }
1504 | }
1505 | }
1506 | layer {
1507 | name: "conv15_2_mbox_loc_perm"
1508 | type: "Permute"
1509 | bottom: "conv15_2_mbox_loc"
1510 | top: "conv15_2_mbox_loc_perm"
1511 | permute_param {
1512 | order: 0
1513 | order: 2
1514 | order: 3
1515 | order: 1
1516 | }
1517 | }
1518 | layer {
1519 | name: "conv15_2_mbox_loc_flat"
1520 | type: "Flatten"
1521 | bottom: "conv15_2_mbox_loc_perm"
1522 | top: "conv15_2_mbox_loc_flat"
1523 | flatten_param {
1524 | axis: 1
1525 | }
1526 | }
1527 | layer {
1528 | name: "conv15_2_mbox_conf"
1529 | type: "Convolution"
1530 | bottom: "conv15_2"
1531 | top: "conv15_2_mbox_conf"
1532 | param {
1533 | lr_mult: 1.0
1534 | decay_mult: 1.0
1535 | }
1536 | param {
1537 | lr_mult: 2.0
1538 | decay_mult: 0.0
1539 | }
1540 | convolution_param {
1541 | num_output: 126
1542 | kernel_size: 1
1543 | weight_filler {
1544 | type: "msra"
1545 | }
1546 | bias_filler {
1547 | type: "constant"
1548 | value: 0.0
1549 | }
1550 | }
1551 | }
1552 | layer {
1553 | name: "conv15_2_mbox_conf_perm"
1554 | type: "Permute"
1555 | bottom: "conv15_2_mbox_conf"
1556 | top: "conv15_2_mbox_conf_perm"
1557 | permute_param {
1558 | order: 0
1559 | order: 2
1560 | order: 3
1561 | order: 1
1562 | }
1563 | }
1564 | layer {
1565 | name: "conv15_2_mbox_conf_flat"
1566 | type: "Flatten"
1567 | bottom: "conv15_2_mbox_conf_perm"
1568 | top: "conv15_2_mbox_conf_flat"
1569 | flatten_param {
1570 | axis: 1
1571 | }
1572 | }
1573 | layer {
1574 | name: "conv15_2_mbox_priorbox"
1575 | type: "PriorBox"
1576 | bottom: "conv15_2"
1577 | bottom: "data"
1578 | top: "conv15_2_mbox_priorbox"
1579 | prior_box_param {
1580 | min_size: 195.0
1581 | max_size: 240.0
1582 | aspect_ratio: 2.0
1583 | aspect_ratio: 3.0
1584 | flip: true
1585 | clip: false
1586 | variance: 0.1
1587 | variance: 0.1
1588 | variance: 0.2
1589 | variance: 0.2
1590 | offset: 0.5
1591 | }
1592 | }
1593 | layer {
1594 | name: "conv16_2_mbox_loc"
1595 | type: "Convolution"
1596 | bottom: "conv16_2"
1597 | top: "conv16_2_mbox_loc"
1598 | param {
1599 | lr_mult: 1.0
1600 | decay_mult: 1.0
1601 | }
1602 | param {
1603 | lr_mult: 2.0
1604 | decay_mult: 0.0
1605 | }
1606 | convolution_param {
1607 | num_output: 24
1608 | kernel_size: 1
1609 | weight_filler {
1610 | type: "msra"
1611 | }
1612 | bias_filler {
1613 | type: "constant"
1614 | value: 0.0
1615 | }
1616 | }
1617 | }
1618 | layer {
1619 | name: "conv16_2_mbox_loc_perm"
1620 | type: "Permute"
1621 | bottom: "conv16_2_mbox_loc"
1622 | top: "conv16_2_mbox_loc_perm"
1623 | permute_param {
1624 | order: 0
1625 | order: 2
1626 | order: 3
1627 | order: 1
1628 | }
1629 | }
1630 | layer {
1631 | name: "conv16_2_mbox_loc_flat"
1632 | type: "Flatten"
1633 | bottom: "conv16_2_mbox_loc_perm"
1634 | top: "conv16_2_mbox_loc_flat"
1635 | flatten_param {
1636 | axis: 1
1637 | }
1638 | }
1639 | layer {
1640 | name: "conv16_2_mbox_conf"
1641 | type: "Convolution"
1642 | bottom: "conv16_2"
1643 | top: "conv16_2_mbox_conf"
1644 | param {
1645 | lr_mult: 1.0
1646 | decay_mult: 1.0
1647 | }
1648 | param {
1649 | lr_mult: 2.0
1650 | decay_mult: 0.0
1651 | }
1652 | convolution_param {
1653 | num_output: 126
1654 | kernel_size: 1
1655 | weight_filler {
1656 | type: "msra"
1657 | }
1658 | bias_filler {
1659 | type: "constant"
1660 | value: 0.0
1661 | }
1662 | }
1663 | }
1664 | layer {
1665 | name: "conv16_2_mbox_conf_perm"
1666 | type: "Permute"
1667 | bottom: "conv16_2_mbox_conf"
1668 | top: "conv16_2_mbox_conf_perm"
1669 | permute_param {
1670 | order: 0
1671 | order: 2
1672 | order: 3
1673 | order: 1
1674 | }
1675 | }
1676 | layer {
1677 | name: "conv16_2_mbox_conf_flat"
1678 | type: "Flatten"
1679 | bottom: "conv16_2_mbox_conf_perm"
1680 | top: "conv16_2_mbox_conf_flat"
1681 | flatten_param {
1682 | axis: 1
1683 | }
1684 | }
1685 | layer {
1686 | name: "conv16_2_mbox_priorbox"
1687 | type: "PriorBox"
1688 | bottom: "conv16_2"
1689 | bottom: "data"
1690 | top: "conv16_2_mbox_priorbox"
1691 | prior_box_param {
1692 | min_size: 240.0
1693 | max_size: 285.0
1694 | aspect_ratio: 2.0
1695 | aspect_ratio: 3.0
1696 | flip: true
1697 | clip: false
1698 | variance: 0.1
1699 | variance: 0.1
1700 | variance: 0.2
1701 | variance: 0.2
1702 | offset: 0.5
1703 | }
1704 | }
1705 | layer {
1706 | name: "conv17_2_mbox_loc"
1707 | type: "Convolution"
1708 | bottom: "conv17_2"
1709 | top: "conv17_2_mbox_loc"
1710 | param {
1711 | lr_mult: 1.0
1712 | decay_mult: 1.0
1713 | }
1714 | param {
1715 | lr_mult: 2.0
1716 | decay_mult: 0.0
1717 | }
1718 | convolution_param {
1719 | num_output: 24
1720 | kernel_size: 1
1721 | weight_filler {
1722 | type: "msra"
1723 | }
1724 | bias_filler {
1725 | type: "constant"
1726 | value: 0.0
1727 | }
1728 | }
1729 | }
1730 | layer {
1731 | name: "conv17_2_mbox_loc_perm"
1732 | type: "Permute"
1733 | bottom: "conv17_2_mbox_loc"
1734 | top: "conv17_2_mbox_loc_perm"
1735 | permute_param {
1736 | order: 0
1737 | order: 2
1738 | order: 3
1739 | order: 1
1740 | }
1741 | }
1742 | layer {
1743 | name: "conv17_2_mbox_loc_flat"
1744 | type: "Flatten"
1745 | bottom: "conv17_2_mbox_loc_perm"
1746 | top: "conv17_2_mbox_loc_flat"
1747 | flatten_param {
1748 | axis: 1
1749 | }
1750 | }
1751 | layer {
1752 | name: "conv17_2_mbox_conf"
1753 | type: "Convolution"
1754 | bottom: "conv17_2"
1755 | top: "conv17_2_mbox_conf"
1756 | param {
1757 | lr_mult: 1.0
1758 | decay_mult: 1.0
1759 | }
1760 | param {
1761 | lr_mult: 2.0
1762 | decay_mult: 0.0
1763 | }
1764 | convolution_param {
1765 | num_output: 126
1766 | kernel_size: 1
1767 | weight_filler {
1768 | type: "msra"
1769 | }
1770 | bias_filler {
1771 | type: "constant"
1772 | value: 0.0
1773 | }
1774 | }
1775 | }
1776 | layer {
1777 | name: "conv17_2_mbox_conf_perm"
1778 | type: "Permute"
1779 | bottom: "conv17_2_mbox_conf"
1780 | top: "conv17_2_mbox_conf_perm"
1781 | permute_param {
1782 | order: 0
1783 | order: 2
1784 | order: 3
1785 | order: 1
1786 | }
1787 | }
1788 | layer {
1789 | name: "conv17_2_mbox_conf_flat"
1790 | type: "Flatten"
1791 | bottom: "conv17_2_mbox_conf_perm"
1792 | top: "conv17_2_mbox_conf_flat"
1793 | flatten_param {
1794 | axis: 1
1795 | }
1796 | }
1797 | layer {
1798 | name: "conv17_2_mbox_priorbox"
1799 | type: "PriorBox"
1800 | bottom: "conv17_2"
1801 | bottom: "data"
1802 | top: "conv17_2_mbox_priorbox"
1803 | prior_box_param {
1804 | min_size: 285.0
1805 | max_size: 300.0
1806 | aspect_ratio: 2.0
1807 | aspect_ratio: 3.0
1808 | flip: true
1809 | clip: false
1810 | variance: 0.1
1811 | variance: 0.1
1812 | variance: 0.2
1813 | variance: 0.2
1814 | offset: 0.5
1815 | }
1816 | }
1817 | layer {
1818 | name: "mbox_loc"
1819 | type: "Concat"
1820 | bottom: "conv11_mbox_loc_flat"
1821 | bottom: "conv13_mbox_loc_flat"
1822 | bottom: "conv14_2_mbox_loc_flat"
1823 | bottom: "conv15_2_mbox_loc_flat"
1824 | bottom: "conv16_2_mbox_loc_flat"
1825 | bottom: "conv17_2_mbox_loc_flat"
1826 | top: "mbox_loc"
1827 | concat_param {
1828 | axis: 1
1829 | }
1830 | }
1831 | layer {
1832 | name: "mbox_conf"
1833 | type: "Concat"
1834 | bottom: "conv11_mbox_conf_flat"
1835 | bottom: "conv13_mbox_conf_flat"
1836 | bottom: "conv14_2_mbox_conf_flat"
1837 | bottom: "conv15_2_mbox_conf_flat"
1838 | bottom: "conv16_2_mbox_conf_flat"
1839 | bottom: "conv17_2_mbox_conf_flat"
1840 | top: "mbox_conf"
1841 | concat_param {
1842 | axis: 1
1843 | }
1844 | }
1845 | layer {
1846 | name: "mbox_priorbox"
1847 | type: "Concat"
1848 | bottom: "conv11_mbox_priorbox"
1849 | bottom: "conv13_mbox_priorbox"
1850 | bottom: "conv14_2_mbox_priorbox"
1851 | bottom: "conv15_2_mbox_priorbox"
1852 | bottom: "conv16_2_mbox_priorbox"
1853 | bottom: "conv17_2_mbox_priorbox"
1854 | top: "mbox_priorbox"
1855 | concat_param {
1856 | axis: 2
1857 | }
1858 | }
1859 | layer {
1860 | name: "mbox_conf_reshape"
1861 | type: "Reshape"
1862 | bottom: "mbox_conf"
1863 | top: "mbox_conf_reshape"
1864 | reshape_param {
1865 | shape {
1866 | dim: 0
1867 | dim: -1
1868 | dim: 21
1869 | }
1870 | }
1871 | }
1872 | layer {
1873 | name: "mbox_conf_softmax"
1874 | type: "Softmax"
1875 | bottom: "mbox_conf_reshape"
1876 | top: "mbox_conf_softmax"
1877 | softmax_param {
1878 | axis: 2
1879 | }
1880 | }
1881 | layer {
1882 | name: "mbox_conf_flatten"
1883 | type: "Flatten"
1884 | bottom: "mbox_conf_softmax"
1885 | top: "mbox_conf_flatten"
1886 | flatten_param {
1887 | axis: 1
1888 | }
1889 | }
1890 | layer {
1891 | name: "detection_out"
1892 | type: "DetectionOutput"
1893 | bottom: "mbox_loc"
1894 | bottom: "mbox_conf_flatten"
1895 | bottom: "mbox_priorbox"
1896 | top: "detection_out"
1897 | include {
1898 | phase: TEST
1899 | }
1900 | detection_output_param {
1901 | num_classes: 21
1902 | share_location: true
1903 | background_label_id: 0
1904 | nms_param {
1905 | nms_threshold: 0.45
1906 | top_k: 100
1907 | }
1908 | code_type: CENTER_SIZE
1909 | keep_top_k: 100
1910 | confidence_threshold: 0.25
1911 | }
1912 | }
1913 |
--------------------------------------------------------------------------------
/msg/DetectedObject.msg:
--------------------------------------------------------------------------------
1 | string class_name
2 | float32 confidence
3 | float32 x_min
4 | float32 x_max
5 | float32 y_min
6 | float32 y_max
7 |
--------------------------------------------------------------------------------
/msg/DetectedObjectArray.msg:
--------------------------------------------------------------------------------
1 | Header header
2 | DetectedObject[] objects
3 |
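For illustration, a minimal C++ subscriber for these detections might look like the sketch below (not part of the package; the /dnn_objects topic name and the field layout come from src/dnn_detect.cpp and the definitions above):

#include <ros/ros.h>
#include "dnn_detect/DetectedObjectArray.h"

// Log every detection published by the dnn_detect node.
void objectsCallback(const dnn_detect::DetectedObjectArray& msg) {
  for (const auto& obj : msg.objects) {
    ROS_INFO("%s %.2f box=(%.0f,%.0f)-(%.0f,%.0f)",
             obj.class_name.c_str(), obj.confidence,
             obj.x_min, obj.y_min, obj.x_max, obj.y_max);
  }
}

int main(int argc, char** argv) {
  ros::init(argc, argv, "dnn_objects_listener");
  ros::NodeHandle nh;
  ros::Subscriber sub = nh.subscribe("/dnn_objects", 10, objectsCallback);
  ros::spin();
  return 0;
}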
--------------------------------------------------------------------------------
/package.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <package format="2">
3 |   <name>dnn_detect</name>
4 |   <version>0.1.0</version>
5 |   <description>DNN based detection</description>
6 |
7 |   <maintainer>Jim Vaughan</maintainer>
8 |   <maintainer>Rohan Agrawal</maintainer>
9 |
10 |   <license>BSD</license>
11 |
12 |   <author>Jim Vaughan</author>
13 |
14 |   <buildtool_depend>catkin</buildtool_depend>
15 |
16 |   <depend>roscpp</depend>
17 |   <depend>tf2_geometry_msgs</depend>
18 |   <depend>tf2_ros</depend>
19 |   <depend>tf2</depend>
20 |   <depend>visualization_msgs</depend>
21 |   <depend>image_transport</depend>
22 |   <depend>image_transport_plugins</depend>
23 |   <depend>sensor_msgs</depend>
24 |   <depend>cv_bridge</depend>
25 |   <depend>dynamic_reconfigure</depend>
26 |
27 | </package>
28 |
--------------------------------------------------------------------------------
/src/dnn_detect.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2017, Ubiquity Robotics
3 | * All rights reserved.
4 | *
5 | * Redistribution and use in source and binary forms, with or without
6 | * modification, are permitted provided that the following conditions are met:
7 | *
8 | * 1. Redistributions of source code must retain the above copyright notice,
9 | * this list of conditions and the following disclaimer.
10 | * 2. Redistributions in binary form must reproduce the above copyright notice,
11 | * this list of conditions and the following disclaimer in the documentation
12 | * and/or other materials provided with the distribution.
13 | *
14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 | * POSSIBILITY OF SUCH DAMAGE.
25 | *
26 | * The views and conclusions contained in the software and documentation are
27 | * those of the authors and should not be interpreted as representing official
28 | * policies, either expressed or implied, of the FreeBSD Project.
29 | *
30 | */
31 |
32 | #include <ros/ros.h>
33 | #include <sensor_msgs/image_encodings.h>
34 | #include <image_transport/image_transport.h>
35 |
36 | #include <cv_bridge/cv_bridge.h>
37 | #include <opencv2/imgproc.hpp>
38 | #include <opencv2/highgui.hpp>
39 |
40 | #include "dnn_detect/DetectedObject.h"
41 | #include "dnn_detect/DetectedObjectArray.h"
42 | #include "dnn_detect/Detect.h"
43 |
44 | #include <opencv2/dnn.hpp>
45 | #include <boost/algorithm/string.hpp>
46 | #include <boost/format.hpp>
47 |
48 | #include <cstdlib>
49 | #include <string>
50 | #include <vector>
51 | #include <list>
52 |
53 | #include <thread>
54 | #include <mutex>
55 | #include <condition_variable>
56 |
57 | using namespace std;
58 | using namespace cv;
59 |
60 | std::condition_variable cond;
61 | std::mutex mutx;
62 |
63 | class DnnNode {
64 | private:
65 | ros::Publisher results_pub;
66 |
67 | image_transport::ImageTransport it;
68 | image_transport::Subscriber img_sub;
69 |
70 | // if set, we publish the images that contain objects
71 | bool publish_images;
72 |
73 | int frame_num;
74 | float min_confidence;
75 | int im_size;
76 | int rotate_flag;
77 | float scale_factor;
78 | float mean_val;
79 |   std::vector<std::string> class_names;
80 |
81 | image_transport::Publisher image_pub;
82 |
83 | cv::dnn::Net net;
84 | cv::Mat resized_image;
85 | cv::Mat rotated_image;
86 |
87 | bool single_shot;
88 | volatile bool triggered;
89 | volatile bool processed;
90 |
91 | dnn_detect::DetectedObjectArray results;
92 |
93 | ros::ServiceServer detect_srv;
94 |
95 | bool trigger_callback(dnn_detect::Detect::Request &req,
96 | dnn_detect::Detect::Response &res);
97 |
98 | void image_callback(const sensor_msgs::ImageConstPtr &msg);
99 |
100 | public:
101 | DnnNode(ros::NodeHandle &nh);
102 | };
103 |
104 | bool DnnNode::trigger_callback(dnn_detect::Detect::Request &req,
105 | dnn_detect::Detect::Response &res)
106 | {
107 | ROS_INFO("Got service request");
108 | triggered = true;
109 |
110 |   std::unique_lock<std::mutex> lock(mutx);
111 |
112 | while (!processed) {
113 | cond.wait(lock);
114 | }
115 | res.result = results;
116 | processed = false;
117 | return true;
118 | }
119 |
120 |
121 | void DnnNode::image_callback(const sensor_msgs::ImageConstPtr & msg)
122 | {
123 | if (single_shot && !triggered) {
124 | return;
125 | }
126 | triggered = false;
127 |
128 | ROS_INFO("Got image %d", msg->header.seq);
129 | frame_num++;
130 |
131 | cv_bridge::CvImagePtr cv_ptr;
132 |
133 | try {
134 | cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8);
135 |
136 | int w = cv_ptr->image.cols;
137 | int h = cv_ptr->image.rows;
138 |
139 | if (rotate_flag >= 0) {
140 | cv::rotate(cv_ptr->image, rotated_image, rotate_flag);
141 | rotated_image.copyTo(cv_ptr->image);
142 | }
143 |
144 |     cv::resize(cv_ptr->image, resized_image, cv::Size(im_size, im_size));
145 |     cv::Mat blob = cv::dnn::blobFromImage(resized_image, scale_factor,
146 |                                           cv::Size(im_size, im_size), mean_val, false);
147 |
148 | net.setInput(blob, "data");
149 | cv::Mat objs = net.forward("detection_out");
150 |
151 |     cv::Mat detectionMat(objs.size[2], objs.size[3], CV_32F,
152 |                          objs.ptr<float>());
153 |
154 |     std::unique_lock<std::mutex> lock(mutx);
155 | results.header.frame_id = msg->header.frame_id;
156 | results.objects.clear();
157 |
158 | for(int i = 0; i < detectionMat.rows; i++) {
159 |
160 |       float confidence = detectionMat.at<float>(i, 2);
161 |       if (confidence > min_confidence) {
162 |         int object_class = (int)(detectionMat.at<float>(i, 1));
163 |         // detection row layout: [image_id, class_id, confidence, x_min, y_min, x_max, y_max], box as fractions of the image
164 |         int x_min = static_cast<int>(detectionMat.at<float>(i, 3) * w);
165 |         int y_min = static_cast<int>(detectionMat.at<float>(i, 4) * h);
166 |         int x_max = static_cast<int>(detectionMat.at<float>(i, 5) * w);
167 |         int y_max = static_cast<int>(detectionMat.at<float>(i, 6) * h);
168 |
169 | std::string class_name;
170 | if (object_class >= class_names.size()) {
171 | class_name = "unknown";
172 | ROS_ERROR("Object class %d out of range of class names",
173 | object_class);
174 | }
175 | else {
176 | class_name = class_names[object_class];
177 | }
178 | std::string label = str(boost::format{"%1% %2%"} %
179 | class_name % confidence);
180 |
181 | ROS_INFO("%s", label.c_str());
182 | dnn_detect::DetectedObject obj;
183 | obj.class_name = class_name;
184 | obj.confidence = confidence;
185 | obj.x_min = x_min;
186 | obj.x_max = x_max;
187 | obj.y_min = y_min;
188 | obj.y_max = y_max;
189 | results.objects.push_back(obj);
190 |
191 | Rect object(x_min, y_min, x_max-x_min, y_max-y_min);
192 |
193 | rectangle(cv_ptr->image, object, Scalar(0, 255, 0));
194 | int baseline=0;
195 | cv::Size text_size = cv::getTextSize(label,
196 | FONT_HERSHEY_SIMPLEX, 0.75, 2, &baseline);
197 | putText(cv_ptr->image, label, Point(x_min, y_min-text_size.height),
198 | FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0));
199 | }
200 | }
201 |
202 | results_pub.publish(results);
203 |
204 | image_pub.publish(cv_ptr->toImageMsg());
205 |
206 | }
207 | catch(cv_bridge::Exception & e) {
208 | ROS_ERROR("cv_bridge exception: %s", e.what());
209 | }
210 | catch(cv::Exception & e) {
211 | ROS_ERROR("cv exception: %s", e.what());
212 | }
213 | ROS_DEBUG("Notifying condition variable");
214 | processed = true;
215 | cond.notify_all();
216 | }
217 |
218 | DnnNode::DnnNode(ros::NodeHandle & nh) : it(nh)
219 | {
220 | frame_num = 0;
221 |
222 | std::string dir;
223 | std::string proto_net_file;
224 | std::string caffe_model_file;
225 | std::string classes("background,"
226 | "aeroplane,bicycle,bird,boat,bottle,bus,car,cat,chair,"
227 | "cow,diningtable,dog,horse,motorbike,person,pottedplant,"
228 | "sheep,sofa,train,tvmonitor");
229 |
230 | nh.param("single_shot", single_shot, false);
231 |
232 | nh.param("publish_images", publish_images, false);
233 |   nh.param<std::string>("data_dir", dir, "");
234 |   nh.param<std::string>("protonet_file", proto_net_file,
235 |                         "MobileNetSSD_deploy.prototxt.txt");
236 |   nh.param<std::string>("caffe_model_file", caffe_model_file,
237 |                         "MobileNetSSD_deploy.caffemodel");
238 |   nh.param("min_confidence", min_confidence, 0.2f);
239 | nh.param("im_size", im_size, 300);
240 | nh.param("rotate_flag", rotate_flag, -1);
241 | nh.param("scale_factor", scale_factor, 0.007843f);
242 | nh.param("mean_val", mean_val, 127.5f);
243 | nh.param("class_names", classes, classes);
244 |
245 | boost::split(class_names, classes, boost::is_any_of(","));
246 | ROS_INFO("Read %d class names", (int)class_names.size());
247 |
248 | try {
249 | net = cv::dnn::readNetFromCaffe(dir + "/" + proto_net_file,
250 | dir + "/" + caffe_model_file);
251 | }
252 | catch(cv::Exception & e) {
253 | ROS_ERROR("cv exception: %s", e.what());
254 | exit(1);
255 | }
256 |
257 | triggered = false;
258 |
259 | detect_srv = nh.advertiseService("detect", &DnnNode::trigger_callback, this);
260 |
261 |   results_pub =
262 |     nh.advertise<dnn_detect::DetectedObjectArray>("/dnn_objects", 20);
263 |
264 | image_pub = it.advertise("/dnn_images", 1);
265 |
266 | img_sub = it.subscribe("/camera", 1,
267 | &DnnNode::image_callback, this);
268 |
269 | ROS_INFO("DNN detection ready");
270 | }
271 |
272 | int main(int argc, char ** argv) {
273 | ros::init(argc, argv, "dnn_detect");
274 | ros::NodeHandle nh("~");
275 |
276 |   DnnNode node(nh);
277 | ros::MultiThreadedSpinner spinner(2);
278 | spinner.spin();
279 |
280 | return 0;
281 | }
282 |
--------------------------------------------------------------------------------
/srv/Detect.srv:
--------------------------------------------------------------------------------
1 | ---
2 | DetectedObjectArray result
3 |
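The request side is empty and the response returns a DetectedObjectArray. For illustration, a small C++ client sketch follows (not part of the package; it assumes the node runs under its default name, so the private service resolves to /dnn_detect/detect, as in test/dnn_images_test.cpp below):

#include <ros/ros.h>
#include "dnn_detect/Detect.h"

int main(int argc, char** argv) {
  ros::init(argc, argv, "detect_client");
  ros::NodeHandle nh;

  // The node advertises "detect" on its private namespace, so the resolved
  // name is /dnn_detect/detect when the node keeps its default name.
  ros::ServiceClient client =
      nh.serviceClient<dnn_detect::Detect>("/dnn_detect/detect");

  dnn_detect::Detect srv;  // empty request; the response carries a DetectedObjectArray
  if (client.call(srv)) {
    for (const auto& obj : srv.response.result.objects) {
      ROS_INFO("%s (%.2f)", obj.class_name.c_str(), obj.confidence);
    }
  } else {
    ROS_ERROR("Call to /dnn_detect/detect failed");
  }
  return 0;
}

Note that the service handler in src/dnn_detect.cpp blocks until the next camera frame has been processed, so the call only returns once a detection pass has completed.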
--------------------------------------------------------------------------------
/test/dnn_images.test:
--------------------------------------------------------------------------------
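As with the launch file, the rostest XML was stripped in extraction. A plausible sketch, inferred from test/dnn_images_test.cpp and the add_rostest_gtest() entry in CMakeLists.txt, is shown below; the remapping, single_shot setting, and parameter values are assumptions:

<launch>
  <!-- Detection node under test; the remap connects it to the topic the test fixture publishes on -->
  <node pkg="dnn_detect" type="dnn_detect" name="dnn_detect" output="log">
    <param name="data_dir" value="$(find dnn_detect)/model"/>
    <param name="single_shot" value="true"/>
    <remap from="/camera" to="/camera/image"/>
  </node>

  <!-- GTest fixture: publishes test_images/cat.jpg and expects a "cat" detection on /dnn_objects -->
  <test test-name="dnn_images_test" pkg="dnn_detect" type="dnn_images_test" time-limit="120.0">
    <param name="image_directory" value="$(find dnn_detect)/test/test_images/"/>
  </test>
</launch>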
--------------------------------------------------------------------------------
/test/dnn_images_test.cpp:
--------------------------------------------------------------------------------
1 | #include <gtest/gtest.h>
2 | #include <unistd.h>
3 |
4 | #include <ros/ros.h>
5 | #include <image_transport/image_transport.h>
6 | #include <cv_bridge/cv_bridge.h>
7 | #include <boost/thread.hpp>
8 |
9 | #include "dnn_detect/DetectedObject.h"
10 | #include "dnn_detect/DetectedObjectArray.h"
11 | #include "dnn_detect/Detect.h"
12 | #include <opencv2/highgui.hpp>
13 |
14 | #if CV_MAJOR_VERSION < 4
15 | #define IMREAD_COLOR_MODE CV_LOAD_IMAGE_COLOR
16 | #else
17 | #define IMREAD_COLOR_MODE cv::IMREAD_COLOR
18 | #endif
19 |
20 | class DnnImagesTest : public ::testing::Test {
21 | protected:
22 | virtual void SetUp() {
23 | it = new image_transport::ImageTransport(nh);
24 | image_pub = it->advertise("camera/image", 1);
25 |
26 | ros::NodeHandle nh_priv("~");
27 | nh_priv.getParam("image_directory", image_directory);
28 | object_sub = nh.subscribe("/dnn_objects", 1, &DnnImagesTest::object_callback, this);
29 | got_object = false;
30 | got_cat = false;
31 |
32 | }
33 |
34 | // Make a service request to trigger detection
35 | void trigger() {
36 | ros::NodeHandle node;
37 |     ros::ServiceClient client =
38 |       node.serviceClient<dnn_detect::Detect>("/dnn_detect/detect");
39 | dnn_detect::Detect d;
40 | client.call(d);
41 | }
42 |
43 | virtual void TearDown() { delete it;}
44 |
45 | void publish_image(std::string file) {
46 | boost::thread trig(&DnnImagesTest::trigger, this);
47 |
48 | sleep(1);
49 | cv::Mat image = cv::imread(image_directory+file, IMREAD_COLOR_MODE);
50 | sensor_msgs::ImagePtr msg = cv_bridge::CvImage(std_msgs::Header(), "bgr8",
51 | image).toImageMsg();
52 | image_pub.publish(msg);
53 | }
54 |
55 | ros::NodeHandle nh;
56 |
57 | // Set up Publishing of static images
58 | image_transport::ImageTransport* it;
59 | image_transport::Publisher image_pub;
60 |
61 | bool got_object;
62 | bool got_cat;
63 | ros::Subscriber object_sub;
64 |
65 | std::string image_directory;
66 |
67 | // Set up subscribing
68 | void object_callback(const dnn_detect::DetectedObjectArray& results) {
69 | got_object = true;
70 | for (const auto& obj : results.objects) {
71 | if (obj.class_name == "cat") {
72 | got_cat = true;
73 | }
74 | }
75 | }
76 | };
77 |
78 |
79 | TEST_F(DnnImagesTest, cat) {
80 | ros::Rate loop_rate(5);
81 | while (nh.ok() && !got_object && !got_cat) {
82 | publish_image("cat.jpg");
83 | ros::spinOnce();
84 | loop_rate.sleep();
85 | }
86 |
87 | ASSERT_TRUE(got_cat);
88 | }
89 |
90 | int main(int argc, char** argv)
91 | {
92 |
93 | testing::InitGoogleTest(&argc, argv);
94 | ros::init(argc, argv, "DnnImagesTest");
95 | return RUN_ALL_TESTS();
96 | }
97 |
--------------------------------------------------------------------------------
/test/test_images/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UbiquityRobotics/dnn_detect/c23161c9c1c2a2bd15618b6b3450522ac8aad2cb/test/test_images/cat.jpg
--------------------------------------------------------------------------------