├── darknet_ros ├── yolo_network_config │ ├── weights │ │ ├── .gitignore │ │ └── how_to_download_weights.txt │ └── cfg │ │ ├── yolov2-tiny-voc.cfg │ │ ├── yolov2-tiny.cfg │ │ ├── yolov2-voc.cfg │ │ ├── yolov2.cfg │ │ ├── yolov3-voc.cfg │ │ ├── yolov3.cfg │ │ └── yolov4.cfg ├── doc │ ├── test_detection.png │ ├── test_detection_anymal.png │ └── quadruped_anymal_and_person.JPG ├── test │ ├── test_main.cpp │ ├── object_detection.test │ ├── yolov2.yaml │ └── ObjectDetection.cpp ├── launch │ ├── darknet_ros_gdb.launch │ ├── yolo_v3.launch │ ├── yolo_v4.launch │ └── darknet_ros.launch ├── include │ └── darknet_ros │ │ ├── image_interface.h │ │ └── YoloObjectDetector.hpp ├── src │ ├── yolo_object_detector_node.cpp │ ├── image_interface.c │ └── YoloObjectDetector.cpp ├── config │ ├── yolov2-voc.yaml │ ├── yolov3-voc.yaml │ ├── yolov2-tiny-voc.yaml │ ├── ros.yaml │ ├── yolov2.yaml │ ├── yolov3.yaml │ ├── yolov4.yaml │ └── yolov2-tiny.yaml ├── package.xml ├── CHANGELOG.rst └── CMakeLists.txt ├── darknet_ros_msgs ├── msg │ ├── ObjectCount.msg │ ├── BoundingBoxes.msg │ └── BoundingBox.msg ├── action │ └── CheckForObjects.action ├── CMakeLists.txt ├── CHANGELOG.rst └── package.xml ├── jenkins-pipeline ├── .gitmodules ├── LICENSE └── README.md /darknet_ros/yolo_network_config/weights/.gitignore: -------------------------------------------------------------------------------- 1 | *.weights 2 | -------------------------------------------------------------------------------- /darknet_ros_msgs/msg/ObjectCount.msg: -------------------------------------------------------------------------------- 1 | Header header 2 | int8 count 3 | -------------------------------------------------------------------------------- /jenkins-pipeline: -------------------------------------------------------------------------------- 1 | library 'continuous_integration_pipeline' 2 | ciPipeline("") 3 | -------------------------------------------------------------------------------- /.gitmodules: 
-------------------------------------------------------------------------------- 1 | [submodule "darknet"] 2 | path = darknet 3 | url = https://github.com/Tossy0423/darknet.git 4 | -------------------------------------------------------------------------------- /darknet_ros_msgs/msg/BoundingBoxes.msg: -------------------------------------------------------------------------------- 1 | Header header 2 | Header image_header 3 | BoundingBox[] bounding_boxes 4 | -------------------------------------------------------------------------------- /darknet_ros/doc/test_detection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tossy0423/darknet_ros/HEAD/darknet_ros/doc/test_detection.png -------------------------------------------------------------------------------- /darknet_ros/doc/test_detection_anymal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tossy0423/darknet_ros/HEAD/darknet_ros/doc/test_detection_anymal.png -------------------------------------------------------------------------------- /darknet_ros/doc/quadruped_anymal_and_person.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tossy0423/darknet_ros/HEAD/darknet_ros/doc/quadruped_anymal_and_person.JPG -------------------------------------------------------------------------------- /darknet_ros_msgs/msg/BoundingBox.msg: -------------------------------------------------------------------------------- 1 | float64 probability 2 | int64 xmin 3 | int64 ymin 4 | int64 xmax 5 | int64 ymax 6 | int16 id 7 | string Class 8 | -------------------------------------------------------------------------------- /darknet_ros/test/test_main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // ROS 4 | #include 5 | 6 | int main(int argc, char** argv) { 7 | 
ros::init(argc, argv, "darknet_ros_test"); 8 | testing::InitGoogleTest(&argc, argv); 9 | return RUN_ALL_TESTS(); 10 | } 11 | -------------------------------------------------------------------------------- /darknet_ros_msgs/action/CheckForObjects.action: -------------------------------------------------------------------------------- 1 | # Check if objects in image 2 | 3 | # Goal definition 4 | int16 id 5 | sensor_msgs/Image image 6 | 7 | --- 8 | # Result definition 9 | int16 id 10 | darknet_ros_msgs/BoundingBoxes bounding_boxes 11 | 12 | --- 13 | # Feedback definition 14 | -------------------------------------------------------------------------------- /darknet_ros/launch/darknet_ros_gdb.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /darknet_ros/include/darknet_ros/image_interface.h: -------------------------------------------------------------------------------- 1 | /* 2 | * image_interface.h 3 | * 4 | * Created on: Dec 19, 2016 5 | * Author: Marko Bjelonic 6 | * Institute: ETH Zurich, Robotic Systems Lab 7 | */ 8 | 9 | #ifndef IMAGE_INTERFACE_H 10 | #define IMAGE_INTERFACE_H 11 | 12 | #include "image.h" 13 | #include "opencv2/core/types_c.h" 14 | 15 | static float get_pixel(image m, int x, int y, int c); 16 | image** load_alphabet_with_file(char* datafile); 17 | void generate_image(image p, IplImage* disp); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /darknet_ros/src/yolo_object_detector_node.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * yolo_obstacle_detector_node.cpp 3 | * 4 | * Created on: Dec 19, 2016 5 | * Author: Marko Bjelonic 6 | * Institute: ETH Zurich, Robotic Systems Lab 7 | */ 8 | 9 | #include 10 | #include 11 | 12 | int main(int argc, char** argv) { 13 | ros::init(argc, 
argv, "darknet_ros"); 14 | ros::NodeHandle nodeHandle("~"); 15 | darknet_ros::YoloObjectDetector yoloObjectDetector(nodeHandle); 16 | 17 | ros::spin(); 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /darknet_ros/launch/yolo_v3.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /darknet_ros/launch/yolo_v4.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /darknet_ros/yolo_network_config/weights/how_to_download_weights.txt: -------------------------------------------------------------------------------- 1 | cd catkin_workspace/src/darknet_ros/darknet_ros/yolo_network_config/weights/ 2 | 3 | COCO data set (Yolo v2): 4 | wget http://pjreddie.com/media/files/yolov2.weights 5 | wget http://pjreddie.com/media/files/yolov2-tiny.weights 6 | 7 | VOC data set (Yolo v2): 8 | wget http://pjreddie.com/media/files/yolov2-voc.weights 9 | wget http://pjreddie.com/media/files/yolov2-tiny-voc.weights 10 | 11 | Yolo v3: 12 | wget http://pjreddie.com/media/files/yolov3.weights 13 | wget http://pjreddie.com/media/files/yolov3-voc.weights 14 | -------------------------------------------------------------------------------- /darknet_ros/config/yolov2-voc.yaml: -------------------------------------------------------------------------------- 1 | yolo_model: 2 | 3 | config_file: 4 | name: yolov2-voc.cfg 5 | weight_file: 6 | name: yolov2-voc.weights 7 | threshold: 8 | value: 0.3 9 | detection_classes: 10 | names: 11 | - aeroplane 12 | - bicycle 13 | - bird 14 | - boat 15 | - bottle 16 | - bus 17 | - car 18 | - cat 19 | - chair 20 | - cow 21 | - 
diningtable 22 | - dog 23 | - horse 24 | - motorbike 25 | - person 26 | - pottedplant 27 | - sheep 28 | - sofa 29 | - train 30 | - tvmonitor 31 | -------------------------------------------------------------------------------- /darknet_ros/config/yolov3-voc.yaml: -------------------------------------------------------------------------------- 1 | yolo_model: 2 | 3 | config_file: 4 | name: yolov3-voc.cfg 5 | weight_file: 6 | name: yolov3-voc.weights 7 | threshold: 8 | value: 0.3 9 | detection_classes: 10 | names: 11 | - aeroplane 12 | - bicycle 13 | - bird 14 | - boat 15 | - bottle 16 | - bus 17 | - car 18 | - cat 19 | - chair 20 | - cow 21 | - diningtable 22 | - dog 23 | - horse 24 | - motorbike 25 | - person 26 | - pottedplant 27 | - sheep 28 | - sofa 29 | - train 30 | - tvmonitor 31 | -------------------------------------------------------------------------------- /darknet_ros/config/yolov2-tiny-voc.yaml: -------------------------------------------------------------------------------- 1 | yolo_model: 2 | 3 | config_file: 4 | name: yolov2-tiny-voc.cfg 5 | weight_file: 6 | name: yolov2-tiny-voc.weights 7 | threshold: 8 | value: 0.3 9 | detection_classes: 10 | names: 11 | - aeroplane 12 | - bicycle 13 | - bird 14 | - boat 15 | - bottle 16 | - bus 17 | - car 18 | - cat 19 | - chair 20 | - cow 21 | - diningtable 22 | - dog 23 | - horse 24 | - motorbike 25 | - person 26 | - pottedplant 27 | - sheep 28 | - sofa 29 | - train 30 | - tvmonitor 31 | -------------------------------------------------------------------------------- /darknet_ros/config/ros.yaml: -------------------------------------------------------------------------------- 1 | subscribers: 2 | 3 | camera_reading: 4 | topic: /camera/rgb/image_raw 5 | queue_size: 1 6 | 7 | actions: 8 | 9 | camera_reading: 10 | name: /darknet_ros/check_for_objects 11 | 12 | publishers: 13 | 14 | object_detector: 15 | topic: /darknet_ros/found_object 16 | queue_size: 1 17 | latch: false 18 | 19 | bounding_boxes: 20 | topic: 
/darknet_ros/bounding_boxes 21 | queue_size: 1 22 | latch: false 23 | 24 | detection_image: 25 | topic: /darknet_ros/detection_image 26 | queue_size: 1 27 | latch: true 28 | 29 | image_view: 30 | 31 | enable_opencv: true 32 | wait_key_delay: 1 33 | enable_console_output: true 34 | -------------------------------------------------------------------------------- /darknet_ros_msgs/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.12) 2 | 3 | project(darknet_ros_msgs) 4 | 5 | set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}") 6 | 7 | find_package(catkin REQUIRED 8 | COMPONENTS 9 | actionlib_msgs 10 | geometry_msgs 11 | sensor_msgs 12 | std_msgs 13 | message_generation 14 | ) 15 | 16 | add_message_files( 17 | FILES 18 | BoundingBox.msg 19 | BoundingBoxes.msg 20 | ObjectCount.msg 21 | ) 22 | 23 | add_action_files( 24 | FILES 25 | CheckForObjects.action 26 | ) 27 | 28 | generate_messages( 29 | DEPENDENCIES 30 | actionlib_msgs 31 | geometry_msgs 32 | sensor_msgs 33 | std_msgs 34 | ) 35 | 36 | catkin_package( 37 | CATKIN_DEPENDS 38 | actionlib_msgs 39 | geometry_msgs 40 | sensor_msgs 41 | message_runtime 42 | std_msgs 43 | ) 44 | -------------------------------------------------------------------------------- /darknet_ros_msgs/CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 2 | Changelog for package darknet_ros_msgs 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | 5 | 1.1.4 (2019-03-03) 6 | ------------------ 7 | 8 | 1.1.3 (2018-04-26) 9 | ------------------ 10 | * Fixed formatting part 2. 11 | * Merge branch 'firephinx-master' 12 | * Merge branch 'master' of https://github.com/firephinx/darknet_ros into firephinx-master 13 | * Added rgb_image_header to BoundingBoxes msg. 14 | * Merge pull request `#57 `_ from leggedrobotics/devel/threads 15 | Devel/threads 16 | * Adapted package description. 
17 | * Merge branch 'master' into devel/threads 18 | * Update package.xml 19 | * Contributors: Kevin Zhang, Marko Bjelonic 20 | 21 | 1.1.2 (2018-01-06) 22 | ------------------ 23 | * First release of darknet_ros_msgs. 24 | -------------------------------------------------------------------------------- /darknet_ros/test/object_detection.test: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /darknet_ros_msgs/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | darknet_ros_msgs 4 | 1.1.4 5 | Darknet is an open source neural network framework that runs on CPU and GPU. You only look once (YOLO) is a state-of-the-art, real-time object detection system. 6 | Marko Bjelonic 7 | BSD 8 | https://github.com/leggedrobotics/darknet_ros 9 | Marko Bjelonic 10 | 11 | catkin 12 | 13 | actionlib_msgs 14 | geometry_msgs 15 | sensor_msgs 16 | message_generation 17 | std_msgs 18 | 19 | actionlib_msgs 20 | geometry_msgs 21 | sensor_msgs 22 | message_runtime 23 | std_msgs 24 | 25 | -------------------------------------------------------------------------------- /darknet_ros/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | darknet_ros 4 | 1.1.4 5 | Darknet is an open source neural network framework that runs on CPU and GPU. You only look once (YOLO) is a state-of-the-art, real-time object detection system. 
6 | Marko Bjelonic 7 | BSD 8 | https://github.com/leggedrobotics/darknet_ros 9 | Marko Bjelonic 10 | 11 | catkin 12 | boost 13 | libopencv-dev 14 | libx11 15 | libxt-dev 16 | libxext 17 | 18 | roscpp 19 | rospy 20 | std_msgs 21 | image_transport 22 | cv_bridge 23 | sensor_msgs 24 | message_generation 25 | darknet_ros_msgs 26 | actionlib 27 | 28 | 29 | rostest 30 | wget 31 | 32 | -------------------------------------------------------------------------------- /darknet_ros/src/image_interface.c: -------------------------------------------------------------------------------- 1 | /* 2 | * image_interface.c 3 | * 4 | * Created on: Dec 19, 2016 5 | * Author: Marko Bjelonic 6 | * Institute: ETH Zurich, Robotic Systems Lab 7 | */ 8 | 9 | #include "darknet_ros/image_interface.h" 10 | 11 | static float get_pixel(image m, int x, int y, int c) { 12 | assert(x < m.w && y < m.h && c < m.c); 13 | return m.data[c * m.h * m.w + y * m.w + x]; 14 | } 15 | 16 | image** load_alphabet_with_file(char* datafile) { 17 | int i, j; 18 | const int nsize = 8; 19 | image** alphabets = calloc(nsize, sizeof(image)); 20 | char* labels = "/labels/%d_%d.png"; 21 | char* files = (char*)malloc(1 + strlen(datafile) + strlen(labels)); 22 | strcpy(files, datafile); 23 | strcat(files, labels); 24 | for (j = 0; j < nsize; ++j) { 25 | alphabets[j] = calloc(128, sizeof(image)); 26 | for (i = 32; i < 127; ++i) { 27 | char buff[256]; 28 | sprintf(buff, files, i, j); 29 | alphabets[j][i] = load_image_color(buff, 0, 0); 30 | } 31 | } 32 | return alphabets; 33 | } 34 | 35 | #ifdef OPENCV 36 | void generate_image(image p, IplImage* disp) { 37 | int x, y, k; 38 | if (p.c == 3) rgbgr_image(p); 39 | // normalize_image(copy); 40 | 41 | int step = disp->widthStep; 42 | for (y = 0; y < p.h; ++y) { 43 | for (x = 0; x < p.w; ++x) { 44 | for (k = 0; k < p.c; ++k) { 45 | disp->imageData[y * step + x * p.c + k] = (unsigned char)(get_pixel(p, x, y, k) * 255); 46 | } 47 | } 48 | } 49 | } 50 | #endif 51 | 
-------------------------------------------------------------------------------- /darknet_ros/launch/darknet_ros.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, Marko Bjelonic, Robotic Systems Lab, ETH Zurich 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the copyright holder nor the names of its 12 | contributors may be used to endorse or promote products derived 13 | from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. 
IN NO EVENT SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /darknet_ros/config/yolov2.yaml: -------------------------------------------------------------------------------- 1 | yolo_model: 2 | 3 | config_file: 4 | name: yolov2.cfg 5 | weight_file: 6 | name: yolov2.weights 7 | threshold: 8 | value: 0.3 9 | detection_classes: 10 | names: 11 | - person 12 | - bicycle 13 | - car 14 | - motorbike 15 | - aeroplane 16 | - bus 17 | - train 18 | - truck 19 | - boat 20 | - traffic light 21 | - fire hydrant 22 | - stop sign 23 | - parking meter 24 | - bench 25 | - bird 26 | - cat 27 | - dog 28 | - horse 29 | - sheep 30 | - cow 31 | - elephant 32 | - bear 33 | - zebra 34 | - giraffe 35 | - backpack 36 | - umbrella 37 | - handbag 38 | - tie 39 | - suitcase 40 | - frisbee 41 | - skis 42 | - snowboard 43 | - sports ball 44 | - kite 45 | - baseball bat 46 | - baseball glove 47 | - skateboard 48 | - surfboard 49 | - tennis racket 50 | - bottle 51 | - wine glass 52 | - cup 53 | - fork 54 | - knife 55 | - spoon 56 | - bowl 57 | - banana 58 | - apple 59 | - sandwich 60 | - orange 61 | - broccoli 62 | - carrot 63 | - hot dog 64 | - pizza 65 | - donut 66 | - cake 67 | - chair 68 | - sofa 69 | - pottedplant 70 | - bed 71 | - diningtable 72 | - toilet 73 | - tvmonitor 74 | - laptop 75 | - mouse 76 | - remote 77 | - keyboard 78 | - cell phone 79 | - microwave 80 | - oven 81 | - toaster 82 | - sink 83 | - refrigerator 84 | - book 85 
| - clock 86 | - vase 87 | - scissors 88 | - teddy bear 89 | - hair drier 90 | - toothbrush 91 | -------------------------------------------------------------------------------- /darknet_ros/config/yolov3.yaml: -------------------------------------------------------------------------------- 1 | yolo_model: 2 | 3 | config_file: 4 | name: yolov3.cfg 5 | weight_file: 6 | name: yolov3.weights 7 | threshold: 8 | value: 0.3 9 | detection_classes: 10 | names: 11 | - person 12 | - bicycle 13 | - car 14 | - motorbike 15 | - aeroplane 16 | - bus 17 | - train 18 | - truck 19 | - boat 20 | - traffic light 21 | - fire hydrant 22 | - stop sign 23 | - parking meter 24 | - bench 25 | - bird 26 | - cat 27 | - dog 28 | - horse 29 | - sheep 30 | - cow 31 | - elephant 32 | - bear 33 | - zebra 34 | - giraffe 35 | - backpack 36 | - umbrella 37 | - handbag 38 | - tie 39 | - suitcase 40 | - frisbee 41 | - skis 42 | - snowboard 43 | - sports ball 44 | - kite 45 | - baseball bat 46 | - baseball glove 47 | - skateboard 48 | - surfboard 49 | - tennis racket 50 | - bottle 51 | - wine glass 52 | - cup 53 | - fork 54 | - knife 55 | - spoon 56 | - bowl 57 | - banana 58 | - apple 59 | - sandwich 60 | - orange 61 | - broccoli 62 | - carrot 63 | - hot dog 64 | - pizza 65 | - donut 66 | - cake 67 | - chair 68 | - sofa 69 | - pottedplant 70 | - bed 71 | - diningtable 72 | - toilet 73 | - tvmonitor 74 | - laptop 75 | - mouse 76 | - remote 77 | - keyboard 78 | - cell phone 79 | - microwave 80 | - oven 81 | - toaster 82 | - sink 83 | - refrigerator 84 | - book 85 | - clock 86 | - vase 87 | - scissors 88 | - teddy bear 89 | - hair drier 90 | - toothbrush 91 | -------------------------------------------------------------------------------- /darknet_ros/config/yolov4.yaml: -------------------------------------------------------------------------------- 1 | yolo_model: 2 | 3 | config_file: 4 | name: yolov4.cfg 5 | weight_file: 6 | name: yolov4.weights 7 | threshold: 8 | value: 0.3 9 | detection_classes: 10 | 
names: 11 | - person 12 | - bicycle 13 | - car 14 | - motorbike 15 | - aeroplane 16 | - bus 17 | - train 18 | - truck 19 | - boat 20 | - traffic light 21 | - fire hydrant 22 | - stop sign 23 | - parking meter 24 | - bench 25 | - bird 26 | - cat 27 | - dog 28 | - horse 29 | - sheep 30 | - cow 31 | - elephant 32 | - bear 33 | - zebra 34 | - giraffe 35 | - backpack 36 | - umbrella 37 | - handbag 38 | - tie 39 | - suitcase 40 | - frisbee 41 | - skis 42 | - snowboard 43 | - sports ball 44 | - kite 45 | - baseball bat 46 | - baseball glove 47 | - skateboard 48 | - surfboard 49 | - tennis racket 50 | - bottle 51 | - wine glass 52 | - cup 53 | - fork 54 | - knife 55 | - spoon 56 | - bowl 57 | - banana 58 | - apple 59 | - sandwich 60 | - orange 61 | - broccoli 62 | - carrot 63 | - hot dog 64 | - pizza 65 | - donut 66 | - cake 67 | - chair 68 | - sofa 69 | - pottedplant 70 | - bed 71 | - diningtable 72 | - toilet 73 | - tvmonitor 74 | - laptop 75 | - mouse 76 | - remote 77 | - keyboard 78 | - cell phone 79 | - microwave 80 | - oven 81 | - toaster 82 | - sink 83 | - refrigerator 84 | - book 85 | - clock 86 | - vase 87 | - scissors 88 | - teddy bear 89 | - hair drier 90 | - toothbrush 91 | -------------------------------------------------------------------------------- /darknet_ros/config/yolov2-tiny.yaml: -------------------------------------------------------------------------------- 1 | yolo_model: 2 | 3 | config_file: 4 | name: yolov2-tiny.cfg 5 | weight_file: 6 | name: yolov2-tiny.weights 7 | threshold: 8 | value: 0.3 9 | detection_classes: 10 | names: 11 | - person 12 | - bicycle 13 | - car 14 | - motorbike 15 | - aeroplane 16 | - bus 17 | - train 18 | - truck 19 | - boat 20 | - traffic light 21 | - fire hydrant 22 | - stop sign 23 | - parking meter 24 | - bench 25 | - bird 26 | - cat 27 | - dog 28 | - horse 29 | - sheep 30 | - cow 31 | - elephant 32 | - bear 33 | - zebra 34 | - giraffe 35 | - backpack 36 | - umbrella 37 | - handbag 38 | - tie 39 | - suitcase 40 | - 
frisbee 41 | - skis 42 | - snowboard 43 | - sports ball 44 | - kite 45 | - baseball bat 46 | - baseball glove 47 | - skateboard 48 | - surfboard 49 | - tennis racket 50 | - bottle 51 | - wine glass 52 | - cup 53 | - fork 54 | - knife 55 | - spoon 56 | - bowl 57 | - banana 58 | - apple 59 | - sandwich 60 | - orange 61 | - broccoli 62 | - carrot 63 | - hot dog 64 | - pizza 65 | - donut 66 | - cake 67 | - chair 68 | - sofa 69 | - pottedplant 70 | - bed 71 | - diningtable 72 | - toilet 73 | - tvmonitor 74 | - laptop 75 | - mouse 76 | - remote 77 | - keyboard 78 | - cell phone 79 | - microwave 80 | - oven 81 | - toaster 82 | - sink 83 | - refrigerator 84 | - book 85 | - clock 86 | - vase 87 | - scissors 88 | - teddy bear 89 | - hair drier 90 | - toothbrush 91 | -------------------------------------------------------------------------------- /darknet_ros/test/yolov2.yaml: -------------------------------------------------------------------------------- 1 | image_view: 2 | 3 | enable_opencv: true 4 | wait_key_delay: 600 5 | 6 | yolo_model: 7 | 8 | config_file: 9 | name: yolov2.cfg 10 | weight_file: 11 | name: yolov2.weights 12 | threshold: 13 | value: 0.5 14 | detection_classes: 15 | names: 16 | - person 17 | - bicycle 18 | - car 19 | - motorbike 20 | - aeroplane 21 | - bus 22 | - train 23 | - truck 24 | - boat 25 | - traffic light 26 | - fire hydrant 27 | - stop sign 28 | - parking meter 29 | - bench 30 | - bird 31 | - cat 32 | - dog 33 | - horse 34 | - sheep 35 | - cow 36 | - elephant 37 | - bear 38 | - zebra 39 | - giraffe 40 | - backpack 41 | - umbrella 42 | - handbag 43 | - tie 44 | - suitcase 45 | - frisbee 46 | - skis 47 | - snowboard 48 | - sports ball 49 | - kite 50 | - baseball bat 51 | - baseball glove 52 | - skateboard 53 | - surfboard 54 | - tennis racket 55 | - bottle 56 | - wine glass 57 | - cup 58 | - fork 59 | - knife 60 | - spoon 61 | - bowl 62 | - banana 63 | - apple 64 | - sandwich 65 | - orange 66 | - broccoli 67 | - carrot 68 | - hot dog 69 | - pizza 
70 | - donut 71 | - cake 72 | - chair 73 | - sofa 74 | - pottedplant 75 | - bed 76 | - diningtable 77 | - toilet 78 | - tvmonitor 79 | - laptop 80 | - mouse 81 | - remote 82 | - keyboard 83 | - cell phone 84 | - microwave 85 | - oven 86 | - toaster 87 | - sink 88 | - refrigerator 89 | - book 90 | - clock 91 | - vase 92 | - scissors 93 | - teddy bear 94 | - hair drier 95 | - toothbrush 96 | -------------------------------------------------------------------------------- /darknet_ros/yolo_network_config/cfg/yolov2-tiny-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | max_batches = 40200 20 | policy=steps 21 | steps=-1,100,20000,30000 22 | scales=.1,10,.1,.1 23 | 24 | [convolutional] 25 | batch_normalize=1 26 | filters=16 27 | size=3 28 | stride=1 29 | pad=1 30 | activation=leaky 31 | 32 | [maxpool] 33 | size=2 34 | stride=2 35 | 36 | [convolutional] 37 | batch_normalize=1 38 | filters=32 39 | size=3 40 | stride=1 41 | pad=1 42 | activation=leaky 43 | 44 | [maxpool] 45 | size=2 46 | stride=2 47 | 48 | [convolutional] 49 | batch_normalize=1 50 | filters=64 51 | size=3 52 | stride=1 53 | pad=1 54 | activation=leaky 55 | 56 | [maxpool] 57 | size=2 58 | stride=2 59 | 60 | [convolutional] 61 | batch_normalize=1 62 | filters=128 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [maxpool] 69 | size=2 70 | stride=2 71 | 72 | [convolutional] 73 | batch_normalize=1 74 | filters=256 75 | size=3 76 | stride=1 77 | pad=1 78 | activation=leaky 79 | 80 | [maxpool] 81 | size=2 82 | stride=2 83 | 84 | [convolutional] 85 | batch_normalize=1 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [maxpool] 93 | 
size=2 94 | stride=1 95 | 96 | [convolutional] 97 | batch_normalize=1 98 | filters=1024 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | ########### 105 | 106 | [convolutional] 107 | batch_normalize=1 108 | size=3 109 | stride=1 110 | pad=1 111 | filters=1024 112 | activation=leaky 113 | 114 | [convolutional] 115 | size=1 116 | stride=1 117 | pad=1 118 | filters=125 119 | activation=linear 120 | 121 | [region] 122 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 123 | bias_match=1 124 | classes=20 125 | coords=4 126 | num=5 127 | softmax=1 128 | jitter=.2 129 | rescore=1 130 | 131 | object_scale=5 132 | noobject_scale=1 133 | class_scale=1 134 | coord_scale=1 135 | 136 | absolute=1 137 | thresh = .6 138 | random=1 139 | -------------------------------------------------------------------------------- /darknet_ros/yolo_network_config/cfg/yolov2-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | 
activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | size=3 110 | stride=1 111 | pad=1 112 | filters=512 113 | activation=leaky 114 | 115 | [convolutional] 116 | size=1 117 | stride=1 118 | pad=1 119 | filters=425 120 | activation=linear 121 | 122 | [region] 123 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 124 | bias_match=1 125 | classes=80 126 | coords=4 127 | num=5 128 | softmax=1 129 | jitter=.2 130 | rescore=0 131 | 132 | object_scale=5 133 | noobject_scale=1 134 | class_scale=1 135 | coord_scale=1 136 | 137 | absolute=1 138 | thresh = .6 139 | random=1 140 | -------------------------------------------------------------------------------- /darknet_ros/CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 2 | Changelog for package darknet_ros 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | 5 | 1.1.4 (2019-03-03) 6 | ------------------ 7 | * Merge pull request `#141 `_ from lorenwel/feature/launch_file_arg 8 | Added arg for launch file parameter files 9 | * Fixed synatx error 10 | * Removed unnecessary args 11 | * Adapted yolo_v3.launch to new launch file 12 | * Added launch file arguments for parameter files 13 | * Merge branch 'Texas-Aerial-Robotics-headerFixForUpsteam' 14 | * Merge branch 'headerFixForUpsteam' of https://github.com/Texas-Aerial-Robotics/darknet_ros into 
Texas-Aerial-Robotics-headerFixForUpsteam 15 | * Remove unused variable 16 | * Merge branch 'headerFixForUpsteam' of https://github.com/Texas-Aerial-Robotics/darknet_ros into Texas-Aerial-Robotics-headerFixForUpsteam 17 | * Multithreading mismatched image header fix 18 | * Forgot to add image. 19 | * Cropped test image. 20 | * Changed image for test. 21 | * Changed resame image. 22 | * Added new images for test. 23 | * Removed twice loading of weightfile. 24 | * Contributors: Lorenz Wellhausen, Marko Bjelonic, Umer Salman, lorenwel 25 | 26 | 1.1.3 (2018-04-26) 27 | ------------------ 28 | * Fixed iteration through detection boxes. 29 | * Merge pull request `#80 `_ from leggedrobotics/feature/yolo3 30 | Feature/yolo3 31 | * Fixed publishers. 32 | * Applied first changes for yolo v3. 33 | * Updated darknet and added launch files for yolov3. 34 | * Merge pull request `#73 `_ from leggedrobotics/fix/weights 35 | Fix/weights 36 | * Fixed weights. 37 | * Fix test. 38 | * Fixed formatting part 2. 39 | * Fixed naming. 40 | * Merge branch 'firephinx-master' 41 | * Merge branch 'master' of https://github.com/firephinx/darknet_ros into firephinx-master 42 | * Merge pull request `#62 `_ from warp1337/master 43 | Reduced window size to reasonable values 44 | * Reduced window size to reasonable values 45 | * Added rgb_image_header to BoundingBoxes msg. 46 | * Updated to the latest darknet version. 47 | * Merge pull request `#57 `_ from leggedrobotics/devel/threads 48 | Devel/threads 49 | * Rearranged. 50 | * Fixed action with new threads. 51 | * Adapted package description. 52 | * Added publisher. 53 | * Merge branch 'master' into devel/threads 54 | * Rearranged code. 55 | * Update package.xml 56 | * Fixed image_view if x11 is not running. 57 | * COmment runYolo(). 58 | * Update object_detector_demo.cpp 59 | * Changed ros config. 60 | * Node is shutting down properly. 61 | * Rearranged code and added threads. 
62 | * Contributors: Kevin Zhang, Marko Bjelonic, fl 63 | 64 | 1.1.2 (2018-01-06) 65 | ------------------ 66 | * First release of darknet_ros. 67 | -------------------------------------------------------------------------------- /darknet_ros/yolo_network_config/cfg/yolov2-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | height=416 9 | width=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 80200 21 | policy=steps 22 | steps=40000,60000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | 
filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 
| 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=125 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 243 | bias_match=1 244 | classes=20 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /darknet_ros/yolo_network_config/cfg/yolov2.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | 
[convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | 
layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /darknet_ros/include/darknet_ros/YoloObjectDetector.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * YoloObjectDetector.h 3 | * 4 | * Created on: Dec 19, 2016 5 | * Author: Marko Bjelonic 6 | * Institute: ETH Zurich, Robotic Systems Lab 7 | */ 8 | 9 | #pragma once 10 | 11 | // c++ 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | // ROS 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | // OpenCv 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | // darknet_ros_msgs 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include "../../../darknet/src/blas.h" 41 | 42 | 43 | // Darknet. 
44 | #ifdef GPU 45 | #include "cublas_v2.h" 46 | #include "cuda_runtime.h" 47 | #include "curand.h" 48 | #endif 49 | 50 | extern "C" { 51 | #include 52 | #include "box.h" 53 | #include "cost_layer.h" 54 | #include "darknet_ros/image_interface.h" 55 | #include "detection_layer.h" 56 | #include "network.h" 57 | #include "parser.h" 58 | #include "region_layer.h" 59 | #include "utils.h" 60 | } 61 | 62 | extern "C" void ipl_into_image(IplImage* src, image im); 63 | extern "C" image ipl_to_image(IplImage* src); 64 | // extern "C" void show_image_cv(image p, const char* name, IplImage* disp); 65 | 66 | namespace darknet_ros { 67 | 68 | //! Bounding box of the detected object. 69 | typedef struct { 70 | float x, y, w, h, prob; 71 | int num, Class; 72 | } RosBox_; 73 | 74 | typedef struct { 75 | IplImage* image; 76 | std_msgs::Header header; 77 | } IplImageWithHeader_; 78 | 79 | class YoloObjectDetector { 80 | public: 81 | /*! 82 | * Constructor. 83 | */ 84 | explicit YoloObjectDetector(ros::NodeHandle nh); 85 | 86 | /*! 87 | * Destructor. 88 | */ 89 | ~YoloObjectDetector(); 90 | 91 | private: 92 | /*! 93 | * Reads and verifies the ROS parameters. 94 | * @return true if successful. 95 | */ 96 | bool readParameters(); 97 | 98 | /*! 99 | * Initialize the ROS connections. 100 | */ 101 | void init(); 102 | 103 | /*! 104 | * Callback of camera. 105 | * @param[in] msg image pointer. 106 | */ 107 | void cameraCallback(const sensor_msgs::ImageConstPtr& msg); 108 | 109 | /*! 110 | * Check for objects action goal callback. 111 | */ 112 | void checkForObjectsActionGoalCB(); 113 | 114 | /*! 115 | * Check for objects action preempt callback. 116 | */ 117 | void checkForObjectsActionPreemptCB(); 118 | 119 | /*! 120 | * Check if a preempt for the check for objects action has been requested. 121 | * @return false if preempt has been requested or inactive. 122 | */ 123 | bool isCheckingForObjects() const; 124 | 125 | /*! 126 | * Publishes the detection image. 
127 | * @return true if successful. 128 | */ 129 | bool publishDetectionImage(const cv::Mat& detectionImage); 130 | 131 | //! Using. 132 | using CheckForObjectsActionServer = actionlib::SimpleActionServer; 133 | using CheckForObjectsActionServerPtr = std::shared_ptr; 134 | 135 | //! ROS node handle. 136 | ros::NodeHandle nodeHandle_; 137 | 138 | //! Class labels. 139 | int numClasses_; 140 | std::vector classLabels_; 141 | 142 | //! Check for objects action server. 143 | CheckForObjectsActionServerPtr checkForObjectsActionServer_; 144 | 145 | //! Advertise and subscribe to image topics. 146 | image_transport::ImageTransport imageTransport_; 147 | 148 | //! ROS subscriber and publisher. 149 | image_transport::Subscriber imageSubscriber_; 150 | ros::Publisher objectPublisher_; 151 | ros::Publisher boundingBoxesPublisher_; 152 | 153 | //! Detected objects. 154 | std::vector > rosBoxes_; 155 | std::vector rosBoxCounter_; 156 | darknet_ros_msgs::BoundingBoxes boundingBoxesResults_; 157 | 158 | //! Camera related parameters. 159 | int frameWidth_; 160 | int frameHeight_; 161 | 162 | //! Publisher of the bounding box image. 163 | ros::Publisher detectionImagePublisher_; 164 | 165 | // Yolo running on thread. 166 | std::thread yoloThread_; 167 | 168 | // Darknet. 
169 | char** demoNames_; 170 | image** demoAlphabet_; 171 | int demoClasses_; 172 | 173 | network* net_; 174 | std_msgs::Header headerBuff_[3]; 175 | image buff_[3]; 176 | image buffLetter_[3]; 177 | int buffId_[3]; 178 | int buffIndex_ = 0; 179 | IplImage* ipl_; 180 | float fps_ = 0; 181 | float demoThresh_ = 0; 182 | float demoHier_ = .5; 183 | int running_ = 0; 184 | 185 | int demoDelay_ = 0; 186 | int demoFrame_ = 3; 187 | float** predictions_; 188 | int demoIndex_ = 0; 189 | int demoDone_ = 0; 190 | float* lastAvg2_; 191 | float* lastAvg_; 192 | float* avg_; 193 | int demoTotal_ = 0; 194 | double demoTime_; 195 | 196 | RosBox_* roiBoxes_; 197 | bool viewImage_; 198 | bool enableConsoleOutput_; 199 | int waitKeyDelay_; 200 | int fullScreen_; 201 | char* demoPrefix_; 202 | 203 | std_msgs::Header imageHeader_; 204 | cv::Mat camImageCopy_; 205 | boost::shared_mutex mutexImageCallback_; 206 | 207 | bool imageStatus_ = false; 208 | boost::shared_mutex mutexImageStatus_; 209 | 210 | bool isNodeRunning_ = true; 211 | boost::shared_mutex mutexNodeStatus_; 212 | 213 | int actionId_; 214 | boost::shared_mutex mutexActionStatus_; 215 | 216 | // double getWallTime(); 217 | 218 | int sizeNetwork(network* net); 219 | 220 | void rememberNetwork(network* net); 221 | 222 | detection* avgPredictions(network* net, int* nboxes); 223 | 224 | void* detectInThread(); 225 | 226 | void* fetchInThread(); 227 | 228 | void* displayInThread(void* ptr); 229 | 230 | void* displayLoop(void* ptr); 231 | 232 | void* detectLoop(void* ptr); 233 | 234 | void setupNetwork(char* cfgfile, char* weightfile, char* datafile, float thresh, char** names, int classes, int delay, char* prefix, 235 | int avg_frames, float hier, int w, int h, int frames, int fullscreen); 236 | 237 | void yolo(); 238 | 239 | IplImageWithHeader_ getIplImageWithHeader(); 240 | 241 | bool getImageStatus(void); 242 | 243 | bool isNodeRunning(void); 244 | 245 | void* publishInThread(); 246 | }; 247 | 248 | } /* namespace 
darknet_ros*/ 249 | -------------------------------------------------------------------------------- /darknet_ros/test/ObjectDetection.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * ObjectDetection.cpp 3 | * 4 | * Created on: Jan 07, 2017 5 | * Author: Marko Bjelonic 6 | * Institute: ETH Zurich, Robotic Systems Lab 7 | */ 8 | 9 | // Google Test 10 | #include 11 | 12 | // ROS 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | // boost 19 | #include 20 | 21 | // OpenCV2. 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | // Actions. 28 | #include 29 | 30 | using CheckForObjectsActionClient = actionlib::SimpleActionClient; 31 | using CheckForObjectsActionClientPtr = std::shared_ptr; 32 | 33 | // c++ 34 | #include 35 | #include 36 | 37 | #ifdef DARKNET_FILE_PATH 38 | std::string darknetFilePath_ = DARKNET_FILE_PATH; 39 | #else 40 | #error Path of darknet repository is not defined in CMakeLists.txt. 41 | #endif 42 | 43 | darknet_ros_msgs::BoundingBoxes boundingBoxesResults_; 44 | 45 | /*! 46 | * Done-callback for CheckForObjects action client. 47 | * @param[in] state 48 | * @param[in] result 49 | */ 50 | void checkForObjectsResultCB(const actionlib::SimpleClientGoalState& state, const darknet_ros_msgs::CheckForObjectsResultConstPtr& result) { 51 | std::cout << "[ObjectDetectionTest] Received bounding boxes." << std::endl; 52 | 53 | boundingBoxesResults_ = result->bounding_boxes; 54 | } 55 | 56 | bool sendImageToYolo(ros::NodeHandle nh, const std::string& pathToTestImage) { 57 | //! Check for objects action client. 58 | CheckForObjectsActionClientPtr checkForObjectsActionClient; 59 | 60 | // Action clients. 
61 | std::string checkForObjectsActionName; 62 | nh.param("/darknet_ros/camera_action", checkForObjectsActionName, std::string("/darknet_ros/check_for_objects")); 63 | checkForObjectsActionClient.reset(new CheckForObjectsActionClient(nh, checkForObjectsActionName, true)); 64 | 65 | // Wait till action server launches. 66 | if (!checkForObjectsActionClient->waitForServer(ros::Duration(20.0))) { 67 | std::cout << "[ObjectDetectionTest] sendImageToYolo(): checkForObjects action server has not been advertised." << std::endl; 68 | return false; 69 | } 70 | 71 | // Get test image 72 | cv_bridge::CvImagePtr cv_ptr(new cv_bridge::CvImage); 73 | cv_ptr->image = cv::imread(pathToTestImage, CV_LOAD_IMAGE_COLOR); 74 | cv_ptr->encoding = sensor_msgs::image_encodings::RGB8; 75 | sensor_msgs::ImagePtr image = cv_ptr->toImageMsg(); 76 | 77 | // Generate goal. 78 | darknet_ros_msgs::CheckForObjectsGoal goal; 79 | goal.image = *image; 80 | 81 | // Send goal. 82 | ros::Time beginYolo = ros::Time::now(); 83 | checkForObjectsActionClient->sendGoal(goal, boost::bind(&checkForObjectsResultCB, _1, _2), 84 | CheckForObjectsActionClient::SimpleActiveCallback(), 85 | CheckForObjectsActionClient::SimpleFeedbackCallback()); 86 | 87 | if (!checkForObjectsActionClient->waitForResult(ros::Duration(100.0))) { 88 | std::cout << "[ObjectDetectionTest] sendImageToYolo(): checkForObjects action server took to long to send back result." << std::endl; 89 | return false; 90 | } 91 | ros::Time endYolo = ros::Time::now(); 92 | std::cout << "[ObjectDetectionTest] Object detection for one image took " << endYolo - beginYolo << " seconds." << std::endl; 93 | return true; 94 | } 95 | 96 | TEST(ObjectDetection, DISABLED_DetectDog) { 97 | srand(static_cast(time(nullptr))); 98 | ros::NodeHandle nodeHandle("~"); 99 | 100 | // Path to test image. 
101 | std::string pathToTestImage = darknetFilePath_; 102 | pathToTestImage += "/data/"; 103 | pathToTestImage += "dog"; 104 | pathToTestImage += ".jpg"; 105 | 106 | // Send dog image to yolo. 107 | ASSERT_TRUE(sendImageToYolo(nodeHandle, pathToTestImage)); 108 | ASSERT_TRUE(sendImageToYolo(nodeHandle, pathToTestImage)); 109 | 110 | // Evaluate if yolo was able to detect the three objects: dog, bicycle and car. 111 | bool detectedDog = false; 112 | double centerErrorDog; 113 | bool detectedBicycle = false; 114 | double centerErrorBicycle; 115 | bool detectedCar = false; 116 | double centerErrorCar; 117 | 118 | for (auto& boundingBox : boundingBoxesResults_.bounding_boxes) { 119 | double xPosCenter = boundingBox.xmin + (boundingBox.xmax - boundingBox.xmin) * 0.5; 120 | double yPosCenter = boundingBox.ymin + (boundingBox.ymax - boundingBox.ymin) * 0.5; 121 | 122 | if (boundingBox.Class == "dog") { 123 | detectedDog = true; 124 | // std::cout << "centerErrorDog " << xPosCenter << ", " << yPosCenter << std::endl; 125 | centerErrorDog = std::sqrt(std::pow(xPosCenter - 222.5, 2) + std::pow(yPosCenter - 361.5, 2)); 126 | } 127 | if (boundingBox.Class == "bicycle") { 128 | detectedBicycle = true; 129 | // std::cout << "centerErrorBicycle " << xPosCenter << ", " << yPosCenter << std::endl; 130 | centerErrorBicycle = std::sqrt(std::pow(xPosCenter - 338.0, 2) + std::pow(yPosCenter - 289.0, 2)); 131 | } 132 | if (boundingBox.Class == "truck") { 133 | detectedCar = true; 134 | // std::cout << "centerErrorCar " << xPosCenter << ", " << yPosCenter << std::endl; 135 | centerErrorCar = std::sqrt(std::pow(xPosCenter - 561.0, 2) + std::pow(yPosCenter - 126.5, 2)); 136 | } 137 | } 138 | 139 | ASSERT_TRUE(detectedDog); 140 | EXPECT_LT(centerErrorDog, 40.0); 141 | ASSERT_TRUE(detectedBicycle); 142 | EXPECT_LT(centerErrorBicycle, 40.0); 143 | ASSERT_TRUE(detectedCar); 144 | EXPECT_LT(centerErrorCar, 40.0); 145 | } 146 | 147 | TEST(ObjectDetection, DetectANYmal) { 148 | 
srand(static_cast(time(nullptr))); 149 | ros::NodeHandle nodeHandle("~"); 150 | 151 | // Path to test image. 152 | std::string pathToTestImage = ros::package::getPath("darknet_ros"); 153 | pathToTestImage += "/doc/"; 154 | pathToTestImage += "quadruped_anymal_and_person"; 155 | pathToTestImage += ".JPG"; 156 | 157 | // Send quadruped-and-person image to yolo (twice, to exercise repeated goals). 158 | ASSERT_TRUE(sendImageToYolo(nodeHandle, pathToTestImage)); 159 | ASSERT_TRUE(sendImageToYolo(nodeHandle, pathToTestImage)); 160 | 161 | // Evaluate if yolo was able to detect the person. 162 | bool detectedPerson = false; 163 | double centerErrorPersonX; 164 | double centerErrorPersonY; 165 | 166 | for (auto& boundingBox : boundingBoxesResults_.bounding_boxes) { 167 | double xPosCenter = boundingBox.xmin + (boundingBox.xmax - boundingBox.xmin) * 0.5; 168 | double yPosCenter = boundingBox.ymin + (boundingBox.ymax - boundingBox.ymin) * 0.5; 169 | 170 | if (boundingBox.Class == "person") { 171 | detectedPerson = true; 172 | centerErrorPersonX = std::sqrt(std::pow(xPosCenter - 1650.0, 2)); 173 | centerErrorPersonY = std::sqrt(std::pow(yPosCenter - 1675.0, 2)); 174 | } 175 | } 176 | 177 | ASSERT_TRUE(detectedPerson); 178 | EXPECT_LT(centerErrorPersonX, 30); 179 | EXPECT_LT(centerErrorPersonY, 30); 180 | } 181 | 182 | TEST(ObjectDetection, DISABLED_DetectPerson) { 183 | srand(static_cast(time(nullptr))); 184 | ros::NodeHandle nodeHandle("~"); 185 | 186 | // Path to test image. 187 | std::string pathToTestImage = darknetFilePath_; 188 | pathToTestImage += "/data/"; 189 | pathToTestImage += "person"; 190 | pathToTestImage += ".jpg"; 191 | 192 | ASSERT_TRUE(sendImageToYolo(nodeHandle, pathToTestImage)); 193 | ASSERT_TRUE(sendImageToYolo(nodeHandle, pathToTestImage)); 194 | 195 | // Evaluate if yolo was able to detect the person.
196 | bool detectedPerson = false; 197 | double centerErrorPerson; 198 | 199 | for (auto& boundingBox : boundingBoxesResults_.bounding_boxes) { 200 | double xPosCenter = boundingBox.xmin + (boundingBox.xmax - boundingBox.xmin) * 0.5; 201 | double yPosCenter = boundingBox.ymin + (boundingBox.ymax - boundingBox.ymin) * 0.5; 202 | 203 | if (boundingBox.Class == "person") { 204 | detectedPerson = true; 205 | // std::cout << "centerErrorPerson " << xPosCenter << ", " << yPosCenter << std::endl; 206 | centerErrorPerson = std::sqrt(std::pow(xPosCenter - 228.0, 2) + std::pow(yPosCenter - 238.0, 2)); 207 | } 208 | } 209 | 210 | ASSERT_TRUE(detectedPerson); 211 | EXPECT_LT(centerErrorPerson, 40.0); 212 | } 213 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLO ROS: Real-Time Object Detection for ROS 2 | 3 | ## Overview 4 | 5 | This is a ROS package developed for object detection in camera images. You only look once (YOLO) is a state-of-the-art, real-time object detection system. In the following ROS package you are able to use YOLO (V3) on GPU and CPU. The pre-trained model of the convolutional neural network is able to detect pre-trained classes including the data set from VOC and COCO, or you can also create a network with your own detection objects. For more information about YOLO, Darknet, available training data and training YOLO see the following link: [YOLO: Real-Time Object Detection](http://pjreddie.com/darknet/yolo/). 6 | 7 | The YOLO packages have been tested under ROS Melodic and Ubuntu 18.04. This is research code, expect that it changes often and any fitness for a particular purpose is disclaimed. 
8 | 9 | **Author: [Marko Bjelonic](https://www.markobjelonic.com), marko.bjelonic@mavt.ethz.ch** 10 | 11 | **Affiliation: [Robotic Systems Lab](http://www.rsl.ethz.ch/), ETH Zurich** 12 | 13 | ![Darknet Ros example: Detection image](darknet_ros/doc/test_detection.png) 14 | ![Darknet Ros example: Detection image](darknet_ros/doc/test_detection_anymal.png) 15 | 16 | Based on the [Pascal VOC](https://pjreddie.com/projects/pascal-voc-dataset-mirror/) 2012 dataset, YOLO can detect the 20 Pascal object classes: 17 | 18 | - person 19 | - bird, cat, cow, dog, horse, sheep 20 | - aeroplane, bicycle, boat, bus, car, motorbike, train 21 | - bottle, chair, dining table, potted plant, sofa, tv/monitor 22 | 23 | Based on the [COCO](http://cocodataset.org/#home) dataset, YOLO can detect the 80 COCO object classes: 24 | 25 | - person 26 | - bicycle, car, motorbike, aeroplane, bus, train, truck, boat 27 | - traffic light, fire hydrant, stop sign, parking meter, bench 28 | - cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe 29 | - backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard, sports ball, kite, baseball bat, baseball glove, skateboard, surfboard, tennis racket 30 | - bottle, wine glass, cup, fork, knife, spoon, bowl 31 | - banana, apple, sandwich, orange, broccoli, carrot, hot dog, pizza, donut, cake 32 | - chair, sofa, pottedplant, bed, diningtable, toilet, tvmonitor, laptop, mouse, remote, keyboard, cell phone, microwave, oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, hair drier, toothbrush 33 | 34 | ## Citing 35 | 36 | The YOLO methods used in this software are described in the paper: [You Only Look Once: Unified, Real-Time Object Detection](https://arxiv.org/abs/1506.02640). 37 | 38 | If you are using YOLO V3 for ROS, please add the following citation to your publication: 39 | 40 | M. Bjelonic 41 | **"YOLO ROS: Real-Time Object Detection for ROS"**, 42 | URL: https://github.com/leggedrobotics/darknet_ros, 2018. 
43 | 44 | @misc{bjelonicYolo2018, 45 | author = {Marko Bjelonic}, 46 | title = {{YOLO ROS}: Real-Time Object Detection for {ROS}}, 47 | howpublished = {\url{https://github.com/leggedrobotics/darknet_ros}}, 48 | year = {2016--2018}, 49 | } 50 | 51 | ## Installation 52 | 53 | ### Dependencies 54 | 55 | This software is built on the Robotic Operating System ([ROS]), which needs to be [installed](http://wiki.ros.org) first. Additionally, YOLO for ROS depends on following software: 56 | 57 | - [OpenCV](http://opencv.org/) (computer vision library), 58 | - [boost](http://www.boost.org/) (c++ library), 59 | 60 | ### Building 61 | 62 | [![Build Status](https://ci.leggedrobotics.com/buildStatus/icon?job=github_leggedrobotics/darknet_ros/master)](https://ci.leggedrobotics.com/job/github_leggedrobotics/job/darknet_ros/job/master/) 63 | 64 | In order to install darknet_ros, clone the latest version using SSH (see [how to set up an SSH key](https://confluence.atlassian.com/bitbucket/set-up-an-ssh-key-728138079.html)) from this repository into your catkin workspace and compile the package using ROS. 65 | 66 | cd catkin_workspace/src 67 | git clone --recursive git@github.com:leggedrobotics/darknet_ros.git 68 | cd ../ 69 | 70 | To maximize performance, make sure to build in *Release* mode. You can specify the build type by setting 71 | 72 | catkin_make -DCMAKE_BUILD_TYPE=Release 73 | 74 | or using the [Catkin Command Line Tools](http://catkin-tools.readthedocs.io/en/latest/index.html#) 75 | 76 | catkin build darknet_ros -DCMAKE_BUILD_TYPE=Release 77 | 78 | Darknet on the CPU is fast (approximately 1.5 seconds on an Intel Core i7-6700HQ CPU @ 2.60GHz × 8) but it's like 500 times faster on GPU! You'll have to have an Nvidia GPU and you'll have to install CUDA. The CMakeLists.txt file automatically detects if you have CUDA installed or not. CUDA is a parallel computing platform and application programming interface (API) model created by Nvidia. 
If you do not have CUDA on your System the build process will switch to the CPU version of YOLO. If you are compiling with CUDA, you might receive the following build error: 79 | 80 | nvcc fatal : Unsupported gpu architecture 'compute_61'. 81 | 82 | This means that you need to check the compute capability (version) of your GPU. You can find a list of supported GPUs in CUDA here: [CUDA - WIKIPEDIA](https://en.wikipedia.org/wiki/CUDA#Supported_GPUs). Simply find the compute capability of your GPU and add it into darknet_ros/CMakeLists.txt. Simply add a similar line like 83 | 84 | -O3 -gencode arch=compute_62,code=sm_62 85 | 86 | ### Download weights 87 | 88 | The yolo-voc.weights and tiny-yolo-voc.weights are downloaded automatically in the CMakeLists.txt file. If you need to download them again, go into the weights folder and download the two pre-trained weights from the COCO data set: 89 | 90 | cd catkin_workspace/src/darknet_ros/darknet_ros/yolo_network_config/weights/ 91 | wget http://pjreddie.com/media/files/yolov2.weights 92 | wget http://pjreddie.com/media/files/yolov2-tiny.weights 93 | 94 | And weights from the VOC data set can be found here: 95 | 96 | wget http://pjreddie.com/media/files/yolov2-voc.weights 97 | wget http://pjreddie.com/media/files/yolov2-tiny-voc.weights 98 | 99 | And the pre-trained weight from YOLO v3 can be found here: 100 | 101 | wget http://pjreddie.com/media/files/yolov3-tiny.weights 102 | wget http://pjreddie.com/media/files/yolov3.weights 103 | 104 | There are more pre-trained weights from different data sets reported [here](https://pjreddie.com/darknet/yolo/). 
105 | 106 | ### Use your own detection objects 107 | 108 | In order to use your own detection objects you need to provide your weights and your cfg file inside the directories: 109 | 110 | catkin_workspace/src/darknet_ros/darknet_ros/yolo_network_config/weights/ 111 | catkin_workspace/src/darknet_ros/darknet_ros/yolo_network_config/cfg/ 112 | 113 | In addition, you need to create your config file for ROS where you define the names of the detection objects. You need to include it inside: 114 | 115 | catkin_workspace/src/darknet_ros/darknet_ros/config/ 116 | 117 | Then in the launch file you have to point to your new config file in the line: 118 | 119 | 120 | 121 | ### Unit Tests 122 | 123 | Run the unit tests using the [Catkin Command Line Tools](http://catkin-tools.readthedocs.io/en/latest/index.html#) 124 | 125 | catkin build darknet_ros --no-deps --verbose --catkin-make-args run_tests 126 | 127 | You will see the image above popping up. 128 | 129 | ## Basic Usage 130 | 131 | In order to get YOLO ROS: Real-Time Object Detection for ROS to run with your robot, you will need to adapt a few parameters. It is easiest if you duplicate and adapt all the parameter files that you need to change from the `darknet_ros` package. These are specifically the parameter files in `config` and the launch file from the `launch` folder. 132 | 133 | ## Nodes 134 | 135 | ### Node: darknet_ros 136 | 137 | This is the main YOLO ROS: Real-Time Object Detection for ROS node. It uses the camera measurements to detect pre-learned objects in the frames. 138 | 139 | ### ROS related parameters 140 | 141 | You can change the names and other parameters of the publishers, subscribers and actions inside `darknet_ros/config/ros.yaml`. 142 | 143 | #### Subscribed Topics 144 | 145 | * **`/camera_reading`** ([sensor_msgs/Image]) 146 | 147 | The camera measurements.
148 | 149 | #### Published Topics 150 | 151 | * **`object_detector`** ([std_msgs::Int8]) 152 | 153 | Publishes the number of detected objects. 154 | 155 | * **`bounding_boxes`** ([darknet_ros_msgs::BoundingBoxes]) 156 | 157 | Publishes an array of bounding boxes that gives information of the position and size of the bounding box in pixel coordinates. 158 | 159 | * **`detection_image`** ([sensor_msgs::Image]) 160 | 161 | Publishes an image of the detection image including the bounding boxes. 162 | 163 | #### Actions 164 | 165 | * **`camera_reading`** ([sensor_msgs::Image]) 166 | 167 | Sends an action with an image and the result is an array of bounding boxes. 168 | 169 | ### Detection related parameters 170 | 171 | You can change the parameters that are related to the detection by adding a new config file that looks similar to `darknet_ros/config/yolo.yaml`. 172 | 173 | * **`image_view/enable_opencv`** (bool) 174 | 175 | Enable or disable the open cv view of the detection image including the bounding boxes. 176 | 177 | * **`image_view/wait_key_delay`** (int) 178 | 179 | Wait key delay in ms of the open cv window. 180 | 181 | * **`yolo_model/config_file/name`** (string) 182 | 183 | Name of the cfg file of the network that is used for detection. The code searches for this name inside `darknet_ros/yolo_network_config/cfg/`. 184 | 185 | * **`yolo_model/weight_file/name`** (string) 186 | 187 | Name of the weights file of the network that is used for detection. The code searches for this name inside `darknet_ros/yolo_network_config/weights/`. 188 | 189 | * **`yolo_model/threshold/value`** (float) 190 | 191 | Threshold of the detection algorithm. It is defined between 0 and 1. 192 | 193 | * **`yolo_model/detection_classes/names`** (array of strings) 194 | 195 | Detection names of the network used by the cfg and weights file inside `darknet_ros/yolo_network_config/`. 
196 | -------------------------------------------------------------------------------- /darknet_ros/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.12) 2 | project(darknet_ros) 3 | 4 | # Set c++11 cmake flags 5 | set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}") 6 | set(CMAKE_C_FLAGS "-Wall -Wno-unused-result -Wno-unknown-pragmas -Wno-unused-variable -Wfatal-errors -fPIC ${CMAKE_C_FLAGS}") 7 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 8 | 9 | # Define path of darknet folder here. 10 | find_path(DARKNET_PATH 11 | NAMES "README.md" 12 | HINTS "${CMAKE_CURRENT_SOURCE_DIR}/../darknet/") 13 | message(STATUS "Darknet path dir = ${DARKNET_PATH}") 14 | add_definitions(-DDARKNET_FILE_PATH="${DARKNET_PATH}") 15 | 16 | # Find CUDA 17 | find_package(CUDA QUIET) 18 | if (CUDA_FOUND) 19 | find_package(CUDA REQUIRED) 20 | message(STATUS "CUDA Version: ${CUDA_VERSION_STRING}") 21 | message(STATUS "CUDA Libraries: ${CUDA_LIBRARIES}") 22 | set( 23 | CUDA_NVCC_FLAGS 24 | ${CUDA_NVCC_FLAGS}; 25 | -O3 26 | # -gencode arch=compute_30,code=sm_30 27 | -gencode arch=compute_35,code=sm_35 28 | -gencode arch=compute_50,code=[sm_50,compute_50] 29 | -gencode arch=compute_52,code=[sm_52,compute_52] 30 | -gencode arch=compute_61,code=sm_61 31 | -gencode arch=compute_62,code=sm_62 32 | ) 33 | add_definitions(-DGPU) 34 | else() 35 | list(APPEND LIBRARIES "m") 36 | endif() 37 | 38 | # Find X11 39 | message ( STATUS "Searching for X11..." 
) 40 | find_package ( X11 REQUIRED ) 41 | if ( X11_FOUND ) 42 | include_directories ( ${X11_INCLUDE_DIR} ) 43 | link_libraries ( ${X11_LIBRARIES} ) 44 | message ( STATUS " X11_INCLUDE_DIR: " ${X11_INCLUDE_DIR} ) 45 | message ( STATUS " X11_LIBRARIES: " ${X11_LIBRARIES} ) 46 | endif ( X11_FOUND ) 47 | 48 | # Find required packages 49 | find_package(Boost REQUIRED COMPONENTS thread) 50 | find_package(OpenCV REQUIRED) 51 | include_directories(${OpenCV_INCLUDE_DIRS}) 52 | find_package(catkin REQUIRED 53 | COMPONENTS 54 | cv_bridge 55 | roscpp 56 | rospy 57 | std_msgs 58 | actionlib 59 | darknet_ros_msgs 60 | image_transport 61 | ) 62 | 63 | # Enable OPENCV in darknet 64 | add_definitions(-DOPENCV) 65 | add_definitions(-O4 -g) 66 | 67 | catkin_package( 68 | INCLUDE_DIRS 69 | include 70 | LIBRARIES 71 | ${PROJECT_NAME}_lib 72 | CATKIN_DEPENDS 73 | cv_bridge 74 | roscpp 75 | actionlib 76 | rospy 77 | std_msgs 78 | darknet_ros_msgs 79 | image_transport 80 | DEPENDS 81 | Boost 82 | ) 83 | 84 | include_directories( 85 | ${DARKNET_PATH}/src 86 | ${DARKNET_PATH}/include 87 | include 88 | ${Boost_INCLUDE_DIRS} 89 | ${catkin_INCLUDE_DIRS} 90 | ) 91 | 92 | set(PROJECT_LIB_FILES 93 | src/YoloObjectDetector.cpp src/image_interface.c 94 | ) 95 | 96 | set(DARKNET_CORE_FILES 97 | ${DARKNET_PATH}/src/activation_layer.c ${DARKNET_PATH}/src/im2col.c 98 | ${DARKNET_PATH}/src/activations.c ${DARKNET_PATH}/src/image.c 99 | ${DARKNET_PATH}/src/avgpool_layer.c ${DARKNET_PATH}/src/layer.c 100 | ${DARKNET_PATH}/src/batchnorm_layer.c ${DARKNET_PATH}/src/list.c 101 | ${DARKNET_PATH}/src/blas.c ${DARKNET_PATH}/src/local_layer.c 102 | ${DARKNET_PATH}/src/box.c ${DARKNET_PATH}/src/lstm_layer.c 103 | ${DARKNET_PATH}/src/col2im.c ${DARKNET_PATH}/src/matrix.c 104 | ${DARKNET_PATH}/src/connected_layer.c ${DARKNET_PATH}/src/maxpool_layer.c 105 | ${DARKNET_PATH}/src/convolutional_layer.c ${DARKNET_PATH}/src/network.c 106 | ${DARKNET_PATH}/src/cost_layer.c ${DARKNET_PATH}/src/normalization_layer.c 107 | 
${DARKNET_PATH}/src/crnn_layer.c ${DARKNET_PATH}/src/option_list.c 108 | ${DARKNET_PATH}/src/crop_layer.c ${DARKNET_PATH}/src/parser.c 109 | ${DARKNET_PATH}/src/dark_cuda.c ${DARKNET_PATH}/src/region_layer.c 110 | ${DARKNET_PATH}/src/data.c ${DARKNET_PATH}/src/reorg_layer.c 111 | ${DARKNET_PATH}/src/deconvolutional_layer.c ${DARKNET_PATH}/src/rnn_layer.c 112 | ${DARKNET_PATH}/src/demo.c ${DARKNET_PATH}/src/route_layer.c 113 | ${DARKNET_PATH}/src/detection_layer.c ${DARKNET_PATH}/src/shortcut_layer.c 114 | ${DARKNET_PATH}/src/dropout_layer.c ${DARKNET_PATH}/src/softmax_layer.c 115 | ${DARKNET_PATH}/src/gemm.c ${DARKNET_PATH}/src/tree.c 116 | ${DARKNET_PATH}/src/gru_layer.c ${DARKNET_PATH}/src/utils.c 117 | ${DARKNET_PATH}/src/upsample_layer.c # ${DARKNET_PATH}/src/logistic_layer.c 118 | # ${DARKNET_PATH}/src/l2norm_layer.c ${DARKNET_PATH}/src/yolo_layer.c 119 | ${DARKNET_PATH}/src/representation_layer.c 120 | 121 | ${DARKNET_PATH}/src/art.c # ${DARKNET_PATH}/src/lsd.c 122 | # ${DARKNET_PATH}/src/attention.c ${DARKNET_PATH}/src/nightmare.c 123 | ${DARKNET_PATH}/src/captcha.c # ${DARKNET_PATH}/src/regressor.c 124 | ${DARKNET_PATH}/src/cifar.c ${DARKNET_PATH}/src/rnn.c 125 | ${DARKNET_PATH}/src/classifier.c # ${DARKNET_PATH}/src/segmenter.c 126 | ${DARKNET_PATH}/src/coco.c ${DARKNET_PATH}/src/super.c 127 | ${DARKNET_PATH}/src/darknet.c ${DARKNET_PATH}/src/tag.c 128 | ${DARKNET_PATH}/src/detector.c ${DARKNET_PATH}/src/yolo.c 129 | ${DARKNET_PATH}/src/go.c 130 | 131 | ${DARKNET_PATH}/src/image_opencv.cpp 132 | ${DARKNET_PATH}/src/conv_lstm_layer.c 133 | ${DARKNET_PATH}/src/sam_layer.c 134 | ${DARKNET_PATH}/src/gaussian_yolo_layer.c 135 | ${DARKNET_PATH}/src/http_stream.cpp 136 | ${DARKNET_PATH}/src/scale_channels_layer.c 137 | ${DARKNET_PATH}/src/nightmare.c 138 | ${DARKNET_PATH}/src/voxel.c 139 | ${DARKNET_PATH}/src/compare.c 140 | ${DARKNET_PATH}/src/reorg_old_layer.c 141 | ${DARKNET_PATH}/src/writing.c 142 | ${DARKNET_PATH}/src/yolo_layer.c 143 | 
${DARKNET_PATH}/src/dice.c 144 | ${DARKNET_PATH}/src/rnn_vid.c 145 | ) 146 | 147 | set(DARKNET_CUDA_FILES 148 | ${DARKNET_PATH}/src/activation_kernels.cu ${DARKNET_PATH}/src/crop_layer_kernels.cu 149 | ${DARKNET_PATH}/src/avgpool_layer_kernels.cu ${DARKNET_PATH}/src/deconvolutional_kernels.cu 150 | ${DARKNET_PATH}/src/blas_kernels.cu ${DARKNET_PATH}/src/dropout_layer_kernels.cu 151 | ${DARKNET_PATH}/src/col2im_kernels.cu ${DARKNET_PATH}/src/im2col_kernels.cu 152 | ${DARKNET_PATH}/src/convolutional_kernels.cu ${DARKNET_PATH}/src/maxpool_layer_kernels.cu 153 | 154 | ${DARKNET_PATH}/src/network_kernels.cu 155 | 156 | ) 157 | 158 | if (CUDA_FOUND) 159 | 160 | link_directories( 161 | ${CUDA_TOOLKIT_ROOT_DIR}/lib64 162 | ) 163 | 164 | cuda_add_library(${PROJECT_NAME}_lib 165 | ${PROJECT_LIB_FILES} ${DARKNET_CORE_FILES} 166 | ${DARKNET_CUDA_FILES} 167 | ) 168 | 169 | target_link_libraries(${PROJECT_NAME}_lib 170 | cuda 171 | cudart 172 | cublas 173 | curand 174 | ) 175 | 176 | cuda_add_executable(${PROJECT_NAME} 177 | src/yolo_object_detector_node.cpp 178 | ) 179 | 180 | else() 181 | 182 | add_library(${PROJECT_NAME}_lib 183 | ${PROJECT_LIB_FILES} ${DARKNET_CORE_FILES} 184 | ) 185 | 186 | add_executable(${PROJECT_NAME} 187 | src/yolo_object_detector_node.cpp 188 | ) 189 | 190 | endif() 191 | 192 | target_link_libraries(${PROJECT_NAME}_lib 193 | m 194 | pthread 195 | stdc++ 196 | ${Boost_LIBRARIES} 197 | ${OpenCV_LIBRARIES} 198 | ${catkin_LIBRARIES} 199 | ${OpenCV_LIBS} 200 | ) 201 | 202 | target_link_libraries(${PROJECT_NAME} 203 | ${PROJECT_NAME}_lib 204 | ) 205 | 206 | add_dependencies(${PROJECT_NAME}_lib 207 | darknet_ros_msgs_generate_messages_cpp 208 | ) 209 | 210 | install(TARGETS ${PROJECT_NAME}_lib 211 | ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} 212 | LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} 213 | RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 214 | ) 215 | 216 | install(TARGETS ${PROJECT_NAME} 217 | RUNTIME DESTINATION 
${CATKIN_PACKAGE_BIN_DESTINATION} 218 | ) 219 | 220 | install( 221 | DIRECTORY include/${PROJECT_NAME}/ 222 | DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} 223 | FILES_MATCHING PATTERN "*.h" 224 | ) 225 | 226 | install(DIRECTORY config launch yolo_network_config 227 | DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} 228 | ) 229 | 230 | # Download yolov2-tiny.weights 231 | set(PATH "${CMAKE_CURRENT_SOURCE_DIR}/yolo_network_config/weights") 232 | set(FILE "${PATH}/yolov2-tiny.weights") 233 | message(STATUS "Checking and downloading yolov2-tiny.weights if needed ...") 234 | if (NOT EXISTS "${FILE}") 235 | message(STATUS "... file does not exist. Downloading now ...") 236 | execute_process(COMMAND wget -q https://github.com/leggedrobotics/darknet_ros/releases/download/1.1.4/yolov2-tiny.weights -P ${PATH}) 237 | endif() 238 | 239 | # Download yolov3.weights 240 | set(FILE "${PATH}/yolov3.weights") 241 | message(STATUS "Checking and downloading yolov3.weights if needed ...") 242 | if (NOT EXISTS "${FILE}") 243 | message(STATUS "... file does not exist. Downloading now ...") 244 | execute_process(COMMAND wget -q https://github.com/leggedrobotics/darknet_ros/releases/download/1.1.4/yolov3.weights -P ${PATH}) 245 | endif() 246 | 247 | # Download yolov4.weights 248 | set(FILE "${PATH}/yolov4.weights") 249 | message(STATUS "Checking and downloading yolov4.weights if needed ...") 250 | if (NOT EXISTS "${FILE}") 251 | message(STATUS "... file does not exist. 
Downloading now ...") 252 | execute_process(COMMAND wget -q https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights -P ${PATH}) 253 | endif() 254 | 255 | ############# 256 | ## Testing ## 257 | ############# 258 | 259 | if(CATKIN_ENABLE_TESTING) 260 | # Download yolov2.weights 261 | set(PATH "${CMAKE_CURRENT_SOURCE_DIR}/yolo_network_config/weights") 262 | set(FILE "${PATH}/yolov2.weights") 263 | message(STATUS "Checking and downloading yolov2.weights if needed ...") 264 | if (NOT EXISTS "${FILE}") 265 | message(STATUS "... file does not exist. Downloading now ...") 266 | execute_process(COMMAND wget -q https://github.com/leggedrobotics/darknet_ros/releases/download/1.1.4/yolov2.weights -P ${PATH}) 267 | endif() 268 | 269 | find_package(rostest REQUIRED) 270 | 271 | # Object detection in images. 272 | add_rostest_gtest(${PROJECT_NAME}_object_detection-test 273 | test/object_detection.test 274 | test/test_main.cpp 275 | test/ObjectDetection.cpp 276 | ) 277 | target_link_libraries(${PROJECT_NAME}_object_detection-test 278 | ${catkin_LIBRARIES} 279 | ) 280 | endif() 281 | 282 | ######################### 283 | ### CLANG TOOLING ### 284 | ######################### 285 | find_package(cmake_clang_tools QUIET) 286 | if (cmake_clang_tools_FOUND) 287 | message(STATUS "Run clang tooling") 288 | add_clang_tooling( 289 | TARGETS ${PROJECT_NAME} 290 | SOURCE_DIRS ${CMAKE_CURRENT_LIST_DIR}/src ${CMAKE_CURRENT_LIST_DIR}/include ${CMAKE_CURRENT_LIST_DIR}/test 291 | CT_HEADER_DIRS ${CMAKE_CURRENT_LIST_DIR}/include 292 | CF_WERROR 293 | ) 294 | endif (cmake_clang_tools_FOUND) 295 | -------------------------------------------------------------------------------- /darknet_ros/yolo_network_config/cfg/yolov3-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=16 8 | width=416 9 | height=416 10 | channels=3 11 | 
momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 50200 21 | policy=steps 22 | steps=40000,45000 23 | scales=.1,.1 24 | 25 | 26 | 27 | [convolutional] 28 | batch_normalize=1 29 | filters=32 30 | size=3 31 | stride=1 32 | pad=1 33 | activation=leaky 34 | 35 | # Downsample 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=2 42 | pad=1 43 | activation=leaky 44 | 45 | [convolutional] 46 | batch_normalize=1 47 | filters=32 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | batch_normalize=1 55 | filters=64 56 | size=3 57 | stride=1 58 | pad=1 59 | activation=leaky 60 | 61 | [shortcut] 62 | from=-3 63 | activation=linear 64 | 65 | # Downsample 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=128 70 | size=3 71 | stride=2 72 | pad=1 73 | activation=leaky 74 | 75 | [convolutional] 76 | batch_normalize=1 77 | filters=64 78 | size=1 79 | stride=1 80 | pad=1 81 | activation=leaky 82 | 83 | [convolutional] 84 | batch_normalize=1 85 | filters=128 86 | size=3 87 | stride=1 88 | pad=1 89 | activation=leaky 90 | 91 | [shortcut] 92 | from=-3 93 | activation=linear 94 | 95 | [convolutional] 96 | batch_normalize=1 97 | filters=64 98 | size=1 99 | stride=1 100 | pad=1 101 | activation=leaky 102 | 103 | [convolutional] 104 | batch_normalize=1 105 | filters=128 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [shortcut] 112 | from=-3 113 | activation=linear 114 | 115 | # Downsample 116 | 117 | [convolutional] 118 | batch_normalize=1 119 | filters=256 120 | size=3 121 | stride=2 122 | pad=1 123 | activation=leaky 124 | 125 | [convolutional] 126 | batch_normalize=1 127 | filters=128 128 | size=1 129 | stride=1 130 | pad=1 131 | activation=leaky 132 | 133 | [convolutional] 134 | batch_normalize=1 135 | filters=256 136 | size=3 137 | stride=1 138 | pad=1 139 | 
activation=leaky 140 | 141 | [shortcut] 142 | from=-3 143 | activation=linear 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [convolutional] 154 | batch_normalize=1 155 | filters=256 156 | size=3 157 | stride=1 158 | pad=1 159 | activation=leaky 160 | 161 | [shortcut] 162 | from=-3 163 | activation=linear 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=128 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=256 176 | size=3 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [shortcut] 182 | from=-3 183 | activation=linear 184 | 185 | [convolutional] 186 | batch_normalize=1 187 | filters=128 188 | size=1 189 | stride=1 190 | pad=1 191 | activation=leaky 192 | 193 | [convolutional] 194 | batch_normalize=1 195 | filters=256 196 | size=3 197 | stride=1 198 | pad=1 199 | activation=leaky 200 | 201 | [shortcut] 202 | from=-3 203 | activation=linear 204 | 205 | 206 | [convolutional] 207 | batch_normalize=1 208 | filters=128 209 | size=1 210 | stride=1 211 | pad=1 212 | activation=leaky 213 | 214 | [convolutional] 215 | batch_normalize=1 216 | filters=256 217 | size=3 218 | stride=1 219 | pad=1 220 | activation=leaky 221 | 222 | [shortcut] 223 | from=-3 224 | activation=linear 225 | 226 | [convolutional] 227 | batch_normalize=1 228 | filters=128 229 | size=1 230 | stride=1 231 | pad=1 232 | activation=leaky 233 | 234 | [convolutional] 235 | batch_normalize=1 236 | filters=256 237 | size=3 238 | stride=1 239 | pad=1 240 | activation=leaky 241 | 242 | [shortcut] 243 | from=-3 244 | activation=linear 245 | 246 | [convolutional] 247 | batch_normalize=1 248 | filters=128 249 | size=1 250 | stride=1 251 | pad=1 252 | activation=leaky 253 | 254 | [convolutional] 255 | batch_normalize=1 256 | filters=256 257 | size=3 258 | stride=1 259 | pad=1 260 | activation=leaky 261 | 262 | 
[shortcut] 263 | from=-3 264 | activation=linear 265 | 266 | [convolutional] 267 | batch_normalize=1 268 | filters=128 269 | size=1 270 | stride=1 271 | pad=1 272 | activation=leaky 273 | 274 | [convolutional] 275 | batch_normalize=1 276 | filters=256 277 | size=3 278 | stride=1 279 | pad=1 280 | activation=leaky 281 | 282 | [shortcut] 283 | from=-3 284 | activation=linear 285 | 286 | # Downsample 287 | 288 | [convolutional] 289 | batch_normalize=1 290 | filters=512 291 | size=3 292 | stride=2 293 | pad=1 294 | activation=leaky 295 | 296 | [convolutional] 297 | batch_normalize=1 298 | filters=256 299 | size=1 300 | stride=1 301 | pad=1 302 | activation=leaky 303 | 304 | [convolutional] 305 | batch_normalize=1 306 | filters=512 307 | size=3 308 | stride=1 309 | pad=1 310 | activation=leaky 311 | 312 | [shortcut] 313 | from=-3 314 | activation=linear 315 | 316 | 317 | [convolutional] 318 | batch_normalize=1 319 | filters=256 320 | size=1 321 | stride=1 322 | pad=1 323 | activation=leaky 324 | 325 | [convolutional] 326 | batch_normalize=1 327 | filters=512 328 | size=3 329 | stride=1 330 | pad=1 331 | activation=leaky 332 | 333 | [shortcut] 334 | from=-3 335 | activation=linear 336 | 337 | 338 | [convolutional] 339 | batch_normalize=1 340 | filters=256 341 | size=1 342 | stride=1 343 | pad=1 344 | activation=leaky 345 | 346 | [convolutional] 347 | batch_normalize=1 348 | filters=512 349 | size=3 350 | stride=1 351 | pad=1 352 | activation=leaky 353 | 354 | [shortcut] 355 | from=-3 356 | activation=linear 357 | 358 | 359 | [convolutional] 360 | batch_normalize=1 361 | filters=256 362 | size=1 363 | stride=1 364 | pad=1 365 | activation=leaky 366 | 367 | [convolutional] 368 | batch_normalize=1 369 | filters=512 370 | size=3 371 | stride=1 372 | pad=1 373 | activation=leaky 374 | 375 | [shortcut] 376 | from=-3 377 | activation=linear 378 | 379 | [convolutional] 380 | batch_normalize=1 381 | filters=256 382 | size=1 383 | stride=1 384 | pad=1 385 | activation=leaky 386 | 
387 | [convolutional] 388 | batch_normalize=1 389 | filters=512 390 | size=3 391 | stride=1 392 | pad=1 393 | activation=leaky 394 | 395 | [shortcut] 396 | from=-3 397 | activation=linear 398 | 399 | 400 | [convolutional] 401 | batch_normalize=1 402 | filters=256 403 | size=1 404 | stride=1 405 | pad=1 406 | activation=leaky 407 | 408 | [convolutional] 409 | batch_normalize=1 410 | filters=512 411 | size=3 412 | stride=1 413 | pad=1 414 | activation=leaky 415 | 416 | [shortcut] 417 | from=-3 418 | activation=linear 419 | 420 | 421 | [convolutional] 422 | batch_normalize=1 423 | filters=256 424 | size=1 425 | stride=1 426 | pad=1 427 | activation=leaky 428 | 429 | [convolutional] 430 | batch_normalize=1 431 | filters=512 432 | size=3 433 | stride=1 434 | pad=1 435 | activation=leaky 436 | 437 | [shortcut] 438 | from=-3 439 | activation=linear 440 | 441 | [convolutional] 442 | batch_normalize=1 443 | filters=256 444 | size=1 445 | stride=1 446 | pad=1 447 | activation=leaky 448 | 449 | [convolutional] 450 | batch_normalize=1 451 | filters=512 452 | size=3 453 | stride=1 454 | pad=1 455 | activation=leaky 456 | 457 | [shortcut] 458 | from=-3 459 | activation=linear 460 | 461 | # Downsample 462 | 463 | [convolutional] 464 | batch_normalize=1 465 | filters=1024 466 | size=3 467 | stride=2 468 | pad=1 469 | activation=leaky 470 | 471 | [convolutional] 472 | batch_normalize=1 473 | filters=512 474 | size=1 475 | stride=1 476 | pad=1 477 | activation=leaky 478 | 479 | [convolutional] 480 | batch_normalize=1 481 | filters=1024 482 | size=3 483 | stride=1 484 | pad=1 485 | activation=leaky 486 | 487 | [shortcut] 488 | from=-3 489 | activation=linear 490 | 491 | [convolutional] 492 | batch_normalize=1 493 | filters=512 494 | size=1 495 | stride=1 496 | pad=1 497 | activation=leaky 498 | 499 | [convolutional] 500 | batch_normalize=1 501 | filters=1024 502 | size=3 503 | stride=1 504 | pad=1 505 | activation=leaky 506 | 507 | [shortcut] 508 | from=-3 509 | activation=linear 510 
| 511 | [convolutional] 512 | batch_normalize=1 513 | filters=512 514 | size=1 515 | stride=1 516 | pad=1 517 | activation=leaky 518 | 519 | [convolutional] 520 | batch_normalize=1 521 | filters=1024 522 | size=3 523 | stride=1 524 | pad=1 525 | activation=leaky 526 | 527 | [shortcut] 528 | from=-3 529 | activation=linear 530 | 531 | [convolutional] 532 | batch_normalize=1 533 | filters=512 534 | size=1 535 | stride=1 536 | pad=1 537 | activation=leaky 538 | 539 | [convolutional] 540 | batch_normalize=1 541 | filters=1024 542 | size=3 543 | stride=1 544 | pad=1 545 | activation=leaky 546 | 547 | [shortcut] 548 | from=-3 549 | activation=linear 550 | 551 | ###################### 552 | 553 | [convolutional] 554 | batch_normalize=1 555 | filters=512 556 | size=1 557 | stride=1 558 | pad=1 559 | activation=leaky 560 | 561 | [convolutional] 562 | batch_normalize=1 563 | size=3 564 | stride=1 565 | pad=1 566 | filters=1024 567 | activation=leaky 568 | 569 | [convolutional] 570 | batch_normalize=1 571 | filters=512 572 | size=1 573 | stride=1 574 | pad=1 575 | activation=leaky 576 | 577 | [convolutional] 578 | batch_normalize=1 579 | size=3 580 | stride=1 581 | pad=1 582 | filters=1024 583 | activation=leaky 584 | 585 | [convolutional] 586 | batch_normalize=1 587 | filters=512 588 | size=1 589 | stride=1 590 | pad=1 591 | activation=leaky 592 | 593 | [convolutional] 594 | batch_normalize=1 595 | size=3 596 | stride=1 597 | pad=1 598 | filters=1024 599 | activation=leaky 600 | 601 | [convolutional] 602 | size=1 603 | stride=1 604 | pad=1 605 | filters=75 606 | activation=linear 607 | 608 | [yolo] 609 | mask = 6,7,8 610 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 611 | classes=20 612 | num=9 613 | jitter=.3 614 | ignore_thresh = .5 615 | truth_thresh = 1 616 | random=1 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 
628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=75 690 | activation=linear 691 | 692 | [yolo] 693 | mask = 3,4,5 694 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 695 | classes=20 696 | num=9 697 | jitter=.3 698 | ignore_thresh = .5 699 | truth_thresh = 1 700 | random=1 701 | 702 | [route] 703 | layers = -4 704 | 705 | [convolutional] 706 | batch_normalize=1 707 | filters=128 708 | size=1 709 | stride=1 710 | pad=1 711 | activation=leaky 712 | 713 | [upsample] 714 | stride=2 715 | 716 | [route] 717 | layers = -1, 36 718 | 719 | 720 | 721 | [convolutional] 722 | batch_normalize=1 723 | filters=128 724 | size=1 725 | stride=1 726 | pad=1 727 | activation=leaky 728 | 729 | [convolutional] 730 | batch_normalize=1 731 | size=3 732 | stride=1 733 | pad=1 734 | filters=256 735 | activation=leaky 736 | 737 | [convolutional] 738 | batch_normalize=1 739 | filters=128 740 | size=1 741 | stride=1 742 | pad=1 743 | activation=leaky 744 | 745 | [convolutional] 746 | batch_normalize=1 747 | size=3 748 | stride=1 749 | pad=1 750 | 
filters=256 751 | activation=leaky 752 | 753 | [convolutional] 754 | batch_normalize=1 755 | filters=128 756 | size=1 757 | stride=1 758 | pad=1 759 | activation=leaky 760 | 761 | [convolutional] 762 | batch_normalize=1 763 | size=3 764 | stride=1 765 | pad=1 766 | filters=256 767 | activation=leaky 768 | 769 | [convolutional] 770 | size=1 771 | stride=1 772 | pad=1 773 | filters=75 774 | activation=linear 775 | 776 | [yolo] 777 | mask = 0,1,2 778 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 779 | classes=20 780 | num=9 781 | jitter=.3 782 | ignore_thresh = .5 783 | truth_thresh = 1 784 | random=1 785 | 786 | -------------------------------------------------------------------------------- /darknet_ros/yolo_network_config/cfg/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=16 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | 
[convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 
| activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 
| activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | 
activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | 
[convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .5 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | 
pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .5 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .5 787 | truth_thresh = 1 788 | random=1 789 | 790 | -------------------------------------------------------------------------------- /darknet_ros/yolo_network_config/cfg/yolov4.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # batch=64 3 | batch=1 4 | subdivisions=8 5 | # Training 6 
| #width=512 7 | #height=512 8 | width=608 9 | height=608 10 | # width=416 11 | # height=416 12 | channels=3 13 | momentum=0.949 14 | decay=0.0005 15 | angle=0 16 | saturation = 1.5 17 | exposure = 1.5 18 | hue=.1 19 | 20 | learning_rate=0.0013 21 | burn_in=1000 22 | max_batches = 500500 23 | policy=steps 24 | steps=400000,450000 25 | scales=.1,.1 26 | 27 | #cutmix=1 28 | mosaic=1 29 | 30 | #:104x104 54:52x52 85:26x26 104:13x13 for 416 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=mish 39 | 40 | # Downsample 41 | 42 | [convolutional] 43 | batch_normalize=1 44 | filters=64 45 | size=3 46 | stride=2 47 | pad=1 48 | activation=mish 49 | 50 | [convolutional] 51 | batch_normalize=1 52 | filters=64 53 | size=1 54 | stride=1 55 | pad=1 56 | activation=mish 57 | 58 | [route] 59 | layers = -2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=64 64 | size=1 65 | stride=1 66 | pad=1 67 | activation=mish 68 | 69 | [convolutional] 70 | batch_normalize=1 71 | filters=32 72 | size=1 73 | stride=1 74 | pad=1 75 | activation=mish 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=64 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=mish 84 | 85 | [shortcut] 86 | from=-3 87 | activation=linear 88 | 89 | [convolutional] 90 | batch_normalize=1 91 | filters=64 92 | size=1 93 | stride=1 94 | pad=1 95 | activation=mish 96 | 97 | [route] 98 | layers = -1,-7 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | filters=64 103 | size=1 104 | stride=1 105 | pad=1 106 | activation=mish 107 | 108 | # Downsample 109 | 110 | [convolutional] 111 | batch_normalize=1 112 | filters=128 113 | size=3 114 | stride=2 115 | pad=1 116 | activation=mish 117 | 118 | [convolutional] 119 | batch_normalize=1 120 | filters=64 121 | size=1 122 | stride=1 123 | pad=1 124 | activation=mish 125 | 126 | [route] 127 | layers = -2 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=64 132 | size=1 133 | 
stride=1 134 | pad=1 135 | activation=mish 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=64 140 | size=1 141 | stride=1 142 | pad=1 143 | activation=mish 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=64 148 | size=3 149 | stride=1 150 | pad=1 151 | activation=mish 152 | 153 | [shortcut] 154 | from=-3 155 | activation=linear 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=64 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=mish 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=64 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=mish 172 | 173 | [shortcut] 174 | from=-3 175 | activation=linear 176 | 177 | [convolutional] 178 | batch_normalize=1 179 | filters=64 180 | size=1 181 | stride=1 182 | pad=1 183 | activation=mish 184 | 185 | [route] 186 | layers = -1,-10 187 | 188 | [convolutional] 189 | batch_normalize=1 190 | filters=128 191 | size=1 192 | stride=1 193 | pad=1 194 | activation=mish 195 | 196 | # Downsample 197 | 198 | [convolutional] 199 | batch_normalize=1 200 | filters=256 201 | size=3 202 | stride=2 203 | pad=1 204 | activation=mish 205 | 206 | [convolutional] 207 | batch_normalize=1 208 | filters=128 209 | size=1 210 | stride=1 211 | pad=1 212 | activation=mish 213 | 214 | [route] 215 | layers = -2 216 | 217 | [convolutional] 218 | batch_normalize=1 219 | filters=128 220 | size=1 221 | stride=1 222 | pad=1 223 | activation=mish 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | filters=128 228 | size=1 229 | stride=1 230 | pad=1 231 | activation=mish 232 | 233 | [convolutional] 234 | batch_normalize=1 235 | filters=128 236 | size=3 237 | stride=1 238 | pad=1 239 | activation=mish 240 | 241 | [shortcut] 242 | from=-3 243 | activation=linear 244 | 245 | [convolutional] 246 | batch_normalize=1 247 | filters=128 248 | size=1 249 | stride=1 250 | pad=1 251 | activation=mish 252 | 253 | [convolutional] 254 | batch_normalize=1 255 | filters=128 256 | size=3 257 | 
stride=1 258 | pad=1 259 | activation=mish 260 | 261 | [shortcut] 262 | from=-3 263 | activation=linear 264 | 265 | [convolutional] 266 | batch_normalize=1 267 | filters=128 268 | size=1 269 | stride=1 270 | pad=1 271 | activation=mish 272 | 273 | [convolutional] 274 | batch_normalize=1 275 | filters=128 276 | size=3 277 | stride=1 278 | pad=1 279 | activation=mish 280 | 281 | [shortcut] 282 | from=-3 283 | activation=linear 284 | 285 | [convolutional] 286 | batch_normalize=1 287 | filters=128 288 | size=1 289 | stride=1 290 | pad=1 291 | activation=mish 292 | 293 | [convolutional] 294 | batch_normalize=1 295 | filters=128 296 | size=3 297 | stride=1 298 | pad=1 299 | activation=mish 300 | 301 | [shortcut] 302 | from=-3 303 | activation=linear 304 | 305 | 306 | [convolutional] 307 | batch_normalize=1 308 | filters=128 309 | size=1 310 | stride=1 311 | pad=1 312 | activation=mish 313 | 314 | [convolutional] 315 | batch_normalize=1 316 | filters=128 317 | size=3 318 | stride=1 319 | pad=1 320 | activation=mish 321 | 322 | [shortcut] 323 | from=-3 324 | activation=linear 325 | 326 | [convolutional] 327 | batch_normalize=1 328 | filters=128 329 | size=1 330 | stride=1 331 | pad=1 332 | activation=mish 333 | 334 | [convolutional] 335 | batch_normalize=1 336 | filters=128 337 | size=3 338 | stride=1 339 | pad=1 340 | activation=mish 341 | 342 | [shortcut] 343 | from=-3 344 | activation=linear 345 | 346 | [convolutional] 347 | batch_normalize=1 348 | filters=128 349 | size=1 350 | stride=1 351 | pad=1 352 | activation=mish 353 | 354 | [convolutional] 355 | batch_normalize=1 356 | filters=128 357 | size=3 358 | stride=1 359 | pad=1 360 | activation=mish 361 | 362 | [shortcut] 363 | from=-3 364 | activation=linear 365 | 366 | [convolutional] 367 | batch_normalize=1 368 | filters=128 369 | size=1 370 | stride=1 371 | pad=1 372 | activation=mish 373 | 374 | [convolutional] 375 | batch_normalize=1 376 | filters=128 377 | size=3 378 | stride=1 379 | pad=1 380 | activation=mish 
381 | 382 | [shortcut] 383 | from=-3 384 | activation=linear 385 | 386 | [convolutional] 387 | batch_normalize=1 388 | filters=128 389 | size=1 390 | stride=1 391 | pad=1 392 | activation=mish 393 | 394 | [route] 395 | layers = -1,-28 396 | 397 | [convolutional] 398 | batch_normalize=1 399 | filters=256 400 | size=1 401 | stride=1 402 | pad=1 403 | activation=mish 404 | 405 | # Downsample 406 | 407 | [convolutional] 408 | batch_normalize=1 409 | filters=512 410 | size=3 411 | stride=2 412 | pad=1 413 | activation=mish 414 | 415 | [convolutional] 416 | batch_normalize=1 417 | filters=256 418 | size=1 419 | stride=1 420 | pad=1 421 | activation=mish 422 | 423 | [route] 424 | layers = -2 425 | 426 | [convolutional] 427 | batch_normalize=1 428 | filters=256 429 | size=1 430 | stride=1 431 | pad=1 432 | activation=mish 433 | 434 | [convolutional] 435 | batch_normalize=1 436 | filters=256 437 | size=1 438 | stride=1 439 | pad=1 440 | activation=mish 441 | 442 | [convolutional] 443 | batch_normalize=1 444 | filters=256 445 | size=3 446 | stride=1 447 | pad=1 448 | activation=mish 449 | 450 | [shortcut] 451 | from=-3 452 | activation=linear 453 | 454 | 455 | [convolutional] 456 | batch_normalize=1 457 | filters=256 458 | size=1 459 | stride=1 460 | pad=1 461 | activation=mish 462 | 463 | [convolutional] 464 | batch_normalize=1 465 | filters=256 466 | size=3 467 | stride=1 468 | pad=1 469 | activation=mish 470 | 471 | [shortcut] 472 | from=-3 473 | activation=linear 474 | 475 | 476 | [convolutional] 477 | batch_normalize=1 478 | filters=256 479 | size=1 480 | stride=1 481 | pad=1 482 | activation=mish 483 | 484 | [convolutional] 485 | batch_normalize=1 486 | filters=256 487 | size=3 488 | stride=1 489 | pad=1 490 | activation=mish 491 | 492 | [shortcut] 493 | from=-3 494 | activation=linear 495 | 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=256 500 | size=1 501 | stride=1 502 | pad=1 503 | activation=mish 504 | 505 | [convolutional] 506 | 
batch_normalize=1 507 | filters=256 508 | size=3 509 | stride=1 510 | pad=1 511 | activation=mish 512 | 513 | [shortcut] 514 | from=-3 515 | activation=linear 516 | 517 | 518 | [convolutional] 519 | batch_normalize=1 520 | filters=256 521 | size=1 522 | stride=1 523 | pad=1 524 | activation=mish 525 | 526 | [convolutional] 527 | batch_normalize=1 528 | filters=256 529 | size=3 530 | stride=1 531 | pad=1 532 | activation=mish 533 | 534 | [shortcut] 535 | from=-3 536 | activation=linear 537 | 538 | 539 | [convolutional] 540 | batch_normalize=1 541 | filters=256 542 | size=1 543 | stride=1 544 | pad=1 545 | activation=mish 546 | 547 | [convolutional] 548 | batch_normalize=1 549 | filters=256 550 | size=3 551 | stride=1 552 | pad=1 553 | activation=mish 554 | 555 | [shortcut] 556 | from=-3 557 | activation=linear 558 | 559 | 560 | [convolutional] 561 | batch_normalize=1 562 | filters=256 563 | size=1 564 | stride=1 565 | pad=1 566 | activation=mish 567 | 568 | [convolutional] 569 | batch_normalize=1 570 | filters=256 571 | size=3 572 | stride=1 573 | pad=1 574 | activation=mish 575 | 576 | [shortcut] 577 | from=-3 578 | activation=linear 579 | 580 | [convolutional] 581 | batch_normalize=1 582 | filters=256 583 | size=1 584 | stride=1 585 | pad=1 586 | activation=mish 587 | 588 | [convolutional] 589 | batch_normalize=1 590 | filters=256 591 | size=3 592 | stride=1 593 | pad=1 594 | activation=mish 595 | 596 | [shortcut] 597 | from=-3 598 | activation=linear 599 | 600 | [convolutional] 601 | batch_normalize=1 602 | filters=256 603 | size=1 604 | stride=1 605 | pad=1 606 | activation=mish 607 | 608 | [route] 609 | layers = -1,-28 610 | 611 | [convolutional] 612 | batch_normalize=1 613 | filters=512 614 | size=1 615 | stride=1 616 | pad=1 617 | activation=mish 618 | 619 | # Downsample 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=1024 624 | size=3 625 | stride=2 626 | pad=1 627 | activation=mish 628 | 629 | [convolutional] 630 | batch_normalize=1 631 | 
filters=512 632 | size=1 633 | stride=1 634 | pad=1 635 | activation=mish 636 | 637 | [route] 638 | layers = -2 639 | 640 | [convolutional] 641 | batch_normalize=1 642 | filters=512 643 | size=1 644 | stride=1 645 | pad=1 646 | activation=mish 647 | 648 | [convolutional] 649 | batch_normalize=1 650 | filters=512 651 | size=1 652 | stride=1 653 | pad=1 654 | activation=mish 655 | 656 | [convolutional] 657 | batch_normalize=1 658 | filters=512 659 | size=3 660 | stride=1 661 | pad=1 662 | activation=mish 663 | 664 | [shortcut] 665 | from=-3 666 | activation=linear 667 | 668 | [convolutional] 669 | batch_normalize=1 670 | filters=512 671 | size=1 672 | stride=1 673 | pad=1 674 | activation=mish 675 | 676 | [convolutional] 677 | batch_normalize=1 678 | filters=512 679 | size=3 680 | stride=1 681 | pad=1 682 | activation=mish 683 | 684 | [shortcut] 685 | from=-3 686 | activation=linear 687 | 688 | [convolutional] 689 | batch_normalize=1 690 | filters=512 691 | size=1 692 | stride=1 693 | pad=1 694 | activation=mish 695 | 696 | [convolutional] 697 | batch_normalize=1 698 | filters=512 699 | size=3 700 | stride=1 701 | pad=1 702 | activation=mish 703 | 704 | [shortcut] 705 | from=-3 706 | activation=linear 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=512 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=mish 715 | 716 | [convolutional] 717 | batch_normalize=1 718 | filters=512 719 | size=3 720 | stride=1 721 | pad=1 722 | activation=mish 723 | 724 | [shortcut] 725 | from=-3 726 | activation=linear 727 | 728 | [convolutional] 729 | batch_normalize=1 730 | filters=512 731 | size=1 732 | stride=1 733 | pad=1 734 | activation=mish 735 | 736 | [route] 737 | layers = -1,-16 738 | 739 | [convolutional] 740 | batch_normalize=1 741 | filters=1024 742 | size=1 743 | stride=1 744 | pad=1 745 | activation=mish 746 | 747 | ########################## 748 | 749 | [convolutional] 750 | batch_normalize=1 751 | filters=512 752 | size=1 753 | stride=1 754 | pad=1 
755 | activation=leaky 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | size=3 760 | stride=1 761 | pad=1 762 | filters=1024 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | filters=512 768 | size=1 769 | stride=1 770 | pad=1 771 | activation=leaky 772 | 773 | ### SPP ### 774 | [maxpool] 775 | stride=1 776 | size=5 777 | 778 | [route] 779 | layers=-2 780 | 781 | [maxpool] 782 | stride=1 783 | size=9 784 | 785 | [route] 786 | layers=-4 787 | 788 | [maxpool] 789 | stride=1 790 | size=13 791 | 792 | [route] 793 | layers=-1,-3,-5,-6 794 | ### End SPP ### 795 | 796 | [convolutional] 797 | batch_normalize=1 798 | filters=512 799 | size=1 800 | stride=1 801 | pad=1 802 | activation=leaky 803 | 804 | [convolutional] 805 | batch_normalize=1 806 | size=3 807 | stride=1 808 | pad=1 809 | filters=1024 810 | activation=leaky 811 | 812 | [convolutional] 813 | batch_normalize=1 814 | filters=512 815 | size=1 816 | stride=1 817 | pad=1 818 | activation=leaky 819 | 820 | [convolutional] 821 | batch_normalize=1 822 | filters=256 823 | size=1 824 | stride=1 825 | pad=1 826 | activation=leaky 827 | 828 | [upsample] 829 | stride=2 830 | 831 | [route] 832 | layers = 85 833 | 834 | [convolutional] 835 | batch_normalize=1 836 | filters=256 837 | size=1 838 | stride=1 839 | pad=1 840 | activation=leaky 841 | 842 | [route] 843 | layers = -1, -3 844 | 845 | [convolutional] 846 | batch_normalize=1 847 | filters=256 848 | size=1 849 | stride=1 850 | pad=1 851 | activation=leaky 852 | 853 | [convolutional] 854 | batch_normalize=1 855 | size=3 856 | stride=1 857 | pad=1 858 | filters=512 859 | activation=leaky 860 | 861 | [convolutional] 862 | batch_normalize=1 863 | filters=256 864 | size=1 865 | stride=1 866 | pad=1 867 | activation=leaky 868 | 869 | [convolutional] 870 | batch_normalize=1 871 | size=3 872 | stride=1 873 | pad=1 874 | filters=512 875 | activation=leaky 876 | 877 | [convolutional] 878 | batch_normalize=1 879 | filters=256 880 | size=1 
881 | stride=1 882 | pad=1 883 | activation=leaky 884 | 885 | [convolutional] 886 | batch_normalize=1 887 | filters=128 888 | size=1 889 | stride=1 890 | pad=1 891 | activation=leaky 892 | 893 | [upsample] 894 | stride=2 895 | 896 | [route] 897 | layers = 54 898 | 899 | [convolutional] 900 | batch_normalize=1 901 | filters=128 902 | size=1 903 | stride=1 904 | pad=1 905 | activation=leaky 906 | 907 | [route] 908 | layers = -1, -3 909 | 910 | [convolutional] 911 | batch_normalize=1 912 | filters=128 913 | size=1 914 | stride=1 915 | pad=1 916 | activation=leaky 917 | 918 | [convolutional] 919 | batch_normalize=1 920 | size=3 921 | stride=1 922 | pad=1 923 | filters=256 924 | activation=leaky 925 | 926 | [convolutional] 927 | batch_normalize=1 928 | filters=128 929 | size=1 930 | stride=1 931 | pad=1 932 | activation=leaky 933 | 934 | [convolutional] 935 | batch_normalize=1 936 | size=3 937 | stride=1 938 | pad=1 939 | filters=256 940 | activation=leaky 941 | 942 | [convolutional] 943 | batch_normalize=1 944 | filters=128 945 | size=1 946 | stride=1 947 | pad=1 948 | activation=leaky 949 | 950 | ########################## 951 | 952 | [convolutional] 953 | batch_normalize=1 954 | size=3 955 | stride=1 956 | pad=1 957 | filters=256 958 | activation=leaky 959 | 960 | [convolutional] 961 | size=1 962 | stride=1 963 | pad=1 964 | filters=255 965 | activation=linear 966 | 967 | 968 | [yolo] 969 | mask = 0,1,2 970 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 971 | classes=80 972 | num=9 973 | jitter=.3 974 | ignore_thresh = .7 975 | truth_thresh = 1 976 | scale_x_y = 1.2 977 | iou_thresh=0.213 978 | cls_normalizer=1.0 979 | iou_normalizer=0.07 980 | iou_loss=ciou 981 | nms_kind=greedynms 982 | beta_nms=0.6 983 | max_delta=5 984 | 985 | 986 | [route] 987 | layers = -4 988 | 989 | [convolutional] 990 | batch_normalize=1 991 | size=3 992 | stride=2 993 | pad=1 994 | filters=256 995 | activation=leaky 996 | 997 | [route] 998 | layers 
= -1, -16 999 | 1000 | [convolutional] 1001 | batch_normalize=1 1002 | filters=256 1003 | size=1 1004 | stride=1 1005 | pad=1 1006 | activation=leaky 1007 | 1008 | [convolutional] 1009 | batch_normalize=1 1010 | size=3 1011 | stride=1 1012 | pad=1 1013 | filters=512 1014 | activation=leaky 1015 | 1016 | [convolutional] 1017 | batch_normalize=1 1018 | filters=256 1019 | size=1 1020 | stride=1 1021 | pad=1 1022 | activation=leaky 1023 | 1024 | [convolutional] 1025 | batch_normalize=1 1026 | size=3 1027 | stride=1 1028 | pad=1 1029 | filters=512 1030 | activation=leaky 1031 | 1032 | [convolutional] 1033 | batch_normalize=1 1034 | filters=256 1035 | size=1 1036 | stride=1 1037 | pad=1 1038 | activation=leaky 1039 | 1040 | [convolutional] 1041 | batch_normalize=1 1042 | size=3 1043 | stride=1 1044 | pad=1 1045 | filters=512 1046 | activation=leaky 1047 | 1048 | [convolutional] 1049 | size=1 1050 | stride=1 1051 | pad=1 1052 | filters=255 1053 | activation=linear 1054 | 1055 | 1056 | [yolo] 1057 | mask = 3,4,5 1058 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 1059 | classes=80 1060 | num=9 1061 | jitter=.3 1062 | ignore_thresh = .7 1063 | truth_thresh = 1 1064 | scale_x_y = 1.1 1065 | iou_thresh=0.213 1066 | cls_normalizer=1.0 1067 | iou_normalizer=0.07 1068 | iou_loss=ciou 1069 | nms_kind=greedynms 1070 | beta_nms=0.6 1071 | max_delta=5 1072 | 1073 | 1074 | [route] 1075 | layers = -4 1076 | 1077 | [convolutional] 1078 | batch_normalize=1 1079 | size=3 1080 | stride=2 1081 | pad=1 1082 | filters=512 1083 | activation=leaky 1084 | 1085 | [route] 1086 | layers = -1, -37 1087 | 1088 | [convolutional] 1089 | batch_normalize=1 1090 | filters=512 1091 | size=1 1092 | stride=1 1093 | pad=1 1094 | activation=leaky 1095 | 1096 | [convolutional] 1097 | batch_normalize=1 1098 | size=3 1099 | stride=1 1100 | pad=1 1101 | filters=1024 1102 | activation=leaky 1103 | 1104 | [convolutional] 1105 | batch_normalize=1 1106 | filters=512 1107 | 
size=1 1108 | stride=1 1109 | pad=1 1110 | activation=leaky 1111 | 1112 | [convolutional] 1113 | batch_normalize=1 1114 | size=3 1115 | stride=1 1116 | pad=1 1117 | filters=1024 1118 | activation=leaky 1119 | 1120 | [convolutional] 1121 | batch_normalize=1 1122 | filters=512 1123 | size=1 1124 | stride=1 1125 | pad=1 1126 | activation=leaky 1127 | 1128 | [convolutional] 1129 | batch_normalize=1 1130 | size=3 1131 | stride=1 1132 | pad=1 1133 | filters=1024 1134 | activation=leaky 1135 | 1136 | [convolutional] 1137 | size=1 1138 | stride=1 1139 | pad=1 1140 | filters=255 1141 | activation=linear 1142 | 1143 | 1144 | [yolo] 1145 | mask = 6,7,8 1146 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 1147 | classes=80 1148 | num=9 1149 | jitter=.3 1150 | ignore_thresh = .7 1151 | truth_thresh = 1 1152 | random=1 1153 | scale_x_y = 1.05 1154 | iou_thresh=0.213 1155 | cls_normalizer=1.0 1156 | iou_normalizer=0.07 1157 | iou_loss=ciou 1158 | nms_kind=greedynms 1159 | beta_nms=0.6 1160 | max_delta=5 1161 | 1162 | -------------------------------------------------------------------------------- /darknet_ros/src/YoloObjectDetector.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * YoloObjectDetector.cpp 3 | * 4 | * Created on: Dec 19, 2016 5 | * Author: Marko Bjelonic 6 | * Institute: ETH Zurich, Robotic Systems Lab 7 | */ 8 | 9 | // yolo object detector 10 | #include "darknet_ros/YoloObjectDetector.hpp" 11 | 12 | // Check for xServer 13 | #include 14 | 15 | #ifdef DARKNET_FILE_PATH 16 | std::string darknetFilePath_ = DARKNET_FILE_PATH; 17 | #else 18 | #error Path of darknet repository is not defined in CMakeLists.txt. 
19 | #endif 20 | 21 | namespace darknet_ros { 22 | 23 | char* cfg; 24 | char* weights; 25 | char* data; 26 | char** detectionNames; 27 | 28 | YoloObjectDetector::YoloObjectDetector(ros::NodeHandle nh) 29 | : nodeHandle_(nh), imageTransport_(nodeHandle_), numClasses_(0), classLabels_(0), rosBoxes_(0), rosBoxCounter_(0) { 30 | ROS_INFO("[YoloObjectDetector] Node started."); 31 | 32 | // Read parameters from config file. 33 | if (!readParameters()) { 34 | ros::requestShutdown(); 35 | } 36 | 37 | init(); 38 | } 39 | 40 | YoloObjectDetector::~YoloObjectDetector() { 41 | { 42 | boost::unique_lock lockNodeStatus(mutexNodeStatus_); 43 | isNodeRunning_ = false; 44 | } 45 | yoloThread_.join(); 46 | } 47 | 48 | bool YoloObjectDetector::readParameters() { 49 | // Load common parameters. 50 | nodeHandle_.param("image_view/enable_opencv", viewImage_, true); 51 | nodeHandle_.param("image_view/wait_key_delay", waitKeyDelay_, 3); 52 | nodeHandle_.param("image_view/enable_console_output", enableConsoleOutput_, false); 53 | 54 | // Check if Xserver is running on Linux. 55 | if (XOpenDisplay(NULL)) { 56 | // Do nothing! 57 | ROS_INFO("[YoloObjectDetector] Xserver is running."); 58 | } else { 59 | ROS_INFO("[YoloObjectDetector] Xserver is not running."); 60 | viewImage_ = false; 61 | } 62 | 63 | // Set vector sizes. 64 | nodeHandle_.param("yolo_model/detection_classes/names", classLabels_, std::vector(0)); 65 | numClasses_ = classLabels_.size(); 66 | rosBoxes_ = std::vector >(numClasses_); 67 | rosBoxCounter_ = std::vector(numClasses_); 68 | 69 | return true; 70 | } 71 | 72 | void YoloObjectDetector::init() { 73 | ROS_INFO("[YoloObjectDetector] init()."); 74 | 75 | // Initialize deep network of darknet. 76 | std::string weightsPath; 77 | std::string configPath; 78 | std::string dataPath; 79 | std::string configModel; 80 | std::string weightsModel; 81 | 82 | // Threshold of object detection. 
83 | float thresh; 84 | nodeHandle_.param("yolo_model/threshold/value", thresh, (float)0.3); 85 | 86 | // Path to weights file. 87 | nodeHandle_.param("yolo_model/weight_file/name", weightsModel, std::string("yolov2-tiny.weights")); 88 | nodeHandle_.param("weights_path", weightsPath, std::string("/default")); 89 | weightsPath += "/" + weightsModel; 90 | weights = new char[weightsPath.length() + 1]; 91 | strcpy(weights, weightsPath.c_str()); 92 | 93 | // Path to config file. 94 | nodeHandle_.param("yolo_model/config_file/name", configModel, std::string("yolov2-tiny.cfg")); 95 | nodeHandle_.param("config_path", configPath, std::string("/default")); 96 | configPath += "/" + configModel; 97 | cfg = new char[configPath.length() + 1]; 98 | strcpy(cfg, configPath.c_str()); 99 | 100 | // Path to data folder. 101 | dataPath = darknetFilePath_; 102 | dataPath += "/data"; 103 | data = new char[dataPath.length() + 1]; 104 | strcpy(data, dataPath.c_str()); 105 | 106 | // Get classes. 107 | detectionNames = (char**)realloc((void*)detectionNames, (numClasses_ + 1) * sizeof(char*)); 108 | for (int i = 0; i < numClasses_; i++) { 109 | detectionNames[i] = new char[classLabels_[i].length() + 1]; 110 | strcpy(detectionNames[i], classLabels_[i].c_str()); 111 | } 112 | 113 | // Load network. 114 | setupNetwork(cfg, weights, data, thresh, detectionNames, numClasses_, 0, 0, 1, 0.5, 0, 0, 0, 0); 115 | yoloThread_ = std::thread(&YoloObjectDetector::yolo, this); 116 | 117 | // Initialize publisher and subscriber. 
118 | std::string cameraTopicName; 119 | int cameraQueueSize; 120 | std::string objectDetectorTopicName; 121 | int objectDetectorQueueSize; 122 | bool objectDetectorLatch; 123 | std::string boundingBoxesTopicName; 124 | int boundingBoxesQueueSize; 125 | bool boundingBoxesLatch; 126 | std::string detectionImageTopicName; 127 | int detectionImageQueueSize; 128 | bool detectionImageLatch; 129 | 130 | nodeHandle_.param("subscribers/camera_reading/topic", cameraTopicName, std::string("/camera/image_raw")); 131 | nodeHandle_.param("subscribers/camera_reading/queue_size", cameraQueueSize, 1); 132 | nodeHandle_.param("publishers/object_detector/topic", objectDetectorTopicName, std::string("found_object")); 133 | nodeHandle_.param("publishers/object_detector/queue_size", objectDetectorQueueSize, 1); 134 | nodeHandle_.param("publishers/object_detector/latch", objectDetectorLatch, false); 135 | nodeHandle_.param("publishers/bounding_boxes/topic", boundingBoxesTopicName, std::string("bounding_boxes")); 136 | nodeHandle_.param("publishers/bounding_boxes/queue_size", boundingBoxesQueueSize, 1); 137 | nodeHandle_.param("publishers/bounding_boxes/latch", boundingBoxesLatch, false); 138 | nodeHandle_.param("publishers/detection_image/topic", detectionImageTopicName, std::string("detection_image")); 139 | nodeHandle_.param("publishers/detection_image/queue_size", detectionImageQueueSize, 1); 140 | nodeHandle_.param("publishers/detection_image/latch", detectionImageLatch, true); 141 | 142 | imageSubscriber_ = imageTransport_.subscribe(cameraTopicName, cameraQueueSize, &YoloObjectDetector::cameraCallback, this); 143 | objectPublisher_ = 144 | nodeHandle_.advertise(objectDetectorTopicName, objectDetectorQueueSize, objectDetectorLatch); 145 | boundingBoxesPublisher_ = 146 | nodeHandle_.advertise(boundingBoxesTopicName, boundingBoxesQueueSize, boundingBoxesLatch); 147 | detectionImagePublisher_ = 148 | nodeHandle_.advertise(detectionImageTopicName, detectionImageQueueSize, 
detectionImageLatch); 149 | 150 | // Action servers. 151 | std::string checkForObjectsActionName; 152 | nodeHandle_.param("actions/camera_reading/topic", checkForObjectsActionName, std::string("check_for_objects")); 153 | checkForObjectsActionServer_.reset(new CheckForObjectsActionServer(nodeHandle_, checkForObjectsActionName, false)); 154 | checkForObjectsActionServer_->registerGoalCallback(boost::bind(&YoloObjectDetector::checkForObjectsActionGoalCB, this)); 155 | checkForObjectsActionServer_->registerPreemptCallback(boost::bind(&YoloObjectDetector::checkForObjectsActionPreemptCB, this)); 156 | checkForObjectsActionServer_->start(); 157 | } 158 | 159 | void YoloObjectDetector::cameraCallback(const sensor_msgs::ImageConstPtr& msg) { 160 | ROS_DEBUG("[YoloObjectDetector] USB image received."); 161 | 162 | cv_bridge::CvImagePtr cam_image; 163 | 164 | try { 165 | cam_image = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8); 166 | } catch (cv_bridge::Exception& e) { 167 | ROS_ERROR("cv_bridge exception: %s", e.what()); 168 | return; 169 | } 170 | 171 | if (cam_image) { 172 | { 173 | boost::unique_lock lockImageCallback(mutexImageCallback_); 174 | imageHeader_ = msg->header; 175 | camImageCopy_ = cam_image->image.clone(); 176 | } 177 | { 178 | boost::unique_lock lockImageStatus(mutexImageStatus_); 179 | imageStatus_ = true; 180 | } 181 | frameWidth_ = cam_image->image.size().width; 182 | frameHeight_ = cam_image->image.size().height; 183 | } 184 | return; 185 | } 186 | 187 | void YoloObjectDetector::checkForObjectsActionGoalCB() { 188 | ROS_DEBUG("[YoloObjectDetector] Start check for objects action."); 189 | 190 | boost::shared_ptr imageActionPtr = checkForObjectsActionServer_->acceptNewGoal(); 191 | sensor_msgs::Image imageAction = imageActionPtr->image; 192 | 193 | cv_bridge::CvImagePtr cam_image; 194 | 195 | try { 196 | cam_image = cv_bridge::toCvCopy(imageAction, sensor_msgs::image_encodings::BGR8); 197 | } catch (cv_bridge::Exception& e) { 198 | 
ROS_ERROR("cv_bridge exception: %s", e.what()); 199 | return; 200 | } 201 | 202 | if (cam_image) { 203 | { 204 | boost::unique_lock lockImageCallback(mutexImageCallback_); 205 | camImageCopy_ = cam_image->image.clone(); 206 | } 207 | { 208 | boost::unique_lock lockImageCallback(mutexActionStatus_); 209 | actionId_ = imageActionPtr->id; 210 | } 211 | { 212 | boost::unique_lock lockImageStatus(mutexImageStatus_); 213 | imageStatus_ = true; 214 | } 215 | frameWidth_ = cam_image->image.size().width; 216 | frameHeight_ = cam_image->image.size().height; 217 | } 218 | return; 219 | } 220 | 221 | void YoloObjectDetector::checkForObjectsActionPreemptCB() { 222 | ROS_DEBUG("[YoloObjectDetector] Preempt check for objects action."); 223 | checkForObjectsActionServer_->setPreempted(); 224 | } 225 | 226 | bool YoloObjectDetector::isCheckingForObjects() const { 227 | return (ros::ok() && checkForObjectsActionServer_->isActive() && !checkForObjectsActionServer_->isPreemptRequested()); 228 | } 229 | 230 | bool YoloObjectDetector::publishDetectionImage(const cv::Mat& detectionImage) { 231 | if (detectionImagePublisher_.getNumSubscribers() < 1) return false; 232 | cv_bridge::CvImage cvImage; 233 | cvImage.header.stamp = ros::Time::now(); 234 | cvImage.header.frame_id = "detection_image"; 235 | cvImage.encoding = sensor_msgs::image_encodings::BGR8; 236 | cvImage.image = detectionImage; 237 | detectionImagePublisher_.publish(*cvImage.toImageMsg()); 238 | ROS_DEBUG("Detection image has been published."); 239 | return true; 240 | } 241 | 242 | // double YoloObjectDetector::getWallTime() 243 | // { 244 | // struct timeval time; 245 | // if (gettimeofday(&time, NULL)) { 246 | // return 0; 247 | // } 248 | // return (double) time.tv_sec + (double) time.tv_usec * .000001; 249 | // } 250 | 251 | int YoloObjectDetector::sizeNetwork(network* net) { 252 | int i; 253 | int count = 0; 254 | for (i = 0; i < net->n; ++i) { 255 | layer l = net->layers[i]; 256 | if (l.type == YOLO || l.type == REGION 
|| l.type == DETECTION) { 257 | count += l.outputs; 258 | } 259 | } 260 | return count; 261 | } 262 | 263 | void YoloObjectDetector::rememberNetwork(network* net) { 264 | int i; 265 | int count = 0; 266 | for (i = 0; i < net->n; ++i) { 267 | layer l = net->layers[i]; 268 | if (l.type == YOLO || l.type == REGION || l.type == DETECTION) { 269 | memcpy(predictions_[demoIndex_] + count, net->layers[i].output, sizeof(float) * l.outputs); 270 | count += l.outputs; 271 | } 272 | } 273 | } 274 | 275 | detection* YoloObjectDetector::avgPredictions(network* net, int* nboxes) { 276 | int i, j; 277 | int count = 0; 278 | fill_cpu(demoTotal_, 0, avg_, 1); 279 | for (j = 0; j < demoFrame_; ++j) { 280 | axpy_cpu(demoTotal_, 1. / demoFrame_, predictions_[j], 1, avg_, 1); 281 | } 282 | for (i = 0; i < net->n; ++i) { 283 | layer l = net->layers[i]; 284 | if (l.type == YOLO || l.type == REGION || l.type == DETECTION) { 285 | memcpy(l.output, avg_ + count, sizeof(float) * l.outputs); 286 | count += l.outputs; 287 | } 288 | } 289 | // detection* dets = get_network_boxes(net, buff_[0].w, buff_[0].h, demoThresh_, demoHier_, 0, 1, nboxes); 290 | detection* dets = get_network_boxes(net, buff_[0].w, buff_[0].h, demoThresh_, demoHier_, 0, 1, nboxes, 1); 291 | return dets; 292 | } 293 | 294 | void* YoloObjectDetector::detectInThread() { 295 | running_ = 1; 296 | float nms = .4; 297 | 298 | layer l = net_->layers[net_->n - 1]; 299 | float* X = buffLetter_[(buffIndex_ + 2) % 3].data; 300 | float* prediction = network_predict(*net_, X); 301 | 302 | rememberNetwork(net_); 303 | detection* dets = 0; 304 | int nboxes = 0; 305 | dets = avgPredictions(net_, &nboxes); 306 | 307 | if (nms > 0) do_nms_obj(dets, nboxes, l.classes, nms); 308 | 309 | if (enableConsoleOutput_) { 310 | printf("\033[2J"); 311 | printf("\033[1;1H"); 312 | printf("\nFPS:%.1f\n", fps_); 313 | printf("Objects:\n\n"); 314 | } 315 | image display = buff_[(buffIndex_ + 2) % 3]; 316 | // draw_detections(display, dets, nboxes, 
demoThresh_, demoNames_, demoAlphabet_, demoClasses_, 1); 317 | draw_detections_v3(display, dets, nboxes, demoThresh_, demoNames_, demoAlphabet_, demoClasses_, 1); 318 | 319 | 320 | // extract the bounding boxes and send them to ROS 321 | int i, j; 322 | int count = 0; 323 | for (i = 0; i < nboxes; ++i) { 324 | float xmin = dets[i].bbox.x - dets[i].bbox.w / 2.; 325 | float xmax = dets[i].bbox.x + dets[i].bbox.w / 2.; 326 | float ymin = dets[i].bbox.y - dets[i].bbox.h / 2.; 327 | float ymax = dets[i].bbox.y + dets[i].bbox.h / 2.; 328 | 329 | if (xmin < 0) xmin = 0; 330 | if (ymin < 0) ymin = 0; 331 | if (xmax > 1) xmax = 1; 332 | if (ymax > 1) ymax = 1; 333 | 334 | // iterate through possible boxes and collect the bounding boxes 335 | for (j = 0; j < demoClasses_; ++j) { 336 | if (dets[i].prob[j]) { 337 | float x_center = (xmin + xmax) / 2; 338 | float y_center = (ymin + ymax) / 2; 339 | float BoundingBox_width = xmax - xmin; 340 | float BoundingBox_height = ymax - ymin; 341 | 342 | // define bounding box 343 | // BoundingBox must be 1% size of frame (3.2x2.4 pixels) 344 | if (BoundingBox_width > 0.01 && BoundingBox_height > 0.01) { 345 | roiBoxes_[count].x = x_center; 346 | roiBoxes_[count].y = y_center; 347 | roiBoxes_[count].w = BoundingBox_width; 348 | roiBoxes_[count].h = BoundingBox_height; 349 | roiBoxes_[count].Class = j; 350 | roiBoxes_[count].prob = dets[i].prob[j]; 351 | count++; 352 | } 353 | } 354 | } 355 | } 356 | 357 | // create array to store found bounding boxes 358 | // if no object detected, make sure that ROS knows that num = 0 359 | if (count == 0) { 360 | roiBoxes_[0].num = 0; 361 | } else { 362 | roiBoxes_[0].num = count; 363 | } 364 | 365 | free_detections(dets, nboxes); 366 | demoIndex_ = (demoIndex_ + 1) % demoFrame_; 367 | running_ = 0; 368 | return 0; 369 | } 370 | 371 | void* YoloObjectDetector::fetchInThread() { 372 | { 373 | boost::shared_lock lock(mutexImageCallback_); 374 | IplImageWithHeader_ imageAndHeader = 
getIplImageWithHeader(); 375 | IplImage* ROS_img = imageAndHeader.image; 376 | ipl_into_image(ROS_img, buff_[buffIndex_]); 377 | headerBuff_[buffIndex_] = imageAndHeader.header; 378 | buffId_[buffIndex_] = actionId_; 379 | } 380 | rgbgr_image(buff_[buffIndex_]); 381 | letterbox_image_into(buff_[buffIndex_], net_->w, net_->h, buffLetter_[buffIndex_]); 382 | return 0; 383 | } 384 | 385 | void* YoloObjectDetector::displayInThread(void* ptr) { 386 | show_image_cv(buff_[(buffIndex_ + 1) % 3], "YOLO V4"); 387 | int c = cv::waitKey(waitKeyDelay_); 388 | if (c != -1) c = c % 256; 389 | if (c == 27) { 390 | demoDone_ = 1; 391 | return 0; 392 | } else if (c == 82) { 393 | demoThresh_ += .02; 394 | } else if (c == 84) { 395 | demoThresh_ -= .02; 396 | if (demoThresh_ <= .02) demoThresh_ = .02; 397 | } else if (c == 83) { 398 | demoHier_ += .02; 399 | } else if (c == 81) { 400 | demoHier_ -= .02; 401 | if (demoHier_ <= .0) demoHier_ = .0; 402 | } 403 | return 0; 404 | } 405 | 406 | void* YoloObjectDetector::displayLoop(void* ptr) { 407 | while (1) { 408 | displayInThread(0); 409 | } 410 | } 411 | 412 | void* YoloObjectDetector::detectLoop(void* ptr) { 413 | while (1) { 414 | detectInThread(); 415 | } 416 | } 417 | 418 | void YoloObjectDetector::setupNetwork(char* cfgfile, char* weightfile, char* datafile, float thresh, char** names, int classes, int delay, 419 | char* prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) { 420 | demoPrefix_ = prefix; 421 | demoDelay_ = delay; 422 | demoFrame_ = avg_frames; 423 | image** alphabet = load_alphabet_with_file(datafile); 424 | demoNames_ = names; 425 | demoAlphabet_ = alphabet; 426 | demoClasses_ = classes; 427 | demoThresh_ = thresh; 428 | demoHier_ = hier; 429 | fullScreen_ = fullscreen; 430 | printf("YOLO V4\n"); 431 | net_ = load_network(cfgfile, weightfile, 0); 432 | set_batch_network(net_, 1); 433 | } 434 | 435 | void YoloObjectDetector::yolo() { 436 | const auto wait_duration = 
std::chrono::milliseconds(2000); 437 | while (!getImageStatus()) { 438 | printf("Waiting for image.\n"); 439 | if (!isNodeRunning()) { 440 | return; 441 | } 442 | std::this_thread::sleep_for(wait_duration); 443 | } 444 | 445 | std::thread detect_thread; 446 | std::thread fetch_thread; 447 | 448 | srand(2222222); 449 | 450 | int i; 451 | demoTotal_ = sizeNetwork(net_); 452 | predictions_ = (float**)calloc(demoFrame_, sizeof(float*)); 453 | for (i = 0; i < demoFrame_; ++i) { 454 | predictions_[i] = (float*)calloc(demoTotal_, sizeof(float)); 455 | } 456 | avg_ = (float*)calloc(demoTotal_, sizeof(float)); 457 | 458 | layer l = net_->layers[net_->n - 1]; 459 | roiBoxes_ = (darknet_ros::RosBox_*)calloc(l.w * l.h * l.n, sizeof(darknet_ros::RosBox_)); 460 | 461 | { 462 | boost::shared_lock lock(mutexImageCallback_); 463 | IplImageWithHeader_ imageAndHeader = getIplImageWithHeader(); 464 | IplImage* ROS_img = imageAndHeader.image; 465 | buff_[0] = ipl_to_image(ROS_img); 466 | headerBuff_[0] = imageAndHeader.header; 467 | } 468 | buff_[1] = copy_image(buff_[0]); 469 | buff_[2] = copy_image(buff_[0]); 470 | headerBuff_[1] = headerBuff_[0]; 471 | headerBuff_[2] = headerBuff_[0]; 472 | buffLetter_[0] = letterbox_image(buff_[0], net_->w, net_->h); 473 | buffLetter_[1] = letterbox_image(buff_[0], net_->w, net_->h); 474 | buffLetter_[2] = letterbox_image(buff_[0], net_->w, net_->h); 475 | ipl_ = cvCreateImage(cvSize(buff_[0].w, buff_[0].h), IPL_DEPTH_8U, buff_[0].c); 476 | 477 | int count = 0; 478 | 479 | if (!demoPrefix_ && viewImage_) { 480 | cv::namedWindow("YOLO V4", cv::WINDOW_NORMAL); 481 | if (fullScreen_) { 482 | cv::setWindowProperty("YOLO V4", cv::WND_PROP_FULLSCREEN, cv::WINDOW_FULLSCREEN); 483 | } else { 484 | cv::moveWindow("YOLO V4", 0, 0); 485 | cv::resizeWindow("YOLO V4", 640, 480); 486 | } 487 | } 488 | 489 | demoTime_ = what_time_is_it_now(); 490 | 491 | while (!demoDone_) { 492 | buffIndex_ = (buffIndex_ + 1) % 3; 493 | fetch_thread = 
std::thread(&YoloObjectDetector::fetchInThread, this); 494 | detect_thread = std::thread(&YoloObjectDetector::detectInThread, this); 495 | if (!demoPrefix_) { 496 | fps_ = 1. / (what_time_is_it_now() - demoTime_); 497 | demoTime_ = what_time_is_it_now(); 498 | if (viewImage_) { 499 | displayInThread(0); 500 | } else { 501 | generate_image(buff_[(buffIndex_ + 1) % 3], ipl_); 502 | } 503 | publishInThread(); 504 | } else { 505 | char name[256]; 506 | sprintf(name, "%s_%08d", demoPrefix_, count); 507 | save_image(buff_[(buffIndex_ + 1) % 3], name); 508 | } 509 | fetch_thread.join(); 510 | detect_thread.join(); 511 | ++count; 512 | if (!isNodeRunning()) { 513 | demoDone_ = true; 514 | } 515 | } 516 | } 517 | 518 | IplImageWithHeader_ YoloObjectDetector::getIplImageWithHeader() { 519 | IplImage* ROS_img = new IplImage(camImageCopy_); 520 | IplImageWithHeader_ header = {.image = ROS_img, .header = imageHeader_}; 521 | return header; 522 | } 523 | 524 | bool YoloObjectDetector::getImageStatus(void) { 525 | boost::shared_lock lock(mutexImageStatus_); 526 | return imageStatus_; 527 | } 528 | 529 | bool YoloObjectDetector::isNodeRunning(void) { 530 | boost::shared_lock lock(mutexNodeStatus_); 531 | return isNodeRunning_; 532 | } 533 | 534 | void* YoloObjectDetector::publishInThread() { 535 | // Publish image. 536 | cv::Mat cvImage = cv::cvarrToMat(ipl_); 537 | if (!publishDetectionImage(cv::Mat(cvImage))) { 538 | ROS_DEBUG("Detection image has not been broadcasted."); 539 | } 540 | 541 | // Publish bounding boxes and detection result. 
542 | int num = roiBoxes_[0].num; 543 | if (num > 0 && num <= 100) { 544 | for (int i = 0; i < num; i++) { 545 | for (int j = 0; j < numClasses_; j++) { 546 | if (roiBoxes_[i].Class == j) { 547 | rosBoxes_[j].push_back(roiBoxes_[i]); 548 | rosBoxCounter_[j]++; 549 | } 550 | } 551 | } 552 | 553 | darknet_ros_msgs::ObjectCount msg; 554 | msg.header.stamp = ros::Time::now(); 555 | msg.header.frame_id = "detection"; 556 | msg.count = num; 557 | objectPublisher_.publish(msg); 558 | 559 | for (int i = 0; i < numClasses_; i++) { 560 | if (rosBoxCounter_[i] > 0) { 561 | darknet_ros_msgs::BoundingBox boundingBox; 562 | 563 | for (int j = 0; j < rosBoxCounter_[i]; j++) { 564 | int xmin = (rosBoxes_[i][j].x - rosBoxes_[i][j].w / 2) * frameWidth_; 565 | int ymin = (rosBoxes_[i][j].y - rosBoxes_[i][j].h / 2) * frameHeight_; 566 | int xmax = (rosBoxes_[i][j].x + rosBoxes_[i][j].w / 2) * frameWidth_; 567 | int ymax = (rosBoxes_[i][j].y + rosBoxes_[i][j].h / 2) * frameHeight_; 568 | 569 | boundingBox.Class = classLabels_[i]; 570 | boundingBox.id = i; 571 | boundingBox.probability = rosBoxes_[i][j].prob; 572 | boundingBox.xmin = xmin; 573 | boundingBox.ymin = ymin; 574 | boundingBox.xmax = xmax; 575 | boundingBox.ymax = ymax; 576 | boundingBoxesResults_.bounding_boxes.push_back(boundingBox); 577 | } 578 | } 579 | } 580 | boundingBoxesResults_.header.stamp = ros::Time::now(); 581 | boundingBoxesResults_.header.frame_id = "detection"; 582 | boundingBoxesResults_.image_header = headerBuff_[(buffIndex_ + 1) % 3]; 583 | boundingBoxesPublisher_.publish(boundingBoxesResults_); 584 | } else { 585 | darknet_ros_msgs::ObjectCount msg; 586 | msg.header.stamp = ros::Time::now(); 587 | msg.header.frame_id = "detection"; 588 | msg.count = 0; 589 | objectPublisher_.publish(msg); 590 | } 591 | if (isCheckingForObjects()) { 592 | ROS_DEBUG("[YoloObjectDetector] check for objects in image."); 593 | darknet_ros_msgs::CheckForObjectsResult objectsActionResult; 594 | objectsActionResult.id = 
buffId_[0]; 595 | objectsActionResult.bounding_boxes = boundingBoxesResults_; 596 | checkForObjectsActionServer_->setSucceeded(objectsActionResult, "Send bounding boxes."); 597 | } 598 | boundingBoxesResults_.bounding_boxes.clear(); 599 | for (int i = 0; i < numClasses_; i++) { 600 | rosBoxes_[i].clear(); 601 | rosBoxCounter_[i] = 0; 602 | } 603 | 604 | return 0; 605 | } 606 | 607 | } /* namespace darknet_ros*/ 608 | --------------------------------------------------------------------------------