├── darknet_ros ├── yolo_network_config │ ├── weights │ │ ├── .gitignore │ │ └── how_to_download_weights.txt │ └── cfg │ │ ├── yolov2-tiny-voc.cfg │ │ ├── yolov2-tiny.cfg │ │ ├── yolov2-voc.cfg │ │ ├── yolov2.cfg │ │ ├── yolov3-voc.cfg │ │ ├── yolov3.cfg │ │ └── yolov4.cfg ├── doc │ ├── test_detection.png │ ├── test_detection_anymal.png │ └── quadruped_anymal_and_person.JPG ├── test │ ├── test_main.cpp │ ├── object_detection.test │ ├── yolov2.yaml │ └── ObjectDetection.cpp ├── launch │ ├── darknet_ros_gdb.launch │ ├── yolo_v3.launch │ ├── yolo_v4.launch │ └── darknet_ros.launch ├── include │ └── darknet_ros │ │ ├── image_interface.h │ │ └── YoloObjectDetector.hpp ├── src │ ├── yolo_object_detector_node.cpp │ ├── image_interface.c │ └── YoloObjectDetector.cpp ├── config │ ├── yolov2-voc.yaml │ ├── yolov3-voc.yaml │ ├── yolov2-tiny-voc.yaml │ ├── ros.yaml │ ├── yolov2.yaml │ ├── yolov3.yaml │ ├── yolov4.yaml │ └── yolov2-tiny.yaml ├── package.xml ├── CHANGELOG.rst └── CMakeLists.txt ├── darknet_ros_msgs ├── msg │ ├── ObjectCount.msg │ ├── BoundingBoxes.msg │ └── BoundingBox.msg ├── action │ └── CheckForObjects.action ├── CMakeLists.txt ├── CHANGELOG.rst └── package.xml ├── jenkins-pipeline ├── .gitmodules ├── LICENSE └── README.md /darknet_ros/yolo_network_config/weights/.gitignore: -------------------------------------------------------------------------------- 1 | *.weights 2 | -------------------------------------------------------------------------------- /darknet_ros_msgs/msg/ObjectCount.msg: -------------------------------------------------------------------------------- 1 | Header header 2 | int8 count 3 | -------------------------------------------------------------------------------- /jenkins-pipeline: -------------------------------------------------------------------------------- 1 | library 'continuous_integration_pipeline' 2 | ciPipeline("") 3 | -------------------------------------------------------------------------------- /.gitmodules: 
-------------------------------------------------------------------------------- 1 | [submodule "darknet"] 2 | path = darknet 3 | url = https://github.com/Tossy0423/darknet.git 4 | -------------------------------------------------------------------------------- /darknet_ros_msgs/msg/BoundingBoxes.msg: -------------------------------------------------------------------------------- 1 | Header header 2 | Header image_header 3 | BoundingBox[] bounding_boxes 4 | -------------------------------------------------------------------------------- /darknet_ros/doc/test_detection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tossy0423/darknet_ros/HEAD/darknet_ros/doc/test_detection.png -------------------------------------------------------------------------------- /darknet_ros/doc/test_detection_anymal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tossy0423/darknet_ros/HEAD/darknet_ros/doc/test_detection_anymal.png -------------------------------------------------------------------------------- /darknet_ros/doc/quadruped_anymal_and_person.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tossy0423/darknet_ros/HEAD/darknet_ros/doc/quadruped_anymal_and_person.JPG -------------------------------------------------------------------------------- /darknet_ros_msgs/msg/BoundingBox.msg: -------------------------------------------------------------------------------- 1 | float64 probability 2 | int64 xmin 3 | int64 ymin 4 | int64 xmax 5 | int64 ymax 6 | int16 id 7 | string Class 8 | -------------------------------------------------------------------------------- /darknet_ros/test/test_main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // ROS 4 | #include 5 | 6 | int main(int argc, char** argv) { 7 | 
ros::init(argc, argv, "darknet_ros_test"); 8 | testing::InitGoogleTest(&argc, argv); 9 | return RUN_ALL_TESTS(); 10 | } 11 | -------------------------------------------------------------------------------- /darknet_ros_msgs/action/CheckForObjects.action: -------------------------------------------------------------------------------- 1 | # Check if objects in image 2 | 3 | # Goal definition 4 | int16 id 5 | sensor_msgs/Image image 6 | 7 | --- 8 | # Result definition 9 | int16 id 10 | darknet_ros_msgs/BoundingBoxes bounding_boxes 11 | 12 | --- 13 | # Feedback definition 14 | -------------------------------------------------------------------------------- /darknet_ros/launch/darknet_ros_gdb.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /darknet_ros/include/darknet_ros/image_interface.h: -------------------------------------------------------------------------------- 1 | /* 2 | * image_interface.h 3 | * 4 | * Created on: Dec 19, 2016 5 | * Author: Marko Bjelonic 6 | * Institute: ETH Zurich, Robotic Systems Lab 7 | */ 8 | 9 | #ifndef IMAGE_INTERFACE_H 10 | #define IMAGE_INTERFACE_H 11 | 12 | #include "image.h" 13 | #include "opencv2/core/types_c.h" 14 | 15 | static float get_pixel(image m, int x, int y, int c); 16 | image** load_alphabet_with_file(char* datafile); 17 | void generate_image(image p, IplImage* disp); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /darknet_ros/src/yolo_object_detector_node.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * yolo_obstacle_detector_node.cpp 3 | * 4 | * Created on: Dec 19, 2016 5 | * Author: Marko Bjelonic 6 | * Institute: ETH Zurich, Robotic Systems Lab 7 | */ 8 | 9 | #include 10 | #include 11 | 12 | int main(int argc, char** argv) { 13 | ros::init(argc, 
argv, "darknet_ros"); 14 | ros::NodeHandle nodeHandle("~"); 15 | darknet_ros::YoloObjectDetector yoloObjectDetector(nodeHandle); 16 | 17 | ros::spin(); 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /darknet_ros/launch/yolo_v3.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /darknet_ros/launch/yolo_v4.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /darknet_ros/yolo_network_config/weights/how_to_download_weights.txt: -------------------------------------------------------------------------------- 1 | cd catkin_workspace/src/darknet_ros/darknet_ros/yolo_network_config/weights/ 2 | 3 | COCO data set (Yolo v2): 4 | wget http://pjreddie.com/media/files/yolov2.weights 5 | wget http://pjreddie.com/media/files/yolov2-tiny.weights 6 | 7 | VOC data set (Yolo v2): 8 | wget http://pjreddie.com/media/files/yolov2-voc.weights 9 | wget http://pjreddie.com/media/files/yolov2-tiny-voc.weights 10 | 11 | Yolo v3: 12 | wget http://pjreddie.com/media/files/yolov3.weights 13 | wget http://pjreddie.com/media/files/yolov3-voc.weights 14 | -------------------------------------------------------------------------------- /darknet_ros/config/yolov2-voc.yaml: -------------------------------------------------------------------------------- 1 | yolo_model: 2 | 3 | config_file: 4 | name: yolov2-voc.cfg 5 | weight_file: 6 | name: yolov2-voc.weights 7 | threshold: 8 | value: 0.3 9 | detection_classes: 10 | names: 11 | - aeroplane 12 | - bicycle 13 | - bird 14 | - boat 15 | - bottle 16 | - bus 17 | - car 18 | - cat 19 | - chair 20 | - cow 21 | - 
diningtable 22 | - dog 23 | - horse 24 | - motorbike 25 | - person 26 | - pottedplant 27 | - sheep 28 | - sofa 29 | - train 30 | - tvmonitor 31 | -------------------------------------------------------------------------------- /darknet_ros/config/yolov3-voc.yaml: -------------------------------------------------------------------------------- 1 | yolo_model: 2 | 3 | config_file: 4 | name: yolov3-voc.cfg 5 | weight_file: 6 | name: yolov3-voc.weights 7 | threshold: 8 | value: 0.3 9 | detection_classes: 10 | names: 11 | - aeroplane 12 | - bicycle 13 | - bird 14 | - boat 15 | - bottle 16 | - bus 17 | - car 18 | - cat 19 | - chair 20 | - cow 21 | - diningtable 22 | - dog 23 | - horse 24 | - motorbike 25 | - person 26 | - pottedplant 27 | - sheep 28 | - sofa 29 | - train 30 | - tvmonitor 31 | -------------------------------------------------------------------------------- /darknet_ros/config/yolov2-tiny-voc.yaml: -------------------------------------------------------------------------------- 1 | yolo_model: 2 | 3 | config_file: 4 | name: yolov2-tiny-voc.cfg 5 | weight_file: 6 | name: yolov2-tiny-voc.weights 7 | threshold: 8 | value: 0.3 9 | detection_classes: 10 | names: 11 | - aeroplane 12 | - bicycle 13 | - bird 14 | - boat 15 | - bottle 16 | - bus 17 | - car 18 | - cat 19 | - chair 20 | - cow 21 | - diningtable 22 | - dog 23 | - horse 24 | - motorbike 25 | - person 26 | - pottedplant 27 | - sheep 28 | - sofa 29 | - train 30 | - tvmonitor 31 | -------------------------------------------------------------------------------- /darknet_ros/config/ros.yaml: -------------------------------------------------------------------------------- 1 | subscribers: 2 | 3 | camera_reading: 4 | topic: /camera/rgb/image_raw 5 | queue_size: 1 6 | 7 | actions: 8 | 9 | camera_reading: 10 | name: /darknet_ros/check_for_objects 11 | 12 | publishers: 13 | 14 | object_detector: 15 | topic: /darknet_ros/found_object 16 | queue_size: 1 17 | latch: false 18 | 19 | bounding_boxes: 20 | topic: 
/darknet_ros/bounding_boxes 21 | queue_size: 1 22 | latch: false 23 | 24 | detection_image: 25 | topic: /darknet_ros/detection_image 26 | queue_size: 1 27 | latch: true 28 | 29 | image_view: 30 | 31 | enable_opencv: true 32 | wait_key_delay: 1 33 | enable_console_output: true 34 | -------------------------------------------------------------------------------- /darknet_ros_msgs/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.12) 2 | 3 | project(darknet_ros_msgs) 4 | 5 | set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}") 6 | 7 | find_package(catkin REQUIRED 8 | COMPONENTS 9 | actionlib_msgs 10 | geometry_msgs 11 | sensor_msgs 12 | std_msgs 13 | message_generation 14 | ) 15 | 16 | add_message_files( 17 | FILES 18 | BoundingBox.msg 19 | BoundingBoxes.msg 20 | ObjectCount.msg 21 | ) 22 | 23 | add_action_files( 24 | FILES 25 | CheckForObjects.action 26 | ) 27 | 28 | generate_messages( 29 | DEPENDENCIES 30 | actionlib_msgs 31 | geometry_msgs 32 | sensor_msgs 33 | std_msgs 34 | ) 35 | 36 | catkin_package( 37 | CATKIN_DEPENDS 38 | actionlib_msgs 39 | geometry_msgs 40 | sensor_msgs 41 | message_runtime 42 | std_msgs 43 | ) 44 | -------------------------------------------------------------------------------- /darknet_ros_msgs/CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 2 | Changelog for package darknet_ros_msgs 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | 5 | 1.1.4 (2019-03-03) 6 | ------------------ 7 | 8 | 1.1.3 (2018-04-26) 9 | ------------------ 10 | * Fixed formatting part 2. 11 | * Merge branch 'firephinx-master' 12 | * Merge branch 'master' of https://github.com/firephinx/darknet_ros into firephinx-master 13 | * Added rgb_image_header to BoundingBoxes msg. 14 | * Merge pull request `#57 `_ from leggedrobotics/devel/threads 15 | Devel/threads 16 | * Adapted package description. 
17 | * Merge branch 'master' into devel/threads 18 | * Update package.xml 19 | * Contributors: Kevin Zhang, Marko Bjelonic 20 | 21 | 1.1.2 (2018-01-06) 22 | ------------------ 23 | * First release of darknet_ros_msgs. 24 | -------------------------------------------------------------------------------- /darknet_ros/test/object_detection.test: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /darknet_ros_msgs/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | darknet_ros_msgs 4 | 1.1.4 5 | Darknet is an open source neural network framework that runs on CPU and GPU. You only look once (YOLO) is a state-of-the-art, real-time object detection system. 6 | Marko Bjelonic 7 | BSD 8 | https://github.com/leggedrobotics/darknet_ros 9 | Marko Bjelonic 10 | 11 | catkin 12 | 13 | actionlib_msgs 14 | geometry_msgs 15 | sensor_msgs 16 | message_generation 17 | std_msgs 18 | 19 | actionlib_msgs 20 | geometry_msgs 21 | sensor_msgs 22 | message_runtime 23 | std_msgs 24 | 25 | -------------------------------------------------------------------------------- /darknet_ros/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | darknet_ros 4 | 1.1.4 5 | Darknet is an open source neural network framework that runs on CPU and GPU. You only look once (YOLO) is a state-of-the-art, real-time object detection system. 
6 | Marko Bjelonic 7 | BSD 8 | https://github.com/leggedrobotics/darknet_ros 9 | Marko Bjelonic 10 | 11 | catkin 12 | boost 13 | libopencv-dev 14 | libx11 15 | libxt-dev 16 | libxext 17 | 18 | roscpp 19 | rospy 20 | std_msgs 21 | image_transport 22 | cv_bridge 23 | sensor_msgs 24 | message_generation 25 | darknet_ros_msgs 26 | actionlib 27 | 28 | 29 | rostest 30 | wget 31 | 32 | -------------------------------------------------------------------------------- /darknet_ros/src/image_interface.c: -------------------------------------------------------------------------------- 1 | /* 2 | * image_interface.c 3 | * 4 | * Created on: Dec 19, 2016 5 | * Author: Marko Bjelonic 6 | * Institute: ETH Zurich, Robotic Systems Lab 7 | */ 8 | 9 | #include "darknet_ros/image_interface.h" 10 | 11 | static float get_pixel(image m, int x, int y, int c) { 12 | assert(x < m.w && y < m.h && c < m.c); 13 | return m.data[c * m.h * m.w + y * m.w + x]; 14 | } 15 | 16 | image** load_alphabet_with_file(char* datafile) { 17 | int i, j; 18 | const int nsize = 8; 19 | image** alphabets = calloc(nsize, sizeof(image)); 20 | char* labels = "/labels/%d_%d.png"; 21 | char* files = (char*)malloc(1 + strlen(datafile) + strlen(labels)); 22 | strcpy(files, datafile); 23 | strcat(files, labels); 24 | for (j = 0; j < nsize; ++j) { 25 | alphabets[j] = calloc(128, sizeof(image)); 26 | for (i = 32; i < 127; ++i) { 27 | char buff[256]; 28 | sprintf(buff, files, i, j); 29 | alphabets[j][i] = load_image_color(buff, 0, 0); 30 | } 31 | } 32 | return alphabets; 33 | } 34 | 35 | #ifdef OPENCV 36 | void generate_image(image p, IplImage* disp) { 37 | int x, y, k; 38 | if (p.c == 3) rgbgr_image(p); 39 | // normalize_image(copy); 40 | 41 | int step = disp->widthStep; 42 | for (y = 0; y < p.h; ++y) { 43 | for (x = 0; x < p.w; ++x) { 44 | for (k = 0; k < p.c; ++k) { 45 | disp->imageData[y * step + x * p.c + k] = (unsigned char)(get_pixel(p, x, y, k) * 255); 46 | } 47 | } 48 | } 49 | } 50 | #endif 51 | 
-------------------------------------------------------------------------------- /darknet_ros/launch/darknet_ros.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, Marko Bjelonic, Robotic Systems Lab, ETH Zurich 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the copyright holder nor the names of its 12 | contributors may be used to endorse or promote products derived 13 | from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. 
IN NO EVENT SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /darknet_ros/config/yolov2.yaml: -------------------------------------------------------------------------------- 1 | yolo_model: 2 | 3 | config_file: 4 | name: yolov2.cfg 5 | weight_file: 6 | name: yolov2.weights 7 | threshold: 8 | value: 0.3 9 | detection_classes: 10 | names: 11 | - person 12 | - bicycle 13 | - car 14 | - motorbike 15 | - aeroplane 16 | - bus 17 | - train 18 | - truck 19 | - boat 20 | - traffic light 21 | - fire hydrant 22 | - stop sign 23 | - parking meter 24 | - bench 25 | - bird 26 | - cat 27 | - dog 28 | - horse 29 | - sheep 30 | - cow 31 | - elephant 32 | - bear 33 | - zebra 34 | - giraffe 35 | - backpack 36 | - umbrella 37 | - handbag 38 | - tie 39 | - suitcase 40 | - frisbee 41 | - skis 42 | - snowboard 43 | - sports ball 44 | - kite 45 | - baseball bat 46 | - baseball glove 47 | - skateboard 48 | - surfboard 49 | - tennis racket 50 | - bottle 51 | - wine glass 52 | - cup 53 | - fork 54 | - knife 55 | - spoon 56 | - bowl 57 | - banana 58 | - apple 59 | - sandwich 60 | - orange 61 | - broccoli 62 | - carrot 63 | - hot dog 64 | - pizza 65 | - donut 66 | - cake 67 | - chair 68 | - sofa 69 | - pottedplant 70 | - bed 71 | - diningtable 72 | - toilet 73 | - tvmonitor 74 | - laptop 75 | - mouse 76 | - remote 77 | - keyboard 78 | - cell phone 79 | - microwave 80 | - oven 81 | - toaster 82 | - sink 83 | - refrigerator 84 | - book 85 
| - clock 86 | - vase 87 | - scissors 88 | - teddy bear 89 | - hair drier 90 | - toothbrush 91 | -------------------------------------------------------------------------------- /darknet_ros/config/yolov3.yaml: -------------------------------------------------------------------------------- 1 | yolo_model: 2 | 3 | config_file: 4 | name: yolov3.cfg 5 | weight_file: 6 | name: yolov3.weights 7 | threshold: 8 | value: 0.3 9 | detection_classes: 10 | names: 11 | - person 12 | - bicycle 13 | - car 14 | - motorbike 15 | - aeroplane 16 | - bus 17 | - train 18 | - truck 19 | - boat 20 | - traffic light 21 | - fire hydrant 22 | - stop sign 23 | - parking meter 24 | - bench 25 | - bird 26 | - cat 27 | - dog 28 | - horse 29 | - sheep 30 | - cow 31 | - elephant 32 | - bear 33 | - zebra 34 | - giraffe 35 | - backpack 36 | - umbrella 37 | - handbag 38 | - tie 39 | - suitcase 40 | - frisbee 41 | - skis 42 | - snowboard 43 | - sports ball 44 | - kite 45 | - baseball bat 46 | - baseball glove 47 | - skateboard 48 | - surfboard 49 | - tennis racket 50 | - bottle 51 | - wine glass 52 | - cup 53 | - fork 54 | - knife 55 | - spoon 56 | - bowl 57 | - banana 58 | - apple 59 | - sandwich 60 | - orange 61 | - broccoli 62 | - carrot 63 | - hot dog 64 | - pizza 65 | - donut 66 | - cake 67 | - chair 68 | - sofa 69 | - pottedplant 70 | - bed 71 | - diningtable 72 | - toilet 73 | - tvmonitor 74 | - laptop 75 | - mouse 76 | - remote 77 | - keyboard 78 | - cell phone 79 | - microwave 80 | - oven 81 | - toaster 82 | - sink 83 | - refrigerator 84 | - book 85 | - clock 86 | - vase 87 | - scissors 88 | - teddy bear 89 | - hair drier 90 | - toothbrush 91 | -------------------------------------------------------------------------------- /darknet_ros/config/yolov4.yaml: -------------------------------------------------------------------------------- 1 | yolo_model: 2 | 3 | config_file: 4 | name: yolov4.cfg 5 | weight_file: 6 | name: yolov4.weights 7 | threshold: 8 | value: 0.3 9 | detection_classes: 10 | 
names: 11 | - person 12 | - bicycle 13 | - car 14 | - motorbike 15 | - aeroplane 16 | - bus 17 | - train 18 | - truck 19 | - boat 20 | - traffic light 21 | - fire hydrant 22 | - stop sign 23 | - parking meter 24 | - bench 25 | - bird 26 | - cat 27 | - dog 28 | - horse 29 | - sheep 30 | - cow 31 | - elephant 32 | - bear 33 | - zebra 34 | - giraffe 35 | - backpack 36 | - umbrella 37 | - handbag 38 | - tie 39 | - suitcase 40 | - frisbee 41 | - skis 42 | - snowboard 43 | - sports ball 44 | - kite 45 | - baseball bat 46 | - baseball glove 47 | - skateboard 48 | - surfboard 49 | - tennis racket 50 | - bottle 51 | - wine glass 52 | - cup 53 | - fork 54 | - knife 55 | - spoon 56 | - bowl 57 | - banana 58 | - apple 59 | - sandwich 60 | - orange 61 | - broccoli 62 | - carrot 63 | - hot dog 64 | - pizza 65 | - donut 66 | - cake 67 | - chair 68 | - sofa 69 | - pottedplant 70 | - bed 71 | - diningtable 72 | - toilet 73 | - tvmonitor 74 | - laptop 75 | - mouse 76 | - remote 77 | - keyboard 78 | - cell phone 79 | - microwave 80 | - oven 81 | - toaster 82 | - sink 83 | - refrigerator 84 | - book 85 | - clock 86 | - vase 87 | - scissors 88 | - teddy bear 89 | - hair drier 90 | - toothbrush 91 | -------------------------------------------------------------------------------- /darknet_ros/config/yolov2-tiny.yaml: -------------------------------------------------------------------------------- 1 | yolo_model: 2 | 3 | config_file: 4 | name: yolov2-tiny.cfg 5 | weight_file: 6 | name: yolov2-tiny.weights 7 | threshold: 8 | value: 0.3 9 | detection_classes: 10 | names: 11 | - person 12 | - bicycle 13 | - car 14 | - motorbike 15 | - aeroplane 16 | - bus 17 | - train 18 | - truck 19 | - boat 20 | - traffic light 21 | - fire hydrant 22 | - stop sign 23 | - parking meter 24 | - bench 25 | - bird 26 | - cat 27 | - dog 28 | - horse 29 | - sheep 30 | - cow 31 | - elephant 32 | - bear 33 | - zebra 34 | - giraffe 35 | - backpack 36 | - umbrella 37 | - handbag 38 | - tie 39 | - suitcase 40 | - 
frisbee 41 | - skis 42 | - snowboard 43 | - sports ball 44 | - kite 45 | - baseball bat 46 | - baseball glove 47 | - skateboard 48 | - surfboard 49 | - tennis racket 50 | - bottle 51 | - wine glass 52 | - cup 53 | - fork 54 | - knife 55 | - spoon 56 | - bowl 57 | - banana 58 | - apple 59 | - sandwich 60 | - orange 61 | - broccoli 62 | - carrot 63 | - hot dog 64 | - pizza 65 | - donut 66 | - cake 67 | - chair 68 | - sofa 69 | - pottedplant 70 | - bed 71 | - diningtable 72 | - toilet 73 | - tvmonitor 74 | - laptop 75 | - mouse 76 | - remote 77 | - keyboard 78 | - cell phone 79 | - microwave 80 | - oven 81 | - toaster 82 | - sink 83 | - refrigerator 84 | - book 85 | - clock 86 | - vase 87 | - scissors 88 | - teddy bear 89 | - hair drier 90 | - toothbrush 91 | -------------------------------------------------------------------------------- /darknet_ros/test/yolov2.yaml: -------------------------------------------------------------------------------- 1 | image_view: 2 | 3 | enable_opencv: true 4 | wait_key_delay: 600 5 | 6 | yolo_model: 7 | 8 | config_file: 9 | name: yolov2.cfg 10 | weight_file: 11 | name: yolov2.weights 12 | threshold: 13 | value: 0.5 14 | detection_classes: 15 | names: 16 | - person 17 | - bicycle 18 | - car 19 | - motorbike 20 | - aeroplane 21 | - bus 22 | - train 23 | - truck 24 | - boat 25 | - traffic light 26 | - fire hydrant 27 | - stop sign 28 | - parking meter 29 | - bench 30 | - bird 31 | - cat 32 | - dog 33 | - horse 34 | - sheep 35 | - cow 36 | - elephant 37 | - bear 38 | - zebra 39 | - giraffe 40 | - backpack 41 | - umbrella 42 | - handbag 43 | - tie 44 | - suitcase 45 | - frisbee 46 | - skis 47 | - snowboard 48 | - sports ball 49 | - kite 50 | - baseball bat 51 | - baseball glove 52 | - skateboard 53 | - surfboard 54 | - tennis racket 55 | - bottle 56 | - wine glass 57 | - cup 58 | - fork 59 | - knife 60 | - spoon 61 | - bowl 62 | - banana 63 | - apple 64 | - sandwich 65 | - orange 66 | - broccoli 67 | - carrot 68 | - hot dog 69 | - pizza 
70 | - donut 71 | - cake 72 | - chair 73 | - sofa 74 | - pottedplant 75 | - bed 76 | - diningtable 77 | - toilet 78 | - tvmonitor 79 | - laptop 80 | - mouse 81 | - remote 82 | - keyboard 83 | - cell phone 84 | - microwave 85 | - oven 86 | - toaster 87 | - sink 88 | - refrigerator 89 | - book 90 | - clock 91 | - vase 92 | - scissors 93 | - teddy bear 94 | - hair drier 95 | - toothbrush 96 | -------------------------------------------------------------------------------- /darknet_ros/yolo_network_config/cfg/yolov2-tiny-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | max_batches = 40200 20 | policy=steps 21 | steps=-1,100,20000,30000 22 | scales=.1,10,.1,.1 23 | 24 | [convolutional] 25 | batch_normalize=1 26 | filters=16 27 | size=3 28 | stride=1 29 | pad=1 30 | activation=leaky 31 | 32 | [maxpool] 33 | size=2 34 | stride=2 35 | 36 | [convolutional] 37 | batch_normalize=1 38 | filters=32 39 | size=3 40 | stride=1 41 | pad=1 42 | activation=leaky 43 | 44 | [maxpool] 45 | size=2 46 | stride=2 47 | 48 | [convolutional] 49 | batch_normalize=1 50 | filters=64 51 | size=3 52 | stride=1 53 | pad=1 54 | activation=leaky 55 | 56 | [maxpool] 57 | size=2 58 | stride=2 59 | 60 | [convolutional] 61 | batch_normalize=1 62 | filters=128 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [maxpool] 69 | size=2 70 | stride=2 71 | 72 | [convolutional] 73 | batch_normalize=1 74 | filters=256 75 | size=3 76 | stride=1 77 | pad=1 78 | activation=leaky 79 | 80 | [maxpool] 81 | size=2 82 | stride=2 83 | 84 | [convolutional] 85 | batch_normalize=1 86 | filters=512 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=leaky 91 | 92 | [maxpool] 93 | 
size=2 94 | stride=1 95 | 96 | [convolutional] 97 | batch_normalize=1 98 | filters=1024 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | ########### 105 | 106 | [convolutional] 107 | batch_normalize=1 108 | size=3 109 | stride=1 110 | pad=1 111 | filters=1024 112 | activation=leaky 113 | 114 | [convolutional] 115 | size=1 116 | stride=1 117 | pad=1 118 | filters=125 119 | activation=linear 120 | 121 | [region] 122 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 123 | bias_match=1 124 | classes=20 125 | coords=4 126 | num=5 127 | softmax=1 128 | jitter=.2 129 | rescore=1 130 | 131 | object_scale=5 132 | noobject_scale=1 133 | class_scale=1 134 | coord_scale=1 135 | 136 | absolute=1 137 | thresh = .6 138 | random=1 139 | -------------------------------------------------------------------------------- /darknet_ros/yolo_network_config/cfg/yolov2-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | 
activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | size=3 110 | stride=1 111 | pad=1 112 | filters=512 113 | activation=leaky 114 | 115 | [convolutional] 116 | size=1 117 | stride=1 118 | pad=1 119 | filters=425 120 | activation=linear 121 | 122 | [region] 123 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 124 | bias_match=1 125 | classes=80 126 | coords=4 127 | num=5 128 | softmax=1 129 | jitter=.2 130 | rescore=0 131 | 132 | object_scale=5 133 | noobject_scale=1 134 | class_scale=1 135 | coord_scale=1 136 | 137 | absolute=1 138 | thresh = .6 139 | random=1 140 | -------------------------------------------------------------------------------- /darknet_ros/CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 2 | Changelog for package darknet_ros 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | 5 | 1.1.4 (2019-03-03) 6 | ------------------ 7 | * Merge pull request `#141 `_ from lorenwel/feature/launch_file_arg 8 | Added arg for launch file parameter files 9 | * Fixed synatx error 10 | * Removed unnecessary args 11 | * Adapted yolo_v3.launch to new launch file 12 | * Added launch file arguments for parameter files 13 | * Merge branch 'Texas-Aerial-Robotics-headerFixForUpsteam' 14 | * Merge branch 'headerFixForUpsteam' of https://github.com/Texas-Aerial-Robotics/darknet_ros into 
Texas-Aerial-Robotics-headerFixForUpsteam 15 | * Remove unused variable 16 | * Merge branch 'headerFixForUpsteam' of https://github.com/Texas-Aerial-Robotics/darknet_ros into Texas-Aerial-Robotics-headerFixForUpsteam 17 | * Multithreading mismatched image header fix 18 | * Forgot to add image. 19 | * Cropped test image. 20 | * Changed image for test. 21 | * Changed resame image. 22 | * Added new images for test. 23 | * Removed twice loading of weightfile. 24 | * Contributors: Lorenz Wellhausen, Marko Bjelonic, Umer Salman, lorenwel 25 | 26 | 1.1.3 (2018-04-26) 27 | ------------------ 28 | * Fixed iteration through detection boxes. 29 | * Merge pull request `#80 `_ from leggedrobotics/feature/yolo3 30 | Feature/yolo3 31 | * Fixed publishers. 32 | * Applied first changes for yolo v3. 33 | * Updated darknet and added launch files for yolov3. 34 | * Merge pull request `#73 `_ from leggedrobotics/fix/weights 35 | Fix/weights 36 | * Fixed weights. 37 | * Fix test. 38 | * Fixed formatting part 2. 39 | * Fixed naming. 40 | * Merge branch 'firephinx-master' 41 | * Merge branch 'master' of https://github.com/firephinx/darknet_ros into firephinx-master 42 | * Merge pull request `#62 `_ from warp1337/master 43 | Reduced window size to reasonable values 44 | * Reduced window size to reasonable values 45 | * Added rgb_image_header to BoundingBoxes msg. 46 | * Updated to the latest darknet version. 47 | * Merge pull request `#57 `_ from leggedrobotics/devel/threads 48 | Devel/threads 49 | * Rearranged. 50 | * Fixed action with new threads. 51 | * Adapted package description. 52 | * Added publisher. 53 | * Merge branch 'master' into devel/threads 54 | * Rearranged code. 55 | * Update package.xml 56 | * Fixed image_view if x11 is not running. 57 | * COmment runYolo(). 58 | * Update object_detector_demo.cpp 59 | * Changed ros config. 60 | * Node is shutting down properly. 61 | * Rearranged code and added threads. 
62 | * Contributors: Kevin Zhang, Marko Bjelonic, fl 63 | 64 | 1.1.2 (2018-01-06) 65 | ------------------ 66 | * First release of darknet_ros. 67 | -------------------------------------------------------------------------------- /darknet_ros/yolo_network_config/cfg/yolov2-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | height=416 9 | width=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 80200 21 | policy=steps 22 | steps=40000,60000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | 
filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 
| 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=125 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 243 | bias_match=1 244 | classes=20 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /darknet_ros/yolo_network_config/cfg/yolov2.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | 
[convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | 
layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /darknet_ros/include/darknet_ros/YoloObjectDetector.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * YoloObjectDetector.h 3 | * 4 | * Created on: Dec 19, 2016 5 | * Author: Marko Bjelonic 6 | * Institute: ETH Zurich, Robotic Systems Lab 7 | */ 8 | 9 | #pragma once 10 | 11 | // c++ 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | // ROS 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | // OpenCv 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | // darknet_ros_msgs 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include "../../../darknet/src/blas.h" 41 | 42 | 43 | // Darknet. 
44 | #ifdef GPU 45 | #include "cublas_v2.h" 46 | #include "cuda_runtime.h" 47 | #include "curand.h" 48 | #endif 49 | 50 | extern "C" { 51 | #include 52 | #include "box.h" 53 | #include "cost_layer.h" 54 | #include "darknet_ros/image_interface.h" 55 | #include "detection_layer.h" 56 | #include "network.h" 57 | #include "parser.h" 58 | #include "region_layer.h" 59 | #include "utils.h" 60 | } 61 | 62 | extern "C" void ipl_into_image(IplImage* src, image im); 63 | extern "C" image ipl_to_image(IplImage* src); 64 | // extern "C" void show_image_cv(image p, const char* name, IplImage* disp); 65 | 66 | namespace darknet_ros { 67 | 68 | //! Bounding box of the detected object. 69 | typedef struct { 70 | float x, y, w, h, prob; 71 | int num, Class; 72 | } RosBox_; 73 | 74 | typedef struct { 75 | IplImage* image; 76 | std_msgs::Header header; 77 | } IplImageWithHeader_; 78 | 79 | class YoloObjectDetector { 80 | public: 81 | /*! 82 | * Constructor. 83 | */ 84 | explicit YoloObjectDetector(ros::NodeHandle nh); 85 | 86 | /*! 87 | * Destructor. 88 | */ 89 | ~YoloObjectDetector(); 90 | 91 | private: 92 | /*! 93 | * Reads and verifies the ROS parameters. 94 | * @return true if successful. 95 | */ 96 | bool readParameters(); 97 | 98 | /*! 99 | * Initialize the ROS connections. 100 | */ 101 | void init(); 102 | 103 | /*! 104 | * Callback of camera. 105 | * @param[in] msg image pointer. 106 | */ 107 | void cameraCallback(const sensor_msgs::ImageConstPtr& msg); 108 | 109 | /*! 110 | * Check for objects action goal callback. 111 | */ 112 | void checkForObjectsActionGoalCB(); 113 | 114 | /*! 115 | * Check for objects action preempt callback. 116 | */ 117 | void checkForObjectsActionPreemptCB(); 118 | 119 | /*! 120 | * Check if a preempt for the check for objects action has been requested. 121 | * @return false if preempt has been requested or inactive. 122 | */ 123 | bool isCheckingForObjects() const; 124 | 125 | /*! 126 | * Publishes the detection image. 
127 | * @return true if successful. 128 | */ 129 | bool publishDetectionImage(const cv::Mat& detectionImage); 130 | 131 | //! Using. 132 | using CheckForObjectsActionServer = actionlib::SimpleActionServer; 133 | using CheckForObjectsActionServerPtr = std::shared_ptr; 134 | 135 | //! ROS node handle. 136 | ros::NodeHandle nodeHandle_; 137 | 138 | //! Class labels. 139 | int numClasses_; 140 | std::vector classLabels_; 141 | 142 | //! Check for objects action server. 143 | CheckForObjectsActionServerPtr checkForObjectsActionServer_; 144 | 145 | //! Advertise and subscribe to image topics. 146 | image_transport::ImageTransport imageTransport_; 147 | 148 | //! ROS subscriber and publisher. 149 | image_transport::Subscriber imageSubscriber_; 150 | ros::Publisher objectPublisher_; 151 | ros::Publisher boundingBoxesPublisher_; 152 | 153 | //! Detected objects. 154 | std::vector > rosBoxes_; 155 | std::vector rosBoxCounter_; 156 | darknet_ros_msgs::BoundingBoxes boundingBoxesResults_; 157 | 158 | //! Camera related parameters. 159 | int frameWidth_; 160 | int frameHeight_; 161 | 162 | //! Publisher of the bounding box image. 163 | ros::Publisher detectionImagePublisher_; 164 | 165 | // Yolo running on thread. 166 | std::thread yoloThread_; 167 | 168 | // Darknet. 
169 | char** demoNames_; 170 | image** demoAlphabet_; 171 | int demoClasses_; 172 | 173 | network* net_; 174 | std_msgs::Header headerBuff_[3]; 175 | image buff_[3]; 176 | image buffLetter_[3]; 177 | int buffId_[3]; 178 | int buffIndex_ = 0; 179 | IplImage* ipl_; 180 | float fps_ = 0; 181 | float demoThresh_ = 0; 182 | float demoHier_ = .5; 183 | int running_ = 0; 184 | 185 | int demoDelay_ = 0; 186 | int demoFrame_ = 3; 187 | float** predictions_; 188 | int demoIndex_ = 0; 189 | int demoDone_ = 0; 190 | float* lastAvg2_; 191 | float* lastAvg_; 192 | float* avg_; 193 | int demoTotal_ = 0; 194 | double demoTime_; 195 | 196 | RosBox_* roiBoxes_; 197 | bool viewImage_; 198 | bool enableConsoleOutput_; 199 | int waitKeyDelay_; 200 | int fullScreen_; 201 | char* demoPrefix_; 202 | 203 | std_msgs::Header imageHeader_; 204 | cv::Mat camImageCopy_; 205 | boost::shared_mutex mutexImageCallback_; 206 | 207 | bool imageStatus_ = false; 208 | boost::shared_mutex mutexImageStatus_; 209 | 210 | bool isNodeRunning_ = true; 211 | boost::shared_mutex mutexNodeStatus_; 212 | 213 | int actionId_; 214 | boost::shared_mutex mutexActionStatus_; 215 | 216 | // double getWallTime(); 217 | 218 | int sizeNetwork(network* net); 219 | 220 | void rememberNetwork(network* net); 221 | 222 | detection* avgPredictions(network* net, int* nboxes); 223 | 224 | void* detectInThread(); 225 | 226 | void* fetchInThread(); 227 | 228 | void* displayInThread(void* ptr); 229 | 230 | void* displayLoop(void* ptr); 231 | 232 | void* detectLoop(void* ptr); 233 | 234 | void setupNetwork(char* cfgfile, char* weightfile, char* datafile, float thresh, char** names, int classes, int delay, char* prefix, 235 | int avg_frames, float hier, int w, int h, int frames, int fullscreen); 236 | 237 | void yolo(); 238 | 239 | IplImageWithHeader_ getIplImageWithHeader(); 240 | 241 | bool getImageStatus(void); 242 | 243 | bool isNodeRunning(void); 244 | 245 | void* publishInThread(); 246 | }; 247 | 248 | } /* namespace 
darknet_ros*/ 249 | -------------------------------------------------------------------------------- /darknet_ros/test/ObjectDetection.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * ObjectDetection.cpp 3 | * 4 | * Created on: Jan 07, 2017 5 | * Author: Marko Bjelonic 6 | * Institute: ETH Zurich, Robotic Systems Lab 7 | */ 8 | 9 | // Google Test 10 | #include 11 | 12 | // ROS 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | // boost 19 | #include 20 | 21 | // OpenCV2. 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | // Actions. 28 | #include 29 | 30 | using CheckForObjectsActionClient = actionlib::SimpleActionClient; 31 | using CheckForObjectsActionClientPtr = std::shared_ptr; 32 | 33 | // c++ 34 | #include 35 | #include 36 | 37 | #ifdef DARKNET_FILE_PATH 38 | std::string darknetFilePath_ = DARKNET_FILE_PATH; 39 | #else 40 | #error Path of darknet repository is not defined in CMakeLists.txt. 41 | #endif 42 | 43 | darknet_ros_msgs::BoundingBoxes boundingBoxesResults_; 44 | 45 | /*! 46 | * Done-callback for CheckForObjects action client. 47 | * @param[in] state 48 | * @param[in] result 49 | */ 50 | void checkForObjectsResultCB(const actionlib::SimpleClientGoalState& state, const darknet_ros_msgs::CheckForObjectsResultConstPtr& result) { 51 | std::cout << "[ObjectDetectionTest] Received bounding boxes." << std::endl; 52 | 53 | boundingBoxesResults_ = result->bounding_boxes; 54 | } 55 | 56 | bool sendImageToYolo(ros::NodeHandle nh, const std::string& pathToTestImage) { 57 | //! Check for objects action client. 58 | CheckForObjectsActionClientPtr checkForObjectsActionClient; 59 | 60 | // Action clients. 
61 | std::string checkForObjectsActionName; 62 | nh.param("/darknet_ros/camera_action", checkForObjectsActionName, std::string("/darknet_ros/check_for_objects")); 63 | checkForObjectsActionClient.reset(new CheckForObjectsActionClient(nh, checkForObjectsActionName, true)); 64 | 65 | // Wait till action server launches. 66 | if (!checkForObjectsActionClient->waitForServer(ros::Duration(20.0))) { 67 | std::cout << "[ObjectDetectionTest] sendImageToYolo(): checkForObjects action server has not been advertised." << std::endl; 68 | return false; 69 | } 70 | 71 | // Get test image 72 | cv_bridge::CvImagePtr cv_ptr(new cv_bridge::CvImage); 73 | cv_ptr->image = cv::imread(pathToTestImage, CV_LOAD_IMAGE_COLOR); 74 | cv_ptr->encoding = sensor_msgs::image_encodings::RGB8; 75 | sensor_msgs::ImagePtr image = cv_ptr->toImageMsg(); 76 | 77 | // Generate goal. 78 | darknet_ros_msgs::CheckForObjectsGoal goal; 79 | goal.image = *image; 80 | 81 | // Send goal. 82 | ros::Time beginYolo = ros::Time::now(); 83 | checkForObjectsActionClient->sendGoal(goal, boost::bind(&checkForObjectsResultCB, _1, _2), 84 | CheckForObjectsActionClient::SimpleActiveCallback(), 85 | CheckForObjectsActionClient::SimpleFeedbackCallback()); 86 | 87 | if (!checkForObjectsActionClient->waitForResult(ros::Duration(100.0))) { 88 | std::cout << "[ObjectDetectionTest] sendImageToYolo(): checkForObjects action server took to long to send back result." << std::endl; 89 | return false; 90 | } 91 | ros::Time endYolo = ros::Time::now(); 92 | std::cout << "[ObjectDetectionTest] Object detection for one image took " << endYolo - beginYolo << " seconds." << std::endl; 93 | return true; 94 | } 95 | 96 | TEST(ObjectDetection, DISABLED_DetectDog) { 97 | srand(static_cast(time(nullptr))); 98 | ros::NodeHandle nodeHandle("~"); 99 | 100 | // Path to test image. 
101 | std::string pathToTestImage = darknetFilePath_; 102 | pathToTestImage += "/data/"; 103 | pathToTestImage += "dog"; 104 | pathToTestImage += ".jpg"; 105 | 106 | // Send dog image to yolo. 107 | ASSERT_TRUE(sendImageToYolo(nodeHandle, pathToTestImage)); 108 | ASSERT_TRUE(sendImageToYolo(nodeHandle, pathToTestImage)); 109 | 110 | // Evaluate if yolo was able to detect the three objects: dog, bicycle and car. 111 | bool detectedDog = false; 112 | double centerErrorDog; 113 | bool detectedBicycle = false; 114 | double centerErrorBicycle; 115 | bool detectedCar = false; 116 | double centerErrorCar; 117 | 118 | for (auto& boundingBox : boundingBoxesResults_.bounding_boxes) { 119 | double xPosCenter = boundingBox.xmin + (boundingBox.xmax - boundingBox.xmin) * 0.5; 120 | double yPosCenter = boundingBox.ymin + (boundingBox.ymax - boundingBox.ymin) * 0.5; 121 | 122 | if (boundingBox.Class == "dog") { 123 | detectedDog = true; 124 | // std::cout << "centerErrorDog " << xPosCenter << ", " << yPosCenter << std::endl; 125 | centerErrorDog = std::sqrt(std::pow(xPosCenter - 222.5, 2) + std::pow(yPosCenter - 361.5, 2)); 126 | } 127 | if (boundingBox.Class == "bicycle") { 128 | detectedBicycle = true; 129 | // std::cout << "centerErrorBicycle " << xPosCenter << ", " << yPosCenter << std::endl; 130 | centerErrorBicycle = std::sqrt(std::pow(xPosCenter - 338.0, 2) + std::pow(yPosCenter - 289.0, 2)); 131 | } 132 | if (boundingBox.Class == "truck") { 133 | detectedCar = true; 134 | // std::cout << "centerErrorCar " << xPosCenter << ", " << yPosCenter << std::endl; 135 | centerErrorCar = std::sqrt(std::pow(xPosCenter - 561.0, 2) + std::pow(yPosCenter - 126.5, 2)); 136 | } 137 | } 138 | 139 | ASSERT_TRUE(detectedDog); 140 | EXPECT_LT(centerErrorDog, 40.0); 141 | ASSERT_TRUE(detectedBicycle); 142 | EXPECT_LT(centerErrorBicycle, 40.0); 143 | ASSERT_TRUE(detectedCar); 144 | EXPECT_LT(centerErrorCar, 40.0); 145 | } 146 | 147 | TEST(ObjectDetection, DetectANYmal) { 148 | 
srand(static_cast(time(nullptr))); 149 | ros::NodeHandle nodeHandle("~"); 150 | 151 | // Path to test image. 152 | std::string pathToTestImage = ros::package::getPath("darknet_ros"); 153 | pathToTestImage += "/doc/"; 154 | pathToTestImage += "quadruped_anymal_and_person"; 155 | pathToTestImage += ".JPG"; 156 | 157 | // Send quadruped-and-person image to yolo (twice, to exercise repeated goals). 158 | ASSERT_TRUE(sendImageToYolo(nodeHandle, pathToTestImage)); 159 | ASSERT_TRUE(sendImageToYolo(nodeHandle, pathToTestImage)); 160 | 161 | // Evaluate if yolo was able to detect the person. 162 | bool detectedPerson = false; 163 | double centerErrorPersonX; 164 | double centerErrorPersonY; 165 | 166 | for (auto& boundingBox : boundingBoxesResults_.bounding_boxes) { 167 | double xPosCenter = boundingBox.xmin + (boundingBox.xmax - boundingBox.xmin) * 0.5; 168 | double yPosCenter = boundingBox.ymin + (boundingBox.ymax - boundingBox.ymin) * 0.5; 169 | 170 | if (boundingBox.Class == "person") { 171 | detectedPerson = true; 172 | centerErrorPersonX = std::sqrt(std::pow(xPosCenter - 1650.0, 2)); 173 | centerErrorPersonY = std::sqrt(std::pow(yPosCenter - 1675.0, 2)); 174 | } 175 | } 176 | 177 | ASSERT_TRUE(detectedPerson); 178 | EXPECT_LT(centerErrorPersonX, 30); 179 | EXPECT_LT(centerErrorPersonY, 30); 180 | } 181 | 182 | TEST(ObjectDetection, DISABLED_DetectPerson) { 183 | srand(static_cast(time(nullptr))); 184 | ros::NodeHandle nodeHandle("~"); 185 | 186 | // Path to test image. 187 | std::string pathToTestImage = darknetFilePath_; 188 | pathToTestImage += "/data/"; 189 | pathToTestImage += "person"; 190 | pathToTestImage += ".jpg"; 191 | 192 | ASSERT_TRUE(sendImageToYolo(nodeHandle, pathToTestImage)); 193 | ASSERT_TRUE(sendImageToYolo(nodeHandle, pathToTestImage)); 194 | 195 | // Evaluate if yolo was able to detect the person.
196 | bool detectedPerson = false; 197 | double centerErrorPerson; 198 | 199 | for (auto& boundingBox : boundingBoxesResults_.bounding_boxes) { 200 | double xPosCenter = boundingBox.xmin + (boundingBox.xmax - boundingBox.xmin) * 0.5; 201 | double yPosCenter = boundingBox.ymin + (boundingBox.ymax - boundingBox.ymin) * 0.5; 202 | 203 | if (boundingBox.Class == "person") { 204 | detectedPerson = true; 205 | // std::cout << "centerErrorPerson " << xPosCenter << ", " << yPosCenter << std::endl; 206 | centerErrorPerson = std::sqrt(std::pow(xPosCenter - 228.0, 2) + std::pow(yPosCenter - 238.0, 2)); 207 | } 208 | } 209 | 210 | ASSERT_TRUE(detectedPerson); 211 | EXPECT_LT(centerErrorPerson, 40.0); 212 | } 213 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLO ROS: Real-Time Object Detection for ROS 2 | 3 | ## Overview 4 | 5 | This is a ROS package developed for object detection in camera images. You only look once (YOLO) is a state-of-the-art, real-time object detection system. In the following ROS package you are able to use YOLO (V3) on GPU and CPU. The pre-trained model of the convolutional neural network is able to detect pre-trained classes including the data set from VOC and COCO, or you can also create a network with your own detection objects. For more information about YOLO, Darknet, available training data and training YOLO see the following link: [YOLO: Real-Time Object Detection](http://pjreddie.com/darknet/yolo/). 6 | 7 | The YOLO packages have been tested under ROS Melodic and Ubuntu 18.04. This is research code, expect that it changes often and any fitness for a particular purpose is disclaimed. 
8 | 9 | **Author: [Marko Bjelonic](https://www.markobjelonic.com), marko.bjelonic@mavt.ethz.ch** 10 | 11 | **Affiliation: [Robotic Systems Lab](http://www.rsl.ethz.ch/), ETH Zurich** 12 | 13 | ![Darknet Ros example: Detection image](darknet_ros/doc/test_detection.png) 14 | ![Darknet Ros example: Detection image](darknet_ros/doc/test_detection_anymal.png) 15 | 16 | Based on the [Pascal VOC](https://pjreddie.com/projects/pascal-voc-dataset-mirror/) 2012 dataset, YOLO can detect the 20 Pascal object classes: 17 | 18 | - person 19 | - bird, cat, cow, dog, horse, sheep 20 | - aeroplane, bicycle, boat, bus, car, motorbike, train 21 | - bottle, chair, dining table, potted plant, sofa, tv/monitor 22 | 23 | Based on the [COCO](http://cocodataset.org/#home) dataset, YOLO can detect the 80 COCO object classes: 24 | 25 | - person 26 | - bicycle, car, motorbike, aeroplane, bus, train, truck, boat 27 | - traffic light, fire hydrant, stop sign, parking meter, bench 28 | - cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe 29 | - backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard, sports ball, kite, baseball bat, baseball glove, skateboard, surfboard, tennis racket 30 | - bottle, wine glass, cup, fork, knife, spoon, bowl 31 | - banana, apple, sandwich, orange, broccoli, carrot, hot dog, pizza, donut, cake 32 | - chair, sofa, pottedplant, bed, diningtable, toilet, tvmonitor, laptop, mouse, remote, keyboard, cell phone, microwave, oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, hair drier, toothbrush 33 | 34 | ## Citing 35 | 36 | The YOLO methods used in this software are described in the paper: [You Only Look Once: Unified, Real-Time Object Detection](https://arxiv.org/abs/1506.02640). 37 | 38 | If you are using YOLO V3 for ROS, please add the following citation to your publication: 39 | 40 | M. Bjelonic 41 | **"YOLO ROS: Real-Time Object Detection for ROS"**, 42 | URL: https://github.com/leggedrobotics/darknet_ros, 2018. 
43 | 44 | @misc{bjelonicYolo2018, 45 | author = {Marko Bjelonic}, 46 | title = {{YOLO ROS}: Real-Time Object Detection for {ROS}}, 47 | howpublished = {\url{https://github.com/leggedrobotics/darknet_ros}}, 48 | year = {2016--2018}, 49 | } 50 | 51 | ## Installation 52 | 53 | ### Dependencies 54 | 55 | This software is built on the Robotic Operating System ([ROS]), which needs to be [installed](http://wiki.ros.org) first. Additionally, YOLO for ROS depends on following software: 56 | 57 | - [OpenCV](http://opencv.org/) (computer vision library), 58 | - [boost](http://www.boost.org/) (c++ library), 59 | 60 | ### Building 61 | 62 | [![Build Status](https://ci.leggedrobotics.com/buildStatus/icon?job=github_leggedrobotics/darknet_ros/master)](https://ci.leggedrobotics.com/job/github_leggedrobotics/job/darknet_ros/job/master/) 63 | 64 | In order to install darknet_ros, clone the latest version using SSH (see [how to set up an SSH key](https://confluence.atlassian.com/bitbucket/set-up-an-ssh-key-728138079.html)) from this repository into your catkin workspace and compile the package using ROS. 65 | 66 | cd catkin_workspace/src 67 | git clone --recursive git@github.com:leggedrobotics/darknet_ros.git 68 | cd ../ 69 | 70 | To maximize performance, make sure to build in *Release* mode. You can specify the build type by setting 71 | 72 | catkin_make -DCMAKE_BUILD_TYPE=Release 73 | 74 | or using the [Catkin Command Line Tools](http://catkin-tools.readthedocs.io/en/latest/index.html#) 75 | 76 | catkin build darknet_ros -DCMAKE_BUILD_TYPE=Release 77 | 78 | Darknet on the CPU is fast (approximately 1.5 seconds on an Intel Core i7-6700HQ CPU @ 2.60GHz × 8) but it's like 500 times faster on GPU! You'll have to have an Nvidia GPU and you'll have to install CUDA. The CMakeLists.txt file automatically detects if you have CUDA installed or not. CUDA is a parallel computing platform and application programming interface (API) model created by Nvidia. 
If you do not have CUDA on your System the build process will switch to the CPU version of YOLO. If you are compiling with CUDA, you might receive the following build error: 79 | 80 | nvcc fatal : Unsupported gpu architecture 'compute_61'. 81 | 82 | This means that you need to check the compute capability (version) of your GPU. You can find a list of supported GPUs in CUDA here: [CUDA - WIKIPEDIA](https://en.wikipedia.org/wiki/CUDA#Supported_GPUs). Simply find the compute capability of your GPU and add it into darknet_ros/CMakeLists.txt. Simply add a similar line like 83 | 84 | -O3 -gencode arch=compute_62,code=sm_62 85 | 86 | ### Download weights 87 | 88 | The yolo-voc.weights and tiny-yolo-voc.weights are downloaded automatically in the CMakeLists.txt file. If you need to download them again, go into the weights folder and download the two pre-trained weights from the COCO data set: 89 | 90 | cd catkin_workspace/src/darknet_ros/darknet_ros/yolo_network_config/weights/ 91 | wget http://pjreddie.com/media/files/yolov2.weights 92 | wget http://pjreddie.com/media/files/yolov2-tiny.weights 93 | 94 | And weights from the VOC data set can be found here: 95 | 96 | wget http://pjreddie.com/media/files/yolov2-voc.weights 97 | wget http://pjreddie.com/media/files/yolov2-tiny-voc.weights 98 | 99 | And the pre-trained weight from YOLO v3 can be found here: 100 | 101 | wget http://pjreddie.com/media/files/yolov3-tiny.weights 102 | wget http://pjreddie.com/media/files/yolov3.weights 103 | 104 | There are more pre-trained weights from different data sets reported [here](https://pjreddie.com/darknet/yolo/). 
105 | 106 | ### Use your own detection objects 107 | 108 | In order to use your own detection objects you need to provide your weights and your cfg file inside the directories: 109 | 110 | catkin_workspace/src/darknet_ros/darknet_ros/yolo_network_config/weights/ 111 | catkin_workspace/src/darknet_ros/darknet_ros/yolo_network_config/cfg/ 112 | 113 | In addition, you need to create your config file for ROS where you define the names of the detection objects. You need to include it inside: 114 | 115 | catkin_workspace/src/darknet_ros/darknet_ros/config/ 116 | 117 | Then in the launch file you have to point to your new config file in the line: 118 | 119 | 120 | 121 | ### Unit Tests 122 | 123 | Run the unit tests using the [Catkin Command Line Tools](http://catkin-tools.readthedocs.io/en/latest/index.html#) 124 | 125 | catkin build darknet_ros --no-deps --verbose --catkin-make-args run_tests 126 | 127 | You will see the image above popping up. 128 | 129 | ## Basic Usage 130 | 131 | In order to get YOLO ROS: Real-Time Object Detection for ROS to run with your robot, you will need to adapt a few parameters. It is easiest if you duplicate and adapt all the parameter files that you need to change from the `darknet_ros` package. These are specifically the parameter files in `config` and the launch file from the `launch` folder. 132 | 133 | ## Nodes 134 | 135 | ### Node: darknet_ros 136 | 137 | This is the main YOLO ROS: Real-Time Object Detection for ROS node. It uses the camera measurements to detect pre-learned objects in the frames. 138 | 139 | ### ROS related parameters 140 | 141 | You can change the names and other parameters of the publishers, subscribers and actions inside `darknet_ros/config/ros.yaml`. 142 | 143 | #### Subscribed Topics 144 | 145 | * **`/camera_reading`** ([sensor_msgs/Image]) 146 | 147 | The camera measurements.
148 | 149 | #### Published Topics 150 | 151 | * **`object_detector`** ([std_msgs::Int8]) 152 | 153 | Publishes the number of detected objects. 154 | 155 | * **`bounding_boxes`** ([darknet_ros_msgs::BoundingBoxes]) 156 | 157 | Publishes an array of bounding boxes that gives information of the position and size of the bounding box in pixel coordinates. 158 | 159 | * **`detection_image`** ([sensor_msgs::Image]) 160 | 161 | Publishes an image of the detection image including the bounding boxes. 162 | 163 | #### Actions 164 | 165 | * **`camera_reading`** ([sensor_msgs::Image]) 166 | 167 | Sends an action with an image and the result is an array of bounding boxes. 168 | 169 | ### Detection related parameters 170 | 171 | You can change the parameters that are related to the detection by adding a new config file that looks similar to `darknet_ros/config/yolo.yaml`. 172 | 173 | * **`image_view/enable_opencv`** (bool) 174 | 175 | Enable or disable the open cv view of the detection image including the bounding boxes. 176 | 177 | * **`image_view/wait_key_delay`** (int) 178 | 179 | Wait key delay in ms of the open cv window. 180 | 181 | * **`yolo_model/config_file/name`** (string) 182 | 183 | Name of the cfg file of the network that is used for detection. The code searches for this name inside `darknet_ros/yolo_network_config/cfg/`. 184 | 185 | * **`yolo_model/weight_file/name`** (string) 186 | 187 | Name of the weights file of the network that is used for detection. The code searches for this name inside `darknet_ros/yolo_network_config/weights/`. 188 | 189 | * **`yolo_model/threshold/value`** (float) 190 | 191 | Threshold of the detection algorithm. It is defined between 0 and 1. 192 | 193 | * **`yolo_model/detection_classes/names`** (array of strings) 194 | 195 | Detection names of the network used by the cfg and weights file inside `darknet_ros/yolo_network_config/`. 
196 | -------------------------------------------------------------------------------- /darknet_ros/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.12) 2 | project(darknet_ros) 3 | 4 | # Set c++11 cmake flags 5 | set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}") 6 | set(CMAKE_C_FLAGS "-Wall -Wno-unused-result -Wno-unknown-pragmas -Wno-unused-variable -Wfatal-errors -fPIC ${CMAKE_C_FLAGS}") 7 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 8 | 9 | # Define path of darknet folder here. 10 | find_path(DARKNET_PATH 11 | NAMES "README.md" 12 | HINTS "${CMAKE_CURRENT_SOURCE_DIR}/../darknet/") 13 | message(STATUS "Darknet path dir = ${DARKNET_PATH}") 14 | add_definitions(-DDARKNET_FILE_PATH="${DARKNET_PATH}") 15 | 16 | # Find CUDA 17 | find_package(CUDA QUIET) 18 | if (CUDA_FOUND) 19 | find_package(CUDA REQUIRED) 20 | message(STATUS "CUDA Version: ${CUDA_VERSION_STRING}") 21 | message(STATUS "CUDA Libraries: ${CUDA_LIBRARIES}") 22 | set( 23 | CUDA_NVCC_FLAGS 24 | ${CUDA_NVCC_FLAGS}; 25 | -O3 26 | # -gencode arch=compute_30,code=sm_30 27 | -gencode arch=compute_35,code=sm_35 28 | -gencode arch=compute_50,code=[sm_50,compute_50] 29 | -gencode arch=compute_52,code=[sm_52,compute_52] 30 | -gencode arch=compute_61,code=sm_61 31 | -gencode arch=compute_62,code=sm_62 32 | ) 33 | add_definitions(-DGPU) 34 | else() 35 | list(APPEND LIBRARIES "m") 36 | endif() 37 | 38 | # Find X11 39 | message ( STATUS "Searching for X11..." 
) 40 | find_package ( X11 REQUIRED ) 41 | if ( X11_FOUND ) 42 | include_directories ( ${X11_INCLUDE_DIR} ) 43 | link_libraries ( ${X11_LIBRARIES} ) 44 | message ( STATUS " X11_INCLUDE_DIR: " ${X11_INCLUDE_DIR} ) 45 | message ( STATUS " X11_LIBRARIES: " ${X11_LIBRARIES} ) 46 | endif ( X11_FOUND ) 47 | 48 | # Find required packages 49 | find_package(Boost REQUIRED COMPONENTS thread) 50 | find_package(OpenCV REQUIRED) 51 | include_directories(${OpenCV_INCLUDE_DIRS}) 52 | find_package(catkin REQUIRED 53 | COMPONENTS 54 | cv_bridge 55 | roscpp 56 | rospy 57 | std_msgs 58 | actionlib 59 | darknet_ros_msgs 60 | image_transport 61 | ) 62 | 63 | # Enable OPENCV in darknet 64 | add_definitions(-DOPENCV) 65 | add_definitions(-O4 -g) 66 | 67 | catkin_package( 68 | INCLUDE_DIRS 69 | include 70 | LIBRARIES 71 | ${PROJECT_NAME}_lib 72 | CATKIN_DEPENDS 73 | cv_bridge 74 | roscpp 75 | actionlib 76 | rospy 77 | std_msgs 78 | darknet_ros_msgs 79 | image_transport 80 | DEPENDS 81 | Boost 82 | ) 83 | 84 | include_directories( 85 | ${DARKNET_PATH}/src 86 | ${DARKNET_PATH}/include 87 | include 88 | ${Boost_INCLUDE_DIRS} 89 | ${catkin_INCLUDE_DIRS} 90 | ) 91 | 92 | set(PROJECT_LIB_FILES 93 | src/YoloObjectDetector.cpp src/image_interface.c 94 | ) 95 | 96 | set(DARKNET_CORE_FILES 97 | ${DARKNET_PATH}/src/activation_layer.c ${DARKNET_PATH}/src/im2col.c 98 | ${DARKNET_PATH}/src/activations.c ${DARKNET_PATH}/src/image.c 99 | ${DARKNET_PATH}/src/avgpool_layer.c ${DARKNET_PATH}/src/layer.c 100 | ${DARKNET_PATH}/src/batchnorm_layer.c ${DARKNET_PATH}/src/list.c 101 | ${DARKNET_PATH}/src/blas.c ${DARKNET_PATH}/src/local_layer.c 102 | ${DARKNET_PATH}/src/box.c ${DARKNET_PATH}/src/lstm_layer.c 103 | ${DARKNET_PATH}/src/col2im.c ${DARKNET_PATH}/src/matrix.c 104 | ${DARKNET_PATH}/src/connected_layer.c ${DARKNET_PATH}/src/maxpool_layer.c 105 | ${DARKNET_PATH}/src/convolutional_layer.c ${DARKNET_PATH}/src/network.c 106 | ${DARKNET_PATH}/src/cost_layer.c ${DARKNET_PATH}/src/normalization_layer.c 107 | 
${DARKNET_PATH}/src/crnn_layer.c ${DARKNET_PATH}/src/option_list.c 108 | ${DARKNET_PATH}/src/crop_layer.c ${DARKNET_PATH}/src/parser.c 109 | ${DARKNET_PATH}/src/dark_cuda.c ${DARKNET_PATH}/src/region_layer.c 110 | ${DARKNET_PATH}/src/data.c ${DARKNET_PATH}/src/reorg_layer.c 111 | ${DARKNET_PATH}/src/deconvolutional_layer.c ${DARKNET_PATH}/src/rnn_layer.c 112 | ${DARKNET_PATH}/src/demo.c ${DARKNET_PATH}/src/route_layer.c 113 | ${DARKNET_PATH}/src/detection_layer.c ${DARKNET_PATH}/src/shortcut_layer.c 114 | ${DARKNET_PATH}/src/dropout_layer.c ${DARKNET_PATH}/src/softmax_layer.c 115 | ${DARKNET_PATH}/src/gemm.c ${DARKNET_PATH}/src/tree.c 116 | ${DARKNET_PATH}/src/gru_layer.c ${DARKNET_PATH}/src/utils.c 117 | ${DARKNET_PATH}/src/upsample_layer.c # ${DARKNET_PATH}/src/logistic_layer.c 118 | # ${DARKNET_PATH}/src/l2norm_layer.c ${DARKNET_PATH}/src/yolo_layer.c 119 | ${DARKNET_PATH}/src/representation_layer.c 120 | 121 | ${DARKNET_PATH}/src/art.c # ${DARKNET_PATH}/src/lsd.c 122 | # ${DARKNET_PATH}/src/attention.c ${DARKNET_PATH}/src/nightmare.c 123 | ${DARKNET_PATH}/src/captcha.c # ${DARKNET_PATH}/src/regressor.c 124 | ${DARKNET_PATH}/src/cifar.c ${DARKNET_PATH}/src/rnn.c 125 | ${DARKNET_PATH}/src/classifier.c # ${DARKNET_PATH}/src/segmenter.c 126 | ${DARKNET_PATH}/src/coco.c ${DARKNET_PATH}/src/super.c 127 | ${DARKNET_PATH}/src/darknet.c ${DARKNET_PATH}/src/tag.c 128 | ${DARKNET_PATH}/src/detector.c ${DARKNET_PATH}/src/yolo.c 129 | ${DARKNET_PATH}/src/go.c 130 | 131 | ${DARKNET_PATH}/src/image_opencv.cpp 132 | ${DARKNET_PATH}/src/conv_lstm_layer.c 133 | ${DARKNET_PATH}/src/sam_layer.c 134 | ${DARKNET_PATH}/src/gaussian_yolo_layer.c 135 | ${DARKNET_PATH}/src/http_stream.cpp 136 | ${DARKNET_PATH}/src/scale_channels_layer.c 137 | ${DARKNET_PATH}/src/nightmare.c 138 | ${DARKNET_PATH}/src/voxel.c 139 | ${DARKNET_PATH}/src/compare.c 140 | ${DARKNET_PATH}/src/reorg_old_layer.c 141 | ${DARKNET_PATH}/src/writing.c 142 | ${DARKNET_PATH}/src/yolo_layer.c 143 | 
${DARKNET_PATH}/src/dice.c 144 | ${DARKNET_PATH}/src/rnn_vid.c 145 | ) 146 | 147 | set(DARKNET_CUDA_FILES 148 | ${DARKNET_PATH}/src/activation_kernels.cu ${DARKNET_PATH}/src/crop_layer_kernels.cu 149 | ${DARKNET_PATH}/src/avgpool_layer_kernels.cu ${DARKNET_PATH}/src/deconvolutional_kernels.cu 150 | ${DARKNET_PATH}/src/blas_kernels.cu ${DARKNET_PATH}/src/dropout_layer_kernels.cu 151 | ${DARKNET_PATH}/src/col2im_kernels.cu ${DARKNET_PATH}/src/im2col_kernels.cu 152 | ${DARKNET_PATH}/src/convolutional_kernels.cu ${DARKNET_PATH}/src/maxpool_layer_kernels.cu 153 | 154 | ${DARKNET_PATH}/src/network_kernels.cu 155 | 156 | ) 157 | 158 | if (CUDA_FOUND) 159 | 160 | link_directories( 161 | ${CUDA_TOOLKIT_ROOT_DIR}/lib64 162 | ) 163 | 164 | cuda_add_library(${PROJECT_NAME}_lib 165 | ${PROJECT_LIB_FILES} ${DARKNET_CORE_FILES} 166 | ${DARKNET_CUDA_FILES} 167 | ) 168 | 169 | target_link_libraries(${PROJECT_NAME}_lib 170 | cuda 171 | cudart 172 | cublas 173 | curand 174 | ) 175 | 176 | cuda_add_executable(${PROJECT_NAME} 177 | src/yolo_object_detector_node.cpp 178 | ) 179 | 180 | else() 181 | 182 | add_library(${PROJECT_NAME}_lib 183 | ${PROJECT_LIB_FILES} ${DARKNET_CORE_FILES} 184 | ) 185 | 186 | add_executable(${PROJECT_NAME} 187 | src/yolo_object_detector_node.cpp 188 | ) 189 | 190 | endif() 191 | 192 | target_link_libraries(${PROJECT_NAME}_lib 193 | m 194 | pthread 195 | stdc++ 196 | ${Boost_LIBRARIES} 197 | ${OpenCV_LIBRARIES} 198 | ${catkin_LIBRARIES} 199 | ${OpenCV_LIBS} 200 | ) 201 | 202 | target_link_libraries(${PROJECT_NAME} 203 | ${PROJECT_NAME}_lib 204 | ) 205 | 206 | add_dependencies(${PROJECT_NAME}_lib 207 | darknet_ros_msgs_generate_messages_cpp 208 | ) 209 | 210 | install(TARGETS ${PROJECT_NAME}_lib 211 | ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} 212 | LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} 213 | RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 214 | ) 215 | 216 | install(TARGETS ${PROJECT_NAME} 217 | RUNTIME DESTINATION 
${CATKIN_PACKAGE_BIN_DESTINATION} 218 | ) 219 | 220 | install( 221 | DIRECTORY include/${PROJECT_NAME}/ 222 | DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} 223 | FILES_MATCHING PATTERN "*.h" 224 | ) 225 | 226 | install(DIRECTORY config launch yolo_network_config 227 | DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} 228 | ) 229 | 230 | # Download yolov2-tiny.weights 231 | set(PATH "${CMAKE_CURRENT_SOURCE_DIR}/yolo_network_config/weights") 232 | set(FILE "${PATH}/yolov2-tiny.weights") 233 | message(STATUS "Checking and downloading yolov2-tiny.weights if needed ...") 234 | if (NOT EXISTS "${FILE}") 235 | message(STATUS "... file does not exist. Downloading now ...") 236 | execute_process(COMMAND wget -q https://github.com/leggedrobotics/darknet_ros/releases/download/1.1.4/yolov2-tiny.weights -P ${PATH}) 237 | endif() 238 | 239 | # Download yolov3.weights 240 | set(FILE "${PATH}/yolov3.weights") 241 | message(STATUS "Checking and downloading yolov3.weights if needed ...") 242 | if (NOT EXISTS "${FILE}") 243 | message(STATUS "... file does not exist. Downloading now ...") 244 | execute_process(COMMAND wget -q https://github.com/leggedrobotics/darknet_ros/releases/download/1.1.4/yolov3.weights -P ${PATH}) 245 | endif() 246 | 247 | # Download yolov4.weights 248 | set(FILE "${PATH}/yolov4.weights") 249 | message(STATUS "Checking and downloading yolov4.weights if needed ...") 250 | if (NOT EXISTS "${FILE}") 251 | message(STATUS "... file does not exist. 
Downloading now ...") 252 | execute_process(COMMAND wget -q https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights -P ${PATH}) 253 | endif() 254 | 255 | ############# 256 | ## Testing ## 257 | ############# 258 | 259 | if(CATKIN_ENABLE_TESTING) 260 | # Download yolov2.weights 261 | set(PATH "${CMAKE_CURRENT_SOURCE_DIR}/yolo_network_config/weights") 262 | set(FILE "${PATH}/yolov2.weights") 263 | message(STATUS "Checking and downloading yolov2.weights if needed ...") 264 | if (NOT EXISTS "${FILE}") 265 | message(STATUS "... file does not exist. Downloading now ...") 266 | execute_process(COMMAND wget -q https://github.com/leggedrobotics/darknet_ros/releases/download/1.1.4/yolov2.weights -P ${PATH}) 267 | endif() 268 | 269 | find_package(rostest REQUIRED) 270 | 271 | # Object detection in images. 272 | add_rostest_gtest(${PROJECT_NAME}_object_detection-test 273 | test/object_detection.test 274 | test/test_main.cpp 275 | test/ObjectDetection.cpp 276 | ) 277 | target_link_libraries(${PROJECT_NAME}_object_detection-test 278 | ${catkin_LIBRARIES} 279 | ) 280 | endif() 281 | 282 | ######################### 283 | ### CLANG TOOLING ### 284 | ######################### 285 | find_package(cmake_clang_tools QUIET) 286 | if (cmake_clang_tools_FOUND) 287 | message(STATUS "Run clang tooling") 288 | add_clang_tooling( 289 | TARGETS ${PROJECT_NAME} 290 | SOURCE_DIRS ${CMAKE_CURRENT_LIST_DIR}/src ${CMAKE_CURRENT_LIST_DIR}/include ${CMAKE_CURRENT_LIST_DIR}/test 291 | CT_HEADER_DIRS ${CMAKE_CURRENT_LIST_DIR}/include 292 | CF_WERROR 293 | ) 294 | endif (cmake_clang_tools_FOUND) 295 | -------------------------------------------------------------------------------- /darknet_ros/yolo_network_config/cfg/yolov3-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=16 8 | width=416 9 | height=416 10 | channels=3 11 | 
momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 50200 21 | policy=steps 22 | steps=40000,45000 23 | scales=.1,.1 24 | 25 | 26 | 27 | [convolutional] 28 | batch_normalize=1 29 | filters=32 30 | size=3 31 | stride=1 32 | pad=1 33 | activation=leaky 34 | 35 | # Downsample 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=2 42 | pad=1 43 | activation=leaky 44 | 45 | [convolutional] 46 | batch_normalize=1 47 | filters=32 48 | size=1 49 | stride=1 50 | pad=1 51 | activation=leaky 52 | 53 | [convolutional] 54 | batch_normalize=1 55 | filters=64 56 | size=3 57 | stride=1 58 | pad=1 59 | activation=leaky 60 | 61 | [shortcut] 62 | from=-3 63 | activation=linear 64 | 65 | # Downsample 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=128 70 | size=3 71 | stride=2 72 | pad=1 73 | activation=leaky 74 | 75 | [convolutional] 76 | batch_normalize=1 77 | filters=64 78 | size=1 79 | stride=1 80 | pad=1 81 | activation=leaky 82 | 83 | [convolutional] 84 | batch_normalize=1 85 | filters=128 86 | size=3 87 | stride=1 88 | pad=1 89 | activation=leaky 90 | 91 | [shortcut] 92 | from=-3 93 | activation=linear 94 | 95 | [convolutional] 96 | batch_normalize=1 97 | filters=64 98 | size=1 99 | stride=1 100 | pad=1 101 | activation=leaky 102 | 103 | [convolutional] 104 | batch_normalize=1 105 | filters=128 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [shortcut] 112 | from=-3 113 | activation=linear 114 | 115 | # Downsample 116 | 117 | [convolutional] 118 | batch_normalize=1 119 | filters=256 120 | size=3 121 | stride=2 122 | pad=1 123 | activation=leaky 124 | 125 | [convolutional] 126 | batch_normalize=1 127 | filters=128 128 | size=1 129 | stride=1 130 | pad=1 131 | activation=leaky 132 | 133 | [convolutional] 134 | batch_normalize=1 135 | filters=256 136 | size=3 137 | stride=1 138 | pad=1 139 | 
activation=leaky 140 | 141 | [shortcut] 142 | from=-3 143 | activation=linear 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [convolutional] 154 | batch_normalize=1 155 | filters=256 156 | size=3 157 | stride=1 158 | pad=1 159 | activation=leaky 160 | 161 | [shortcut] 162 | from=-3 163 | activation=linear 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=128 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=256 176 | size=3 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [shortcut] 182 | from=-3 183 | activation=linear 184 | 185 | [convolutional] 186 | batch_normalize=1 187 | filters=128 188 | size=1 189 | stride=1 190 | pad=1 191 | activation=leaky 192 | 193 | [convolutional] 194 | batch_normalize=1 195 | filters=256 196 | size=3 197 | stride=1 198 | pad=1 199 | activation=leaky 200 | 201 | [shortcut] 202 | from=-3 203 | activation=linear 204 | 205 | 206 | [convolutional] 207 | batch_normalize=1 208 | filters=128 209 | size=1 210 | stride=1 211 | pad=1 212 | activation=leaky 213 | 214 | [convolutional] 215 | batch_normalize=1 216 | filters=256 217 | size=3 218 | stride=1 219 | pad=1 220 | activation=leaky 221 | 222 | [shortcut] 223 | from=-3 224 | activation=linear 225 | 226 | [convolutional] 227 | batch_normalize=1 228 | filters=128 229 | size=1 230 | stride=1 231 | pad=1 232 | activation=leaky 233 | 234 | [convolutional] 235 | batch_normalize=1 236 | filters=256 237 | size=3 238 | stride=1 239 | pad=1 240 | activation=leaky 241 | 242 | [shortcut] 243 | from=-3 244 | activation=linear 245 | 246 | [convolutional] 247 | batch_normalize=1 248 | filters=128 249 | size=1 250 | stride=1 251 | pad=1 252 | activation=leaky 253 | 254 | [convolutional] 255 | batch_normalize=1 256 | filters=256 257 | size=3 258 | stride=1 259 | pad=1 260 | activation=leaky 261 | 262 | 
[shortcut] 263 | from=-3 264 | activation=linear 265 | 266 | [convolutional] 267 | batch_normalize=1 268 | filters=128 269 | size=1 270 | stride=1 271 | pad=1 272 | activation=leaky 273 | 274 | [convolutional] 275 | batch_normalize=1 276 | filters=256 277 | size=3 278 | stride=1 279 | pad=1 280 | activation=leaky 281 | 282 | [shortcut] 283 | from=-3 284 | activation=linear 285 | 286 | # Downsample 287 | 288 | [convolutional] 289 | batch_normalize=1 290 | filters=512 291 | size=3 292 | stride=2 293 | pad=1 294 | activation=leaky 295 | 296 | [convolutional] 297 | batch_normalize=1 298 | filters=256 299 | size=1 300 | stride=1 301 | pad=1 302 | activation=leaky 303 | 304 | [convolutional] 305 | batch_normalize=1 306 | filters=512 307 | size=3 308 | stride=1 309 | pad=1 310 | activation=leaky 311 | 312 | [shortcut] 313 | from=-3 314 | activation=linear 315 | 316 | 317 | [convolutional] 318 | batch_normalize=1 319 | filters=256 320 | size=1 321 | stride=1 322 | pad=1 323 | activation=leaky 324 | 325 | [convolutional] 326 | batch_normalize=1 327 | filters=512 328 | size=3 329 | stride=1 330 | pad=1 331 | activation=leaky 332 | 333 | [shortcut] 334 | from=-3 335 | activation=linear 336 | 337 | 338 | [convolutional] 339 | batch_normalize=1 340 | filters=256 341 | size=1 342 | stride=1 343 | pad=1 344 | activation=leaky 345 | 346 | [convolutional] 347 | batch_normalize=1 348 | filters=512 349 | size=3 350 | stride=1 351 | pad=1 352 | activation=leaky 353 | 354 | [shortcut] 355 | from=-3 356 | activation=linear 357 | 358 | 359 | [convolutional] 360 | batch_normalize=1 361 | filters=256 362 | size=1 363 | stride=1 364 | pad=1 365 | activation=leaky 366 | 367 | [convolutional] 368 | batch_normalize=1 369 | filters=512 370 | size=3 371 | stride=1 372 | pad=1 373 | activation=leaky 374 | 375 | [shortcut] 376 | from=-3 377 | activation=linear 378 | 379 | [convolutional] 380 | batch_normalize=1 381 | filters=256 382 | size=1 383 | stride=1 384 | pad=1 385 | activation=leaky 386 | 
387 | [convolutional] 388 | batch_normalize=1 389 | filters=512 390 | size=3 391 | stride=1 392 | pad=1 393 | activation=leaky 394 | 395 | [shortcut] 396 | from=-3 397 | activation=linear 398 | 399 | 400 | [convolutional] 401 | batch_normalize=1 402 | filters=256 403 | size=1 404 | stride=1 405 | pad=1 406 | activation=leaky 407 | 408 | [convolutional] 409 | batch_normalize=1 410 | filters=512 411 | size=3 412 | stride=1 413 | pad=1 414 | activation=leaky 415 | 416 | [shortcut] 417 | from=-3 418 | activation=linear 419 | 420 | 421 | [convolutional] 422 | batch_normalize=1 423 | filters=256 424 | size=1 425 | stride=1 426 | pad=1 427 | activation=leaky 428 | 429 | [convolutional] 430 | batch_normalize=1 431 | filters=512 432 | size=3 433 | stride=1 434 | pad=1 435 | activation=leaky 436 | 437 | [shortcut] 438 | from=-3 439 | activation=linear 440 | 441 | [convolutional] 442 | batch_normalize=1 443 | filters=256 444 | size=1 445 | stride=1 446 | pad=1 447 | activation=leaky 448 | 449 | [convolutional] 450 | batch_normalize=1 451 | filters=512 452 | size=3 453 | stride=1 454 | pad=1 455 | activation=leaky 456 | 457 | [shortcut] 458 | from=-3 459 | activation=linear 460 | 461 | # Downsample 462 | 463 | [convolutional] 464 | batch_normalize=1 465 | filters=1024 466 | size=3 467 | stride=2 468 | pad=1 469 | activation=leaky 470 | 471 | [convolutional] 472 | batch_normalize=1 473 | filters=512 474 | size=1 475 | stride=1 476 | pad=1 477 | activation=leaky 478 | 479 | [convolutional] 480 | batch_normalize=1 481 | filters=1024 482 | size=3 483 | stride=1 484 | pad=1 485 | activation=leaky 486 | 487 | [shortcut] 488 | from=-3 489 | activation=linear 490 | 491 | [convolutional] 492 | batch_normalize=1 493 | filters=512 494 | size=1 495 | stride=1 496 | pad=1 497 | activation=leaky 498 | 499 | [convolutional] 500 | batch_normalize=1 501 | filters=1024 502 | size=3 503 | stride=1 504 | pad=1 505 | activation=leaky 506 | 507 | [shortcut] 508 | from=-3 509 | activation=linear 510 
| 511 | [convolutional] 512 | batch_normalize=1 513 | filters=512 514 | size=1 515 | stride=1 516 | pad=1 517 | activation=leaky 518 | 519 | [convolutional] 520 | batch_normalize=1 521 | filters=1024 522 | size=3 523 | stride=1 524 | pad=1 525 | activation=leaky 526 | 527 | [shortcut] 528 | from=-3 529 | activation=linear 530 | 531 | [convolutional] 532 | batch_normalize=1 533 | filters=512 534 | size=1 535 | stride=1 536 | pad=1 537 | activation=leaky 538 | 539 | [convolutional] 540 | batch_normalize=1 541 | filters=1024 542 | size=3 543 | stride=1 544 | pad=1 545 | activation=leaky 546 | 547 | [shortcut] 548 | from=-3 549 | activation=linear 550 | 551 | ###################### 552 | 553 | [convolutional] 554 | batch_normalize=1 555 | filters=512 556 | size=1 557 | stride=1 558 | pad=1 559 | activation=leaky 560 | 561 | [convolutional] 562 | batch_normalize=1 563 | size=3 564 | stride=1 565 | pad=1 566 | filters=1024 567 | activation=leaky 568 | 569 | [convolutional] 570 | batch_normalize=1 571 | filters=512 572 | size=1 573 | stride=1 574 | pad=1 575 | activation=leaky 576 | 577 | [convolutional] 578 | batch_normalize=1 579 | size=3 580 | stride=1 581 | pad=1 582 | filters=1024 583 | activation=leaky 584 | 585 | [convolutional] 586 | batch_normalize=1 587 | filters=512 588 | size=1 589 | stride=1 590 | pad=1 591 | activation=leaky 592 | 593 | [convolutional] 594 | batch_normalize=1 595 | size=3 596 | stride=1 597 | pad=1 598 | filters=1024 599 | activation=leaky 600 | 601 | [convolutional] 602 | size=1 603 | stride=1 604 | pad=1 605 | filters=75 606 | activation=linear 607 | 608 | [yolo] 609 | mask = 6,7,8 610 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 611 | classes=20 612 | num=9 613 | jitter=.3 614 | ignore_thresh = .5 615 | truth_thresh = 1 616 | random=1 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 
628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=75 690 | activation=linear 691 | 692 | [yolo] 693 | mask = 3,4,5 694 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 695 | classes=20 696 | num=9 697 | jitter=.3 698 | ignore_thresh = .5 699 | truth_thresh = 1 700 | random=1 701 | 702 | [route] 703 | layers = -4 704 | 705 | [convolutional] 706 | batch_normalize=1 707 | filters=128 708 | size=1 709 | stride=1 710 | pad=1 711 | activation=leaky 712 | 713 | [upsample] 714 | stride=2 715 | 716 | [route] 717 | layers = -1, 36 718 | 719 | 720 | 721 | [convolutional] 722 | batch_normalize=1 723 | filters=128 724 | size=1 725 | stride=1 726 | pad=1 727 | activation=leaky 728 | 729 | [convolutional] 730 | batch_normalize=1 731 | size=3 732 | stride=1 733 | pad=1 734 | filters=256 735 | activation=leaky 736 | 737 | [convolutional] 738 | batch_normalize=1 739 | filters=128 740 | size=1 741 | stride=1 742 | pad=1 743 | activation=leaky 744 | 745 | [convolutional] 746 | batch_normalize=1 747 | size=3 748 | stride=1 749 | pad=1 750 | 
filters=256 751 | activation=leaky 752 | 753 | [convolutional] 754 | batch_normalize=1 755 | filters=128 756 | size=1 757 | stride=1 758 | pad=1 759 | activation=leaky 760 | 761 | [convolutional] 762 | batch_normalize=1 763 | size=3 764 | stride=1 765 | pad=1 766 | filters=256 767 | activation=leaky 768 | 769 | [convolutional] 770 | size=1 771 | stride=1 772 | pad=1 773 | filters=75 774 | activation=linear 775 | 776 | [yolo] 777 | mask = 0,1,2 778 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 779 | classes=20 780 | num=9 781 | jitter=.3 782 | ignore_thresh = .5 783 | truth_thresh = 1 784 | random=1 785 | 786 | -------------------------------------------------------------------------------- /darknet_ros/yolo_network_config/cfg/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=16 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | 
[convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 
| activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 
| activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | 
activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | 
[convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .5 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | 
pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .5 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .5 787 | truth_thresh = 1 788 | random=1 789 | 790 | -------------------------------------------------------------------------------- /darknet_ros/yolo_network_config/cfg/yolov4.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # batch=64 3 | batch=1 4 | subdivisions=8 5 | # Training 6 
| #width=512 7 | #height=512 8 | width=608 9 | height=608 10 | # width=416 11 | # height=416 12 | channels=3 13 | momentum=0.949 14 | decay=0.0005 15 | angle=0 16 | saturation = 1.5 17 | exposure = 1.5 18 | hue=.1 19 | 20 | learning_rate=0.0013 21 | burn_in=1000 22 | max_batches = 500500 23 | policy=steps 24 | steps=400000,450000 25 | scales=.1,.1 26 | 27 | #cutmix=1 28 | mosaic=1 29 | 30 | #:104x104 54:52x52 85:26x26 104:13x13 for 416 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=mish 39 | 40 | # Downsample 41 | 42 | [convolutional] 43 | batch_normalize=1 44 | filters=64 45 | size=3 46 | stride=2 47 | pad=1 48 | activation=mish 49 | 50 | [convolutional] 51 | batch_normalize=1 52 | filters=64 53 | size=1 54 | stride=1 55 | pad=1 56 | activation=mish 57 | 58 | [route] 59 | layers = -2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=64 64 | size=1 65 | stride=1 66 | pad=1 67 | activation=mish 68 | 69 | [convolutional] 70 | batch_normalize=1 71 | filters=32 72 | size=1 73 | stride=1 74 | pad=1 75 | activation=mish 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=64 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=mish 84 | 85 | [shortcut] 86 | from=-3 87 | activation=linear 88 | 89 | [convolutional] 90 | batch_normalize=1 91 | filters=64 92 | size=1 93 | stride=1 94 | pad=1 95 | activation=mish 96 | 97 | [route] 98 | layers = -1,-7 99 | 100 | [convolutional] 101 | batch_normalize=1 102 | filters=64 103 | size=1 104 | stride=1 105 | pad=1 106 | activation=mish 107 | 108 | # Downsample 109 | 110 | [convolutional] 111 | batch_normalize=1 112 | filters=128 113 | size=3 114 | stride=2 115 | pad=1 116 | activation=mish 117 | 118 | [convolutional] 119 | batch_normalize=1 120 | filters=64 121 | size=1 122 | stride=1 123 | pad=1 124 | activation=mish 125 | 126 | [route] 127 | layers = -2 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=64 132 | size=1 133 | 
stride=1 134 | pad=1 135 | activation=mish 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=64 140 | size=1 141 | stride=1 142 | pad=1 143 | activation=mish 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=64 148 | size=3 149 | stride=1 150 | pad=1 151 | activation=mish 152 | 153 | [shortcut] 154 | from=-3 155 | activation=linear 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=64 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=mish 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=64 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=mish 172 | 173 | [shortcut] 174 | from=-3 175 | activation=linear 176 | 177 | [convolutional] 178 | batch_normalize=1 179 | filters=64 180 | size=1 181 | stride=1 182 | pad=1 183 | activation=mish 184 | 185 | [route] 186 | layers = -1,-10 187 | 188 | [convolutional] 189 | batch_normalize=1 190 | filters=128 191 | size=1 192 | stride=1 193 | pad=1 194 | activation=mish 195 | 196 | # Downsample 197 | 198 | [convolutional] 199 | batch_normalize=1 200 | filters=256 201 | size=3 202 | stride=2 203 | pad=1 204 | activation=mish 205 | 206 | [convolutional] 207 | batch_normalize=1 208 | filters=128 209 | size=1 210 | stride=1 211 | pad=1 212 | activation=mish 213 | 214 | [route] 215 | layers = -2 216 | 217 | [convolutional] 218 | batch_normalize=1 219 | filters=128 220 | size=1 221 | stride=1 222 | pad=1 223 | activation=mish 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | filters=128 228 | size=1 229 | stride=1 230 | pad=1 231 | activation=mish 232 | 233 | [convolutional] 234 | batch_normalize=1 235 | filters=128 236 | size=3 237 | stride=1 238 | pad=1 239 | activation=mish 240 | 241 | [shortcut] 242 | from=-3 243 | activation=linear 244 | 245 | [convolutional] 246 | batch_normalize=1 247 | filters=128 248 | size=1 249 | stride=1 250 | pad=1 251 | activation=mish 252 | 253 | [convolutional] 254 | batch_normalize=1 255 | filters=128 256 | size=3 257 | 
stride=1 258 | pad=1 259 | activation=mish 260 | 261 | [shortcut] 262 | from=-3 263 | activation=linear 264 | 265 | [convolutional] 266 | batch_normalize=1 267 | filters=128 268 | size=1 269 | stride=1 270 | pad=1 271 | activation=mish 272 | 273 | [convolutional] 274 | batch_normalize=1 275 | filters=128 276 | size=3 277 | stride=1 278 | pad=1 279 | activation=mish 280 | 281 | [shortcut] 282 | from=-3 283 | activation=linear 284 | 285 | [convolutional] 286 | batch_normalize=1 287 | filters=128 288 | size=1 289 | stride=1 290 | pad=1 291 | activation=mish 292 | 293 | [convolutional] 294 | batch_normalize=1 295 | filters=128 296 | size=3 297 | stride=1 298 | pad=1 299 | activation=mish 300 | 301 | [shortcut] 302 | from=-3 303 | activation=linear 304 | 305 | 306 | [convolutional] 307 | batch_normalize=1 308 | filters=128 309 | size=1 310 | stride=1 311 | pad=1 312 | activation=mish 313 | 314 | [convolutional] 315 | batch_normalize=1 316 | filters=128 317 | size=3 318 | stride=1 319 | pad=1 320 | activation=mish 321 | 322 | [shortcut] 323 | from=-3 324 | activation=linear 325 | 326 | [convolutional] 327 | batch_normalize=1 328 | filters=128 329 | size=1 330 | stride=1 331 | pad=1 332 | activation=mish 333 | 334 | [convolutional] 335 | batch_normalize=1 336 | filters=128 337 | size=3 338 | stride=1 339 | pad=1 340 | activation=mish 341 | 342 | [shortcut] 343 | from=-3 344 | activation=linear 345 | 346 | [convolutional] 347 | batch_normalize=1 348 | filters=128 349 | size=1 350 | stride=1 351 | pad=1 352 | activation=mish 353 | 354 | [convolutional] 355 | batch_normalize=1 356 | filters=128 357 | size=3 358 | stride=1 359 | pad=1 360 | activation=mish 361 | 362 | [shortcut] 363 | from=-3 364 | activation=linear 365 | 366 | [convolutional] 367 | batch_normalize=1 368 | filters=128 369 | size=1 370 | stride=1 371 | pad=1 372 | activation=mish 373 | 374 | [convolutional] 375 | batch_normalize=1 376 | filters=128 377 | size=3 378 | stride=1 379 | pad=1 380 | activation=mish 
381 | 382 | [shortcut] 383 | from=-3 384 | activation=linear 385 | 386 | [convolutional] 387 | batch_normalize=1 388 | filters=128 389 | size=1 390 | stride=1 391 | pad=1 392 | activation=mish 393 | 394 | [route] 395 | layers = -1,-28 396 | 397 | [convolutional] 398 | batch_normalize=1 399 | filters=256 400 | size=1 401 | stride=1 402 | pad=1 403 | activation=mish 404 | 405 | # Downsample 406 | 407 | [convolutional] 408 | batch_normalize=1 409 | filters=512 410 | size=3 411 | stride=2 412 | pad=1 413 | activation=mish 414 | 415 | [convolutional] 416 | batch_normalize=1 417 | filters=256 418 | size=1 419 | stride=1 420 | pad=1 421 | activation=mish 422 | 423 | [route] 424 | layers = -2 425 | 426 | [convolutional] 427 | batch_normalize=1 428 | filters=256 429 | size=1 430 | stride=1 431 | pad=1 432 | activation=mish 433 | 434 | [convolutional] 435 | batch_normalize=1 436 | filters=256 437 | size=1 438 | stride=1 439 | pad=1 440 | activation=mish 441 | 442 | [convolutional] 443 | batch_normalize=1 444 | filters=256 445 | size=3 446 | stride=1 447 | pad=1 448 | activation=mish 449 | 450 | [shortcut] 451 | from=-3 452 | activation=linear 453 | 454 | 455 | [convolutional] 456 | batch_normalize=1 457 | filters=256 458 | size=1 459 | stride=1 460 | pad=1 461 | activation=mish 462 | 463 | [convolutional] 464 | batch_normalize=1 465 | filters=256 466 | size=3 467 | stride=1 468 | pad=1 469 | activation=mish 470 | 471 | [shortcut] 472 | from=-3 473 | activation=linear 474 | 475 | 476 | [convolutional] 477 | batch_normalize=1 478 | filters=256 479 | size=1 480 | stride=1 481 | pad=1 482 | activation=mish 483 | 484 | [convolutional] 485 | batch_normalize=1 486 | filters=256 487 | size=3 488 | stride=1 489 | pad=1 490 | activation=mish 491 | 492 | [shortcut] 493 | from=-3 494 | activation=linear 495 | 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=256 500 | size=1 501 | stride=1 502 | pad=1 503 | activation=mish 504 | 505 | [convolutional] 506 | 
batch_normalize=1 507 | filters=256 508 | size=3 509 | stride=1 510 | pad=1 511 | activation=mish 512 | 513 | [shortcut] 514 | from=-3 515 | activation=linear 516 | 517 | 518 | [convolutional] 519 | batch_normalize=1 520 | filters=256 521 | size=1 522 | stride=1 523 | pad=1 524 | activation=mish 525 | 526 | [convolutional] 527 | batch_normalize=1 528 | filters=256 529 | size=3 530 | stride=1 531 | pad=1 532 | activation=mish 533 | 534 | [shortcut] 535 | from=-3 536 | activation=linear 537 | 538 | 539 | [convolutional] 540 | batch_normalize=1 541 | filters=256 542 | size=1 543 | stride=1 544 | pad=1 545 | activation=mish 546 | 547 | [convolutional] 548 | batch_normalize=1 549 | filters=256 550 | size=3 551 | stride=1 552 | pad=1 553 | activation=mish 554 | 555 | [shortcut] 556 | from=-3 557 | activation=linear 558 | 559 | 560 | [convolutional] 561 | batch_normalize=1 562 | filters=256 563 | size=1 564 | stride=1 565 | pad=1 566 | activation=mish 567 | 568 | [convolutional] 569 | batch_normalize=1 570 | filters=256 571 | size=3 572 | stride=1 573 | pad=1 574 | activation=mish 575 | 576 | [shortcut] 577 | from=-3 578 | activation=linear 579 | 580 | [convolutional] 581 | batch_normalize=1 582 | filters=256 583 | size=1 584 | stride=1 585 | pad=1 586 | activation=mish 587 | 588 | [convolutional] 589 | batch_normalize=1 590 | filters=256 591 | size=3 592 | stride=1 593 | pad=1 594 | activation=mish 595 | 596 | [shortcut] 597 | from=-3 598 | activation=linear 599 | 600 | [convolutional] 601 | batch_normalize=1 602 | filters=256 603 | size=1 604 | stride=1 605 | pad=1 606 | activation=mish 607 | 608 | [route] 609 | layers = -1,-28 610 | 611 | [convolutional] 612 | batch_normalize=1 613 | filters=512 614 | size=1 615 | stride=1 616 | pad=1 617 | activation=mish 618 | 619 | # Downsample 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=1024 624 | size=3 625 | stride=2 626 | pad=1 627 | activation=mish 628 | 629 | [convolutional] 630 | batch_normalize=1 631 | 
filters=512 632 | size=1 633 | stride=1 634 | pad=1 635 | activation=mish 636 | 637 | [route] 638 | layers = -2 639 | 640 | [convolutional] 641 | batch_normalize=1 642 | filters=512 643 | size=1 644 | stride=1 645 | pad=1 646 | activation=mish 647 | 648 | [convolutional] 649 | batch_normalize=1 650 | filters=512 651 | size=1 652 | stride=1 653 | pad=1 654 | activation=mish 655 | 656 | [convolutional] 657 | batch_normalize=1 658 | filters=512 659 | size=3 660 | stride=1 661 | pad=1 662 | activation=mish 663 | 664 | [shortcut] 665 | from=-3 666 | activation=linear 667 | 668 | [convolutional] 669 | batch_normalize=1 670 | filters=512 671 | size=1 672 | stride=1 673 | pad=1 674 | activation=mish 675 | 676 | [convolutional] 677 | batch_normalize=1 678 | filters=512 679 | size=3 680 | stride=1 681 | pad=1 682 | activation=mish 683 | 684 | [shortcut] 685 | from=-3 686 | activation=linear 687 | 688 | [convolutional] 689 | batch_normalize=1 690 | filters=512 691 | size=1 692 | stride=1 693 | pad=1 694 | activation=mish 695 | 696 | [convolutional] 697 | batch_normalize=1 698 | filters=512 699 | size=3 700 | stride=1 701 | pad=1 702 | activation=mish 703 | 704 | [shortcut] 705 | from=-3 706 | activation=linear 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=512 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=mish 715 | 716 | [convolutional] 717 | batch_normalize=1 718 | filters=512 719 | size=3 720 | stride=1 721 | pad=1 722 | activation=mish 723 | 724 | [shortcut] 725 | from=-3 726 | activation=linear 727 | 728 | [convolutional] 729 | batch_normalize=1 730 | filters=512 731 | size=1 732 | stride=1 733 | pad=1 734 | activation=mish 735 | 736 | [route] 737 | layers = -1,-16 738 | 739 | [convolutional] 740 | batch_normalize=1 741 | filters=1024 742 | size=1 743 | stride=1 744 | pad=1 745 | activation=mish 746 | 747 | ########################## 748 | 749 | [convolutional] 750 | batch_normalize=1 751 | filters=512 752 | size=1 753 | stride=1 754 | pad=1 
755 | activation=leaky 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | size=3 760 | stride=1 761 | pad=1 762 | filters=1024 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | filters=512 768 | size=1 769 | stride=1 770 | pad=1 771 | activation=leaky 772 | 773 | ### SPP ### 774 | [maxpool] 775 | stride=1 776 | size=5 777 | 778 | [route] 779 | layers=-2 780 | 781 | [maxpool] 782 | stride=1 783 | size=9 784 | 785 | [route] 786 | layers=-4 787 | 788 | [maxpool] 789 | stride=1 790 | size=13 791 | 792 | [route] 793 | layers=-1,-3,-5,-6 794 | ### End SPP ### 795 | 796 | [convolutional] 797 | batch_normalize=1 798 | filters=512 799 | size=1 800 | stride=1 801 | pad=1 802 | activation=leaky 803 | 804 | [convolutional] 805 | batch_normalize=1 806 | size=3 807 | stride=1 808 | pad=1 809 | filters=1024 810 | activation=leaky 811 | 812 | [convolutional] 813 | batch_normalize=1 814 | filters=512 815 | size=1 816 | stride=1 817 | pad=1 818 | activation=leaky 819 | 820 | [convolutional] 821 | batch_normalize=1 822 | filters=256 823 | size=1 824 | stride=1 825 | pad=1 826 | activation=leaky 827 | 828 | [upsample] 829 | stride=2 830 | 831 | [route] 832 | layers = 85 833 | 834 | [convolutional] 835 | batch_normalize=1 836 | filters=256 837 | size=1 838 | stride=1 839 | pad=1 840 | activation=leaky 841 | 842 | [route] 843 | layers = -1, -3 844 | 845 | [convolutional] 846 | batch_normalize=1 847 | filters=256 848 | size=1 849 | stride=1 850 | pad=1 851 | activation=leaky 852 | 853 | [convolutional] 854 | batch_normalize=1 855 | size=3 856 | stride=1 857 | pad=1 858 | filters=512 859 | activation=leaky 860 | 861 | [convolutional] 862 | batch_normalize=1 863 | filters=256 864 | size=1 865 | stride=1 866 | pad=1 867 | activation=leaky 868 | 869 | [convolutional] 870 | batch_normalize=1 871 | size=3 872 | stride=1 873 | pad=1 874 | filters=512 875 | activation=leaky 876 | 877 | [convolutional] 878 | batch_normalize=1 879 | filters=256 880 | size=1 
881 | stride=1 882 | pad=1 883 | activation=leaky 884 | 885 | [convolutional] 886 | batch_normalize=1 887 | filters=128 888 | size=1 889 | stride=1 890 | pad=1 891 | activation=leaky 892 | 893 | [upsample] 894 | stride=2 895 | 896 | [route] 897 | layers = 54 898 | 899 | [convolutional] 900 | batch_normalize=1 901 | filters=128 902 | size=1 903 | stride=1 904 | pad=1 905 | activation=leaky 906 | 907 | [route] 908 | layers = -1, -3 909 | 910 | [convolutional] 911 | batch_normalize=1 912 | filters=128 913 | size=1 914 | stride=1 915 | pad=1 916 | activation=leaky 917 | 918 | [convolutional] 919 | batch_normalize=1 920 | size=3 921 | stride=1 922 | pad=1 923 | filters=256 924 | activation=leaky 925 | 926 | [convolutional] 927 | batch_normalize=1 928 | filters=128 929 | size=1 930 | stride=1 931 | pad=1 932 | activation=leaky 933 | 934 | [convolutional] 935 | batch_normalize=1 936 | size=3 937 | stride=1 938 | pad=1 939 | filters=256 940 | activation=leaky 941 | 942 | [convolutional] 943 | batch_normalize=1 944 | filters=128 945 | size=1 946 | stride=1 947 | pad=1 948 | activation=leaky 949 | 950 | ########################## 951 | 952 | [convolutional] 953 | batch_normalize=1 954 | size=3 955 | stride=1 956 | pad=1 957 | filters=256 958 | activation=leaky 959 | 960 | [convolutional] 961 | size=1 962 | stride=1 963 | pad=1 964 | filters=255 965 | activation=linear 966 | 967 | 968 | [yolo] 969 | mask = 0,1,2 970 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 971 | classes=80 972 | num=9 973 | jitter=.3 974 | ignore_thresh = .7 975 | truth_thresh = 1 976 | scale_x_y = 1.2 977 | iou_thresh=0.213 978 | cls_normalizer=1.0 979 | iou_normalizer=0.07 980 | iou_loss=ciou 981 | nms_kind=greedynms 982 | beta_nms=0.6 983 | max_delta=5 984 | 985 | 986 | [route] 987 | layers = -4 988 | 989 | [convolutional] 990 | batch_normalize=1 991 | size=3 992 | stride=2 993 | pad=1 994 | filters=256 995 | activation=leaky 996 | 997 | [route] 998 | layers 
= -1, -16 999 | 1000 | [convolutional] 1001 | batch_normalize=1 1002 | filters=256 1003 | size=1 1004 | stride=1 1005 | pad=1 1006 | activation=leaky 1007 | 1008 | [convolutional] 1009 | batch_normalize=1 1010 | size=3 1011 | stride=1 1012 | pad=1 1013 | filters=512 1014 | activation=leaky 1015 | 1016 | [convolutional] 1017 | batch_normalize=1 1018 | filters=256 1019 | size=1 1020 | stride=1 1021 | pad=1 1022 | activation=leaky 1023 | 1024 | [convolutional] 1025 | batch_normalize=1 1026 | size=3 1027 | stride=1 1028 | pad=1 1029 | filters=512 1030 | activation=leaky 1031 | 1032 | [convolutional] 1033 | batch_normalize=1 1034 | filters=256 1035 | size=1 1036 | stride=1 1037 | pad=1 1038 | activation=leaky 1039 | 1040 | [convolutional] 1041 | batch_normalize=1 1042 | size=3 1043 | stride=1 1044 | pad=1 1045 | filters=512 1046 | activation=leaky 1047 | 1048 | [convolutional] 1049 | size=1 1050 | stride=1 1051 | pad=1 1052 | filters=255 1053 | activation=linear 1054 | 1055 | 1056 | [yolo] 1057 | mask = 3,4,5 1058 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 1059 | classes=80 1060 | num=9 1061 | jitter=.3 1062 | ignore_thresh = .7 1063 | truth_thresh = 1 1064 | scale_x_y = 1.1 1065 | iou_thresh=0.213 1066 | cls_normalizer=1.0 1067 | iou_normalizer=0.07 1068 | iou_loss=ciou 1069 | nms_kind=greedynms 1070 | beta_nms=0.6 1071 | max_delta=5 1072 | 1073 | 1074 | [route] 1075 | layers = -4 1076 | 1077 | [convolutional] 1078 | batch_normalize=1 1079 | size=3 1080 | stride=2 1081 | pad=1 1082 | filters=512 1083 | activation=leaky 1084 | 1085 | [route] 1086 | layers = -1, -37 1087 | 1088 | [convolutional] 1089 | batch_normalize=1 1090 | filters=512 1091 | size=1 1092 | stride=1 1093 | pad=1 1094 | activation=leaky 1095 | 1096 | [convolutional] 1097 | batch_normalize=1 1098 | size=3 1099 | stride=1 1100 | pad=1 1101 | filters=1024 1102 | activation=leaky 1103 | 1104 | [convolutional] 1105 | batch_normalize=1 1106 | filters=512 1107 | 
size=1 1108 | stride=1 1109 | pad=1 1110 | activation=leaky 1111 | 1112 | [convolutional] 1113 | batch_normalize=1 1114 | size=3 1115 | stride=1 1116 | pad=1 1117 | filters=1024 1118 | activation=leaky 1119 | 1120 | [convolutional] 1121 | batch_normalize=1 1122 | filters=512 1123 | size=1 1124 | stride=1 1125 | pad=1 1126 | activation=leaky 1127 | 1128 | [convolutional] 1129 | batch_normalize=1 1130 | size=3 1131 | stride=1 1132 | pad=1 1133 | filters=1024 1134 | activation=leaky 1135 | 1136 | [convolutional] 1137 | size=1 1138 | stride=1 1139 | pad=1 1140 | filters=255 1141 | activation=linear 1142 | 1143 | 1144 | [yolo] 1145 | mask = 6,7,8 1146 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 1147 | classes=80 1148 | num=9 1149 | jitter=.3 1150 | ignore_thresh = .7 1151 | truth_thresh = 1 1152 | random=1 1153 | scale_x_y = 1.05 1154 | iou_thresh=0.213 1155 | cls_normalizer=1.0 1156 | iou_normalizer=0.07 1157 | iou_loss=ciou 1158 | nms_kind=greedynms 1159 | beta_nms=0.6 1160 | max_delta=5 1161 | 1162 | -------------------------------------------------------------------------------- /darknet_ros/src/YoloObjectDetector.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * YoloObjectDetector.cpp 3 | * 4 | * Created on: Dec 19, 2016 5 | * Author: Marko Bjelonic 6 | * Institute: ETH Zurich, Robotic Systems Lab 7 | */ 8 | 9 | // yolo object detector 10 | #include "darknet_ros/YoloObjectDetector.hpp" 11 | 12 | // Check for xServer 13 | #include 14 | 15 | #ifdef DARKNET_FILE_PATH 16 | std::string darknetFilePath_ = DARKNET_FILE_PATH; 17 | #else 18 | #error Path of darknet repository is not defined in CMakeLists.txt. 
19 | #endif 20 | 21 | namespace darknet_ros { 22 | 23 | char* cfg; 24 | char* weights; 25 | char* data; 26 | char** detectionNames; 27 | 28 | YoloObjectDetector::YoloObjectDetector(ros::NodeHandle nh) 29 | : nodeHandle_(nh), imageTransport_(nodeHandle_), numClasses_(0), classLabels_(0), rosBoxes_(0), rosBoxCounter_(0) { 30 | ROS_INFO("[YoloObjectDetector] Node started."); 31 | 32 | // Read parameters from config file. 33 | if (!readParameters()) { 34 | ros::requestShutdown(); 35 | } 36 | 37 | init(); 38 | } 39 | 40 | YoloObjectDetector::~YoloObjectDetector() { 41 | { 42 | boost::unique_lock lockNodeStatus(mutexNodeStatus_); 43 | isNodeRunning_ = false; 44 | } 45 | yoloThread_.join(); 46 | } 47 | 48 | bool YoloObjectDetector::readParameters() { 49 | // Load common parameters. 50 | nodeHandle_.param("image_view/enable_opencv", viewImage_, true); 51 | nodeHandle_.param("image_view/wait_key_delay", waitKeyDelay_, 3); 52 | nodeHandle_.param("image_view/enable_console_output", enableConsoleOutput_, false); 53 | 54 | // Check if Xserver is running on Linux. 55 | if (XOpenDisplay(NULL)) { 56 | // Do nothing! 57 | ROS_INFO("[YoloObjectDetector] Xserver is running."); 58 | } else { 59 | ROS_INFO("[YoloObjectDetector] Xserver is not running."); 60 | viewImage_ = false; 61 | } 62 | 63 | // Set vector sizes. 64 | nodeHandle_.param("yolo_model/detection_classes/names", classLabels_, std::vector(0)); 65 | numClasses_ = classLabels_.size(); 66 | rosBoxes_ = std::vector >(numClasses_); 67 | rosBoxCounter_ = std::vector(numClasses_); 68 | 69 | return true; 70 | } 71 | 72 | void YoloObjectDetector::init() { 73 | ROS_INFO("[YoloObjectDetector] init()."); 74 | 75 | // Initialize deep network of darknet. 76 | std::string weightsPath; 77 | std::string configPath; 78 | std::string dataPath; 79 | std::string configModel; 80 | std::string weightsModel; 81 | 82 | // Threshold of object detection. 
83 | float thresh; 84 | nodeHandle_.param("yolo_model/threshold/value", thresh, (float)0.3); 85 | 86 | // Path to weights file. 87 | nodeHandle_.param("yolo_model/weight_file/name", weightsModel, std::string("yolov2-tiny.weights")); 88 | nodeHandle_.param("weights_path", weightsPath, std::string("/default")); 89 | weightsPath += "/" + weightsModel; 90 | weights = new char[weightsPath.length() + 1]; 91 | strcpy(weights, weightsPath.c_str()); 92 | 93 | // Path to config file. 94 | nodeHandle_.param("yolo_model/config_file/name", configModel, std::string("yolov2-tiny.cfg")); 95 | nodeHandle_.param("config_path", configPath, std::string("/default")); 96 | configPath += "/" + configModel; 97 | cfg = new char[configPath.length() + 1]; 98 | strcpy(cfg, configPath.c_str()); 99 | 100 | // Path to data folder. 101 | dataPath = darknetFilePath_; 102 | dataPath += "/data"; 103 | data = new char[dataPath.length() + 1]; 104 | strcpy(data, dataPath.c_str()); 105 | 106 | // Get classes. 107 | detectionNames = (char**)realloc((void*)detectionNames, (numClasses_ + 1) * sizeof(char*)); 108 | for (int i = 0; i < numClasses_; i++) { 109 | detectionNames[i] = new char[classLabels_[i].length() + 1]; 110 | strcpy(detectionNames[i], classLabels_[i].c_str()); 111 | } 112 | 113 | // Load network. 114 | setupNetwork(cfg, weights, data, thresh, detectionNames, numClasses_, 0, 0, 1, 0.5, 0, 0, 0, 0); 115 | yoloThread_ = std::thread(&YoloObjectDetector::yolo, this); 116 | 117 | // Initialize publisher and subscriber. 
118 | std::string cameraTopicName; 119 | int cameraQueueSize; 120 | std::string objectDetectorTopicName; 121 | int objectDetectorQueueSize; 122 | bool objectDetectorLatch; 123 | std::string boundingBoxesTopicName; 124 | int boundingBoxesQueueSize; 125 | bool boundingBoxesLatch; 126 | std::string detectionImageTopicName; 127 | int detectionImageQueueSize; 128 | bool detectionImageLatch; 129 | 130 | nodeHandle_.param("subscribers/camera_reading/topic", cameraTopicName, std::string("/camera/image_raw")); 131 | nodeHandle_.param("subscribers/camera_reading/queue_size", cameraQueueSize, 1); 132 | nodeHandle_.param("publishers/object_detector/topic", objectDetectorTopicName, std::string("found_object")); 133 | nodeHandle_.param("publishers/object_detector/queue_size", objectDetectorQueueSize, 1); 134 | nodeHandle_.param("publishers/object_detector/latch", objectDetectorLatch, false); 135 | nodeHandle_.param("publishers/bounding_boxes/topic", boundingBoxesTopicName, std::string("bounding_boxes")); 136 | nodeHandle_.param("publishers/bounding_boxes/queue_size", boundingBoxesQueueSize, 1); 137 | nodeHandle_.param("publishers/bounding_boxes/latch", boundingBoxesLatch, false); 138 | nodeHandle_.param("publishers/detection_image/topic", detectionImageTopicName, std::string("detection_image")); 139 | nodeHandle_.param("publishers/detection_image/queue_size", detectionImageQueueSize, 1); 140 | nodeHandle_.param("publishers/detection_image/latch", detectionImageLatch, true); 141 | 142 | imageSubscriber_ = imageTransport_.subscribe(cameraTopicName, cameraQueueSize, &YoloObjectDetector::cameraCallback, this); 143 | objectPublisher_ = 144 | nodeHandle_.advertise(objectDetectorTopicName, objectDetectorQueueSize, objectDetectorLatch); 145 | boundingBoxesPublisher_ = 146 | nodeHandle_.advertise(boundingBoxesTopicName, boundingBoxesQueueSize, boundingBoxesLatch); 147 | detectionImagePublisher_ = 148 | nodeHandle_.advertise(detectionImageTopicName, detectionImageQueueSize, 
detectionImageLatch); 149 | 150 | // Action servers. 151 | std::string checkForObjectsActionName; 152 | nodeHandle_.param("actions/camera_reading/topic", checkForObjectsActionName, std::string("check_for_objects")); 153 | checkForObjectsActionServer_.reset(new CheckForObjectsActionServer(nodeHandle_, checkForObjectsActionName, false)); 154 | checkForObjectsActionServer_->registerGoalCallback(boost::bind(&YoloObjectDetector::checkForObjectsActionGoalCB, this)); 155 | checkForObjectsActionServer_->registerPreemptCallback(boost::bind(&YoloObjectDetector::checkForObjectsActionPreemptCB, this)); 156 | checkForObjectsActionServer_->start(); 157 | } 158 | 159 | void YoloObjectDetector::cameraCallback(const sensor_msgs::ImageConstPtr& msg) { 160 | ROS_DEBUG("[YoloObjectDetector] USB image received."); 161 | 162 | cv_bridge::CvImagePtr cam_image; 163 | 164 | try { 165 | cam_image = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8); 166 | } catch (cv_bridge::Exception& e) { 167 | ROS_ERROR("cv_bridge exception: %s", e.what()); 168 | return; 169 | } 170 | 171 | if (cam_image) { 172 | { 173 | boost::unique_lock lockImageCallback(mutexImageCallback_); 174 | imageHeader_ = msg->header; 175 | camImageCopy_ = cam_image->image.clone(); 176 | } 177 | { 178 | boost::unique_lock lockImageStatus(mutexImageStatus_); 179 | imageStatus_ = true; 180 | } 181 | frameWidth_ = cam_image->image.size().width; 182 | frameHeight_ = cam_image->image.size().height; 183 | } 184 | return; 185 | } 186 | 187 | void YoloObjectDetector::checkForObjectsActionGoalCB() { 188 | ROS_DEBUG("[YoloObjectDetector] Start check for objects action."); 189 | 190 | boost::shared_ptr imageActionPtr = checkForObjectsActionServer_->acceptNewGoal(); 191 | sensor_msgs::Image imageAction = imageActionPtr->image; 192 | 193 | cv_bridge::CvImagePtr cam_image; 194 | 195 | try { 196 | cam_image = cv_bridge::toCvCopy(imageAction, sensor_msgs::image_encodings::BGR8); 197 | } catch (cv_bridge::Exception& e) { 198 | 
ROS_ERROR("cv_bridge exception: %s", e.what()); 199 | return; 200 | } 201 | 202 | if (cam_image) { 203 | { 204 | boost::unique_lock lockImageCallback(mutexImageCallback_); 205 | camImageCopy_ = cam_image->image.clone(); 206 | } 207 | { 208 | boost::unique_lock lockImageCallback(mutexActionStatus_); 209 | actionId_ = imageActionPtr->id; 210 | } 211 | { 212 | boost::unique_lock lockImageStatus(mutexImageStatus_); 213 | imageStatus_ = true; 214 | } 215 | frameWidth_ = cam_image->image.size().width; 216 | frameHeight_ = cam_image->image.size().height; 217 | } 218 | return; 219 | } 220 | 221 | void YoloObjectDetector::checkForObjectsActionPreemptCB() { 222 | ROS_DEBUG("[YoloObjectDetector] Preempt check for objects action."); 223 | checkForObjectsActionServer_->setPreempted(); 224 | } 225 | 226 | bool YoloObjectDetector::isCheckingForObjects() const { 227 | return (ros::ok() && checkForObjectsActionServer_->isActive() && !checkForObjectsActionServer_->isPreemptRequested()); 228 | } 229 | 230 | bool YoloObjectDetector::publishDetectionImage(const cv::Mat& detectionImage) { 231 | if (detectionImagePublisher_.getNumSubscribers() < 1) return false; 232 | cv_bridge::CvImage cvImage; 233 | cvImage.header.stamp = ros::Time::now(); 234 | cvImage.header.frame_id = "detection_image"; 235 | cvImage.encoding = sensor_msgs::image_encodings::BGR8; 236 | cvImage.image = detectionImage; 237 | detectionImagePublisher_.publish(*cvImage.toImageMsg()); 238 | ROS_DEBUG("Detection image has been published."); 239 | return true; 240 | } 241 | 242 | // double YoloObjectDetector::getWallTime() 243 | // { 244 | // struct timeval time; 245 | // if (gettimeofday(&time, NULL)) { 246 | // return 0; 247 | // } 248 | // return (double) time.tv_sec + (double) time.tv_usec * .000001; 249 | // } 250 | 251 | int YoloObjectDetector::sizeNetwork(network* net) { 252 | int i; 253 | int count = 0; 254 | for (i = 0; i < net->n; ++i) { 255 | layer l = net->layers[i]; 256 | if (l.type == YOLO || l.type == REGION 
|| l.type == DETECTION) { 257 | count += l.outputs; 258 | } 259 | } 260 | return count; 261 | } 262 | 263 | void YoloObjectDetector::rememberNetwork(network* net) { 264 | int i; 265 | int count = 0; 266 | for (i = 0; i < net->n; ++i) { 267 | layer l = net->layers[i]; 268 | if (l.type == YOLO || l.type == REGION || l.type == DETECTION) { 269 | memcpy(predictions_[demoIndex_] + count, net->layers[i].output, sizeof(float) * l.outputs); 270 | count += l.outputs; 271 | } 272 | } 273 | } 274 | 275 | detection* YoloObjectDetector::avgPredictions(network* net, int* nboxes) { 276 | int i, j; 277 | int count = 0; 278 | fill_cpu(demoTotal_, 0, avg_, 1); 279 | for (j = 0; j < demoFrame_; ++j) { 280 | axpy_cpu(demoTotal_, 1. / demoFrame_, predictions_[j], 1, avg_, 1); 281 | } 282 | for (i = 0; i < net->n; ++i) { 283 | layer l = net->layers[i]; 284 | if (l.type == YOLO || l.type == REGION || l.type == DETECTION) { 285 | memcpy(l.output, avg_ + count, sizeof(float) * l.outputs); 286 | count += l.outputs; 287 | } 288 | } 289 | // detection* dets = get_network_boxes(net, buff_[0].w, buff_[0].h, demoThresh_, demoHier_, 0, 1, nboxes); 290 | detection* dets = get_network_boxes(net, buff_[0].w, buff_[0].h, demoThresh_, demoHier_, 0, 1, nboxes, 1); 291 | return dets; 292 | } 293 | 294 | void* YoloObjectDetector::detectInThread() { 295 | running_ = 1; 296 | float nms = .4; 297 | 298 | layer l = net_->layers[net_->n - 1]; 299 | float* X = buffLetter_[(buffIndex_ + 2) % 3].data; 300 | float* prediction = network_predict(*net_, X); 301 | 302 | rememberNetwork(net_); 303 | detection* dets = 0; 304 | int nboxes = 0; 305 | dets = avgPredictions(net_, &nboxes); 306 | 307 | if (nms > 0) do_nms_obj(dets, nboxes, l.classes, nms); 308 | 309 | if (enableConsoleOutput_) { 310 | printf("\033[2J"); 311 | printf("\033[1;1H"); 312 | printf("\nFPS:%.1f\n", fps_); 313 | printf("Objects:\n\n"); 314 | } 315 | image display = buff_[(buffIndex_ + 2) % 3]; 316 | // draw_detections(display, dets, nboxes, 
demoThresh_, demoNames_, demoAlphabet_, demoClasses_, 1); 317 | draw_detections_v3(display, dets, nboxes, demoThresh_, demoNames_, demoAlphabet_, demoClasses_, 1); 318 | 319 | 320 | // extract the bounding boxes and send them to ROS 321 | int i, j; 322 | int count = 0; 323 | for (i = 0; i < nboxes; ++i) { 324 | float xmin = dets[i].bbox.x - dets[i].bbox.w / 2.; 325 | float xmax = dets[i].bbox.x + dets[i].bbox.w / 2.; 326 | float ymin = dets[i].bbox.y - dets[i].bbox.h / 2.; 327 | float ymax = dets[i].bbox.y + dets[i].bbox.h / 2.; 328 | 329 | if (xmin < 0) xmin = 0; 330 | if (ymin < 0) ymin = 0; 331 | if (xmax > 1) xmax = 1; 332 | if (ymax > 1) ymax = 1; 333 | 334 | // iterate through possible boxes and collect the bounding boxes 335 | for (j = 0; j < demoClasses_; ++j) { 336 | if (dets[i].prob[j]) { 337 | float x_center = (xmin + xmax) / 2; 338 | float y_center = (ymin + ymax) / 2; 339 | float BoundingBox_width = xmax - xmin; 340 | float BoundingBox_height = ymax - ymin; 341 | 342 | // define bounding box 343 | // BoundingBox must be 1% size of frame (3.2x2.4 pixels) 344 | if (BoundingBox_width > 0.01 && BoundingBox_height > 0.01) { 345 | roiBoxes_[count].x = x_center; 346 | roiBoxes_[count].y = y_center; 347 | roiBoxes_[count].w = BoundingBox_width; 348 | roiBoxes_[count].h = BoundingBox_height; 349 | roiBoxes_[count].Class = j; 350 | roiBoxes_[count].prob = dets[i].prob[j]; 351 | count++; 352 | } 353 | } 354 | } 355 | } 356 | 357 | // create array to store found bounding boxes 358 | // if no object detected, make sure that ROS knows that num = 0 359 | if (count == 0) { 360 | roiBoxes_[0].num = 0; 361 | } else { 362 | roiBoxes_[0].num = count; 363 | } 364 | 365 | free_detections(dets, nboxes); 366 | demoIndex_ = (demoIndex_ + 1) % demoFrame_; 367 | running_ = 0; 368 | return 0; 369 | } 370 | 371 | void* YoloObjectDetector::fetchInThread() { 372 | { 373 | boost::shared_lock lock(mutexImageCallback_); 374 | IplImageWithHeader_ imageAndHeader = 
getIplImageWithHeader(); 375 | IplImage* ROS_img = imageAndHeader.image; 376 | ipl_into_image(ROS_img, buff_[buffIndex_]); 377 | headerBuff_[buffIndex_] = imageAndHeader.header; 378 | buffId_[buffIndex_] = actionId_; 379 | } 380 | rgbgr_image(buff_[buffIndex_]); 381 | letterbox_image_into(buff_[buffIndex_], net_->w, net_->h, buffLetter_[buffIndex_]); 382 | return 0; 383 | } 384 | 385 | void* YoloObjectDetector::displayInThread(void* ptr) { 386 | show_image_cv(buff_[(buffIndex_ + 1) % 3], "YOLO V4"); 387 | int c = cv::waitKey(waitKeyDelay_); 388 | if (c != -1) c = c % 256; 389 | if (c == 27) { 390 | demoDone_ = 1; 391 | return 0; 392 | } else if (c == 82) { 393 | demoThresh_ += .02; 394 | } else if (c == 84) { 395 | demoThresh_ -= .02; 396 | if (demoThresh_ <= .02) demoThresh_ = .02; 397 | } else if (c == 83) { 398 | demoHier_ += .02; 399 | } else if (c == 81) { 400 | demoHier_ -= .02; 401 | if (demoHier_ <= .0) demoHier_ = .0; 402 | } 403 | return 0; 404 | } 405 | 406 | void* YoloObjectDetector::displayLoop(void* ptr) { 407 | while (1) { 408 | displayInThread(0); 409 | } 410 | } 411 | 412 | void* YoloObjectDetector::detectLoop(void* ptr) { 413 | while (1) { 414 | detectInThread(); 415 | } 416 | } 417 | 418 | void YoloObjectDetector::setupNetwork(char* cfgfile, char* weightfile, char* datafile, float thresh, char** names, int classes, int delay, 419 | char* prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen) { 420 | demoPrefix_ = prefix; 421 | demoDelay_ = delay; 422 | demoFrame_ = avg_frames; 423 | image** alphabet = load_alphabet_with_file(datafile); 424 | demoNames_ = names; 425 | demoAlphabet_ = alphabet; 426 | demoClasses_ = classes; 427 | demoThresh_ = thresh; 428 | demoHier_ = hier; 429 | fullScreen_ = fullscreen; 430 | printf("YOLO V4\n"); 431 | net_ = load_network(cfgfile, weightfile, 0); 432 | set_batch_network(net_, 1); 433 | } 434 | 435 | void YoloObjectDetector::yolo() { 436 | const auto wait_duration = 
std::chrono::milliseconds(2000); 437 | while (!getImageStatus()) { 438 | printf("Waiting for image.\n"); 439 | if (!isNodeRunning()) { 440 | return; 441 | } 442 | std::this_thread::sleep_for(wait_duration); 443 | } 444 | 445 | std::thread detect_thread; 446 | std::thread fetch_thread; 447 | 448 | srand(2222222); 449 | 450 | int i; 451 | demoTotal_ = sizeNetwork(net_); 452 | predictions_ = (float**)calloc(demoFrame_, sizeof(float*)); 453 | for (i = 0; i < demoFrame_; ++i) { 454 | predictions_[i] = (float*)calloc(demoTotal_, sizeof(float)); 455 | } 456 | avg_ = (float*)calloc(demoTotal_, sizeof(float)); 457 | 458 | layer l = net_->layers[net_->n - 1]; 459 | roiBoxes_ = (darknet_ros::RosBox_*)calloc(l.w * l.h * l.n, sizeof(darknet_ros::RosBox_)); 460 | 461 | { 462 | boost::shared_lock lock(mutexImageCallback_); 463 | IplImageWithHeader_ imageAndHeader = getIplImageWithHeader(); 464 | IplImage* ROS_img = imageAndHeader.image; 465 | buff_[0] = ipl_to_image(ROS_img); 466 | headerBuff_[0] = imageAndHeader.header; 467 | } 468 | buff_[1] = copy_image(buff_[0]); 469 | buff_[2] = copy_image(buff_[0]); 470 | headerBuff_[1] = headerBuff_[0]; 471 | headerBuff_[2] = headerBuff_[0]; 472 | buffLetter_[0] = letterbox_image(buff_[0], net_->w, net_->h); 473 | buffLetter_[1] = letterbox_image(buff_[0], net_->w, net_->h); 474 | buffLetter_[2] = letterbox_image(buff_[0], net_->w, net_->h); 475 | ipl_ = cvCreateImage(cvSize(buff_[0].w, buff_[0].h), IPL_DEPTH_8U, buff_[0].c); 476 | 477 | int count = 0; 478 | 479 | if (!demoPrefix_ && viewImage_) { 480 | cv::namedWindow("YOLO V4", cv::WINDOW_NORMAL); 481 | if (fullScreen_) { 482 | cv::setWindowProperty("YOLO V4", cv::WND_PROP_FULLSCREEN, cv::WINDOW_FULLSCREEN); 483 | } else { 484 | cv::moveWindow("YOLO V4", 0, 0); 485 | cv::resizeWindow("YOLO V4", 640, 480); 486 | } 487 | } 488 | 489 | demoTime_ = what_time_is_it_now(); 490 | 491 | while (!demoDone_) { 492 | buffIndex_ = (buffIndex_ + 1) % 3; 493 | fetch_thread = 
std::thread(&YoloObjectDetector::fetchInThread, this); 494 | detect_thread = std::thread(&YoloObjectDetector::detectInThread, this); 495 | if (!demoPrefix_) { 496 | fps_ = 1. / (what_time_is_it_now() - demoTime_); 497 | demoTime_ = what_time_is_it_now(); 498 | if (viewImage_) { 499 | displayInThread(0); 500 | } else { 501 | generate_image(buff_[(buffIndex_ + 1) % 3], ipl_); 502 | } 503 | publishInThread(); 504 | } else { 505 | char name[256]; 506 | sprintf(name, "%s_%08d", demoPrefix_, count); 507 | save_image(buff_[(buffIndex_ + 1) % 3], name); 508 | } 509 | fetch_thread.join(); 510 | detect_thread.join(); 511 | ++count; 512 | if (!isNodeRunning()) { 513 | demoDone_ = true; 514 | } 515 | } 516 | } 517 | 518 | IplImageWithHeader_ YoloObjectDetector::getIplImageWithHeader() { 519 | IplImage* ROS_img = new IplImage(camImageCopy_); 520 | IplImageWithHeader_ header = {.image = ROS_img, .header = imageHeader_}; 521 | return header; 522 | } 523 | 524 | bool YoloObjectDetector::getImageStatus(void) { 525 | boost::shared_lock lock(mutexImageStatus_); 526 | return imageStatus_; 527 | } 528 | 529 | bool YoloObjectDetector::isNodeRunning(void) { 530 | boost::shared_lock lock(mutexNodeStatus_); 531 | return isNodeRunning_; 532 | } 533 | 534 | void* YoloObjectDetector::publishInThread() { 535 | // Publish image. 536 | cv::Mat cvImage = cv::cvarrToMat(ipl_); 537 | if (!publishDetectionImage(cv::Mat(cvImage))) { 538 | ROS_DEBUG("Detection image has not been broadcasted."); 539 | } 540 | 541 | // Publish bounding boxes and detection result. 
542 | int num = roiBoxes_[0].num; 543 | if (num > 0 && num <= 100) { 544 | for (int i = 0; i < num; i++) { 545 | for (int j = 0; j < numClasses_; j++) { 546 | if (roiBoxes_[i].Class == j) { 547 | rosBoxes_[j].push_back(roiBoxes_[i]); 548 | rosBoxCounter_[j]++; 549 | } 550 | } 551 | } 552 | 553 | darknet_ros_msgs::ObjectCount msg; 554 | msg.header.stamp = ros::Time::now(); 555 | msg.header.frame_id = "detection"; 556 | msg.count = num; 557 | objectPublisher_.publish(msg); 558 | 559 | for (int i = 0; i < numClasses_; i++) { 560 | if (rosBoxCounter_[i] > 0) { 561 | darknet_ros_msgs::BoundingBox boundingBox; 562 | 563 | for (int j = 0; j < rosBoxCounter_[i]; j++) { 564 | int xmin = (rosBoxes_[i][j].x - rosBoxes_[i][j].w / 2) * frameWidth_; 565 | int ymin = (rosBoxes_[i][j].y - rosBoxes_[i][j].h / 2) * frameHeight_; 566 | int xmax = (rosBoxes_[i][j].x + rosBoxes_[i][j].w / 2) * frameWidth_; 567 | int ymax = (rosBoxes_[i][j].y + rosBoxes_[i][j].h / 2) * frameHeight_; 568 | 569 | boundingBox.Class = classLabels_[i]; 570 | boundingBox.id = i; 571 | boundingBox.probability = rosBoxes_[i][j].prob; 572 | boundingBox.xmin = xmin; 573 | boundingBox.ymin = ymin; 574 | boundingBox.xmax = xmax; 575 | boundingBox.ymax = ymax; 576 | boundingBoxesResults_.bounding_boxes.push_back(boundingBox); 577 | } 578 | } 579 | } 580 | boundingBoxesResults_.header.stamp = ros::Time::now(); 581 | boundingBoxesResults_.header.frame_id = "detection"; 582 | boundingBoxesResults_.image_header = headerBuff_[(buffIndex_ + 1) % 3]; 583 | boundingBoxesPublisher_.publish(boundingBoxesResults_); 584 | } else { 585 | darknet_ros_msgs::ObjectCount msg; 586 | msg.header.stamp = ros::Time::now(); 587 | msg.header.frame_id = "detection"; 588 | msg.count = 0; 589 | objectPublisher_.publish(msg); 590 | } 591 | if (isCheckingForObjects()) { 592 | ROS_DEBUG("[YoloObjectDetector] check for objects in image."); 593 | darknet_ros_msgs::CheckForObjectsResult objectsActionResult; 594 | objectsActionResult.id = 
buffId_[0]; 595 | objectsActionResult.bounding_boxes = boundingBoxesResults_; 596 | checkForObjectsActionServer_->setSucceeded(objectsActionResult, "Send bounding boxes."); 597 | } 598 | boundingBoxesResults_.bounding_boxes.clear(); 599 | for (int i = 0; i < numClasses_; i++) { 600 | rosBoxes_[i].clear(); 601 | rosBoxCounter_[i] = 0; 602 | } 603 | 604 | return 0; 605 | } 606 | 607 | } /* namespace darknet_ros*/ 608 | --------------------------------------------------------------------------------