├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── dependencies
    ├── install_protobuf-3.8.0.sh
    └── install_pycuda.sh
├── docs
    ├── results.gif
    ├── yolov4_custom_1.png
    ├── yolov4_custom_2.png
    └── yolov4_custom_3.png
├── launch
    ├── video_source.launch
    ├── yolov3_trt.launch
    ├── yolov4_trt.launch
    └── yolov4_trt_batch.launch
├── msg
    ├── BoundingBox2D.msg
    ├── Detector2D.msg
    ├── Detector2DArray.msg
    └── ObjectHypothesis.msg
├── package.xml
├── plugins
    ├── Makefile
    ├── README.md
    ├── libyolo_layer.so
    ├── yolo_layer.cu
    ├── yolo_layer.h
    └── yolo_layer.o
├── setup.py
├── src
    ├── image_converter.cpp
    ├── image_converter.h
    ├── node_video_source.cpp
    ├── ros_compat.cpp
    └── ros_compat.h
├── trt_yolo_v3.py
├── trt_yolo_v4.py
├── trt_yolo_v4_batch.py
├── utils
    ├── __init__.py
    ├── display.py
    ├── visualization.py
    ├── yolo_classes.py
    ├── yolo_with_plugins.py
    └── yolo_with_plugins_batch.py
└── yolo
    ├── __pycache__
        └── plugins.cpython-36.pyc
    ├── convert_yolo_trt.sh
    ├── download_yolo.sh
    ├── onnx_to_tensorrt.py
    ├── plugins.py
    ├── requirements.txt
    └── yolo_to_onnx.py


/.gitignore:
--------------------------------------------------------------------------------
1 | yolo/yolov4-416.trt
2 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.0.2)
 2 | project(yolov4_trt_ros)
 3 | 
 4 | find_package(OpenCV 3.0 REQUIRED)
 5 | find_package(jetson-utils REQUIRED)
 6 | find_package(jetson-inference REQUIRED)
 7 | find_package(CUDA REQUIRED)
 8 | 
 9 | catkin_python_setup()
10 | 
11 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/package.xml ${CMAKE_CURRENT_SOURCE_DIR}/package.xml COPYONLY)
12 | 
13 | find_package(catkin REQUIRED COMPONENTS
14 |   image_transport
15 |   rospy
16 |   sensor_msgs
17 |   vision_msgs
18 |   cv_bridge
19 |   message_generation
20 | )
21 | 
22 | add_message_files(
23 |   FILES
24 |   Detector2D.msg
25 |   Detector2DArray.msg
26 |   BoundingBox2D.msg
27 |   ObjectHypothesis.msg
28 | )
29 | 
30 | generate_messages(DEPENDENCIES std_msgs vision_msgs)
31 | 	 
32 | catkin_package(
33 |   CATKIN_DEPENDS nodelet rospy image_transport sensor_msgs vision_msgs cv_bridge message_runtime
34 | )
35 | 
36 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")	# enable c++11 (TensorRT requirement)
37 | include_directories(${catkin_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS})
38 | add_definitions(-DROS1)
39 | 
40 | set(common_src src/image_converter.cpp src/ros_compat.cpp)
41 | 
42 | add_executable(video_source src/node_video_source.cpp ${common_src})
43 | target_link_libraries(video_source ${catkin_LIBRARIES} jetson-inference)
44 | 
45 | catkin_install_python(PROGRAMS trt_yolo_v4_batch.py
46 |   DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
47 | )
48 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Indra Kurniawan
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # YOLOv4 with TensorRT engine
  2 | 
  3 | This package contains the yolov4_trt_node that performs the inference using NVIDIA's TensorRT engine
  4 | 
  5 | This package works for both YOLOv3 and YOLOv4. Do change the commands accordingly, corresponding to the YOLO model used.
  6 | 
  7 | 
  8 | ![Video_Result2](docs/results.gif)
  9 | 
 10 | ---
 11 | ## Setting up the environment
 12 | 
 13 | ### Install dependencies
 14 | 
 15 | ### Current Environment:
 16 | 
 17 | - Jetson Xavier AGX
 18 | - ROS Melodic
 19 | - Ubuntu 18.04
 20 | - Jetpack 4.4
 21 | - TensorRT 7+
 22 | 
 23 | #### Dependencies:
 24 | 
 25 | - OpenCV 3.x
 26 | - numpy 1.15.1
 27 | - Protobuf 3.8.0
 28 | - Pycuda 2019.1.2
 29 | - onnx 1.4.1 (depends on Protobuf)
 30 | 
 31 | ### Install all dependencies with below commands
 32 | 
 33 | ```
 34 | Install pycuda (takes awhile)
 35 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/dependencies
 36 | $ ./install_pycuda.sh
 37 | 
 38 | Install Protobuf (takes awhile)
 39 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/dependencies
 40 | $ ./install_protobuf-3.8.0.sh
 41 | 
 42 | Install onnx (depends on Protobuf above)
 43 | $ sudo pip3 install onnx==1.4.1
 44 | ```
 45 | 
 46 | * Please also install [jetson-inference](https://github.com/dusty-nv/ros_deep_learning#jetson-inference)
 47 | * Note: This package uses similar nodes to ros_deep_learning package. Please place a CATKIN_IGNORE in that package to avoid similar node name catkin_make error
 48 | * If these scripts do not work for you, do refer to this amazing repository by [jefflgaol](https://github.com/jefflgaol/Install-Packages-Jetson-ARM-Family) on installing the above packages and more on Jetson ARM devices.
 49 | ---
 50 | ## Setting up the package
 51 | 
 52 | ### 1. Clone project into catkin_ws and build it
 53 | 
 54 | ``` 
 55 | $ cd ~/catkin_ws && catkin_make
 56 | $ source devel/setup.bash
 57 | ```
 58 | 
 59 | ### 2. Make libyolo_layer.so
 60 | 
 61 | ```
 62 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/plugins
 63 | $ make
 64 | ```
 65 | 
 66 | This will generate a libyolo_layer.so file
 67 | 
 68 | ### 3. Place your yolo.weights and yolo.cfg file in the yolo folder
 69 | 
 70 | ```
 71 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/yolo
 72 | ```
 73 | ** Please name the yolov4.weights and yolov4.cfg file as follows:
 74 | - yolov4.weights
 75 | - yolov4.cfg
 76 | 
 77 | Run the conversion script to convert to TensorRT engine file
 78 | 
 79 | ```
 80 | $ ./convert_yolo_trt
 81 | ```
 82 | 
 83 | - Input the appropriate arguments
 84 | - This conversion might take awhile
 85 | - The optimised TensorRT engine would now be saved as yolov3-416.trt / yolov4-416.trt
 86 | 
 87 | ### 4. Change the class labels
 88 | 
 89 | ```
 90 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/utils
 91 | $ vim yolo_classes.py
 92 | ```
 93 | 
 94 | - Change the class labels to suit your model
 95 | 
 96 | ### 5. Change the video_input and topic_name
 97 | 
 98 | ```
 99 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/launch
100 | ```
101 | 
102 | - `yolov3_trt.launch` : change the topic_name
103 | 
104 | - `yolov4_trt.launch` : change the topic_name
105 | 
106 | - `video_source.launch` : change the input format (refer to this [Link](https://github.com/dusty-nv/jetson-inference/blob/master/docs/aux-streaming.md)
107 | 
108 |    * video_source.launch requires jetson-inference to be installed
109 |    * Default input is CSI camera
110 | 
111 | ---
112 | ## Using the package
113 | 
114 | ### Running the package
115 | 
116 | Note: Run the launch files separately in different terminals
117 | 
118 | ### 1. Run the video_source 
119 | 
120 | ```
121 | # For csi input
122 | $ roslaunch yolov4_trt_ros video_source.launch input:=csi://0
123 | 
124 | # For video input
125 | $ roslaunch yolov4_trt_ros video_source.launch input:=/path_to_video/video.mp4
126 | 
127 | # For USB camera
128 | $ roslaunch yolov4_trt_ros video_source.launch input:=v4l2://0
129 | ```
130 | 
131 | ### 2. Run the yolo detector
132 | 
133 | ```
134 | # For YOLOv3 (single input)
135 | $ roslaunch yolov4_trt_ros yolov3_trt.launch
136 | 
137 | # For YOLOv4 (single input)
138 | $ roslaunch yolov4_trt_ros yolov4_trt.launch
139 | 
140 | # For YOLOv4 (multiple input)
141 | $ roslaunch yolov4_trt_ros yolov4_trt_batch.launch
142 | ```
143 | 
144 | ### 3. For maximum performance
145 | 
146 | ```
147 | $ cd /usr/bin/
148 | $ sudo ./nvpmodel -m 0	# Enable 2 Denver CPU
149 | $ sudo ./jetson_clock	# Maximise CPU/GPU performance
150 | ```
151 | 
152 | * These commands are found/referred in this [forum post](https://forums.developer.nvidia.com/t/nvpmodel-and-jetson-clocks/58659/2)
153 | * Please ensure the jetson device is cooled appropriately to prevent overheating
154 | 
155 | ### Parameters
156 | 
157 | - str model = "yolov3" or "yolov4" 
158 | - str model_path = "/abs_path_to_model/"
159 | - int input_shape = 288/416/608
160 | - int category_num = 80
161 | - double conf_th = 0.5
162 | - bool show_img = True
163 | 
164 | - Default Input FPS from CSI camera = 30.0
165 | * To change this, go to jetson-inference/utils/camera/gstCamera.cpp 
166 | 
167 | ``` 
168 | # In line 359, change this line
169 | mOptions.frameRate = 15
170 | 
171 | # To desired frame_rate
172 | mOptions.frameRate = desired_frame_rate
173 | ``` 
174 | ---
175 | ## Results obtained
176 | 
177 | ### Inference Results
178 | #### Single Camera Input
179 | 
180 |    | Model    | Hardware |    FPS    |  Inference Time (ms)  | 
181 |    |:---------|:--:|:---------:|:----------------:|
182 |    | Yolov4-416| Xavier AGX | 40.0 | 0.025 |
183 |    | Yolov4-416| Jetson Tx2 | 16.0 | 0.0625 |
184 | 
185 |    
186 | * Will be adding inference tests for YOLOv3/4 and YOLOv3-tiny/YOLOv4-tiny for different Jetson devices and multiple camera inputs inference tests in the future
187 | 
188 | ### 1. Screenshots 
189 | 
190 | ### Video_in: .mp4 video (640x360)
191 | ### Tests Done on Xavier AGX
192 | ### Avg FPS : ~38 FPS
193 | 
194 | ![Video_Result1](docs/yolov4_custom_1.png)
195 | 
196 | ![Video_Result2](docs/yolov4_custom_2.png)
197 | 
198 | ![Video_Result3](docs/yolov4_custom_3.png)
199 | 
200 | ---
201 | ## Licenses and References
202 | 
203 | ### 1. TensorRT samples from [jkjung-avt](https://github.com/jkjung-avt/) 
204 | 
205 | Many thanks for his project with tensorrt samples. I have referenced his source code and adapted it to ROS for robotics applications.
206 | 
207 | I also used the pycuda and protobuf installation script from his project
208 | 
209 | Those code are under [MIT License](https://github.com/jkjung-avt/tensorrt_demos/blob/master/LICENSE)
210 | 
211 | ### 2. Jetson-inference from [dusty-nv](https://github.com/dusty-nv/ros_deep_learning#jetson-inference)
212 | 
213 | Many thanks for his work on the Jetson Inference with ROS. I have used his video_source input from his project for capturing video inputs. 
214 | 
215 | Those code are under [NVIDIA License](https://github.com/dusty-nv/jetson-inference/blob/master/LICENSE.md) 


--------------------------------------------------------------------------------
/dependencies/install_protobuf-3.8.0.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | folder=${HOME}/src
 6 | mkdir -p $folder
 7 | 
 8 | echo "** Install requirements"
 9 | sudo apt-get install -y autoconf libtool
10 | 
11 | echo "** Download protobuf-3.8.0 sources"
12 | cd $folder
13 | if [ ! -f protobuf-python-3.8.0.zip ]; then
14 |   wget https://github.com/protocolbuffers/protobuf/releases/download/v3.8.0/protobuf-python-3.8.0.zip
15 | fi
16 | if [ ! -f protoc-3.8.0-linux-aarch_64.zip ]; then
17 |   wget https://github.com/protocolbuffers/protobuf/releases/download/v3.8.0/protoc-3.8.0-linux-aarch_64.zip
18 | fi
19 | 
20 | echo "** Install protoc"
21 | unzip protobuf-python-3.8.0.zip
22 | unzip protoc-3.8.0-linux-aarch_64.zip -d protoc-3.8.0
23 | sudo cp protoc-3.8.0/bin/protoc /usr/local/bin/protoc
24 | 
25 | echo "** Build and install protobuf-3.8.0 libraries"
26 | export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp
27 | cd protobuf-3.8.0/
28 | ./autogen.sh
29 | ./configure --prefix=/usr/local
30 | make -j$(nproc)
31 | make check
32 | sudo make install
33 | sudo ldconfig
34 | 
35 | echo "** Update python3 protobuf module"
36 | # remove previous installation of python3 protobuf module
37 | sudo pip3 uninstall -y protobuf
38 | sudo pip3 install Cython
39 | cd python/
40 | python3 setup.py build --cpp_implementation
41 | python3 setup.py test --cpp_implementation
42 | sudo python3 setup.py install --cpp_implementation
43 | 
44 | echo "** Build protobuf-3.8.0 successfully"
45 | 


--------------------------------------------------------------------------------
/dependencies/install_pycuda.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Reference for installing 'pycuda': https://wiki.tiker.net/PyCuda/Installation/Linux/Ubuntu
 4 | 
 5 | set -e
 6 | 
 7 | if ! which nvcc > /dev/null; then
 8 |   echo "ERROR: nvcc not found"
 9 |   exit
10 | fi
11 | 
12 | arch=$(uname -m)
13 | folder=${HOME}/src
14 | mkdir -p $folder
15 | 
16 | echo "** Install requirements"
17 | sudo apt-get install -y build-essential python3-dev
18 | sudo apt-get install -y libboost-python-dev libboost-thread-dev
19 | sudo pip3 install setuptools
20 | 
21 | boost_pylib=$(basename /usr/lib/${arch}-linux-gnu/libboost_python*-py3?.so)
22 | boost_pylibname=${boost_pylib%.so}
23 | boost_pyname=${boost_pylibname/lib/}
24 | 
25 | echo "** Download pycuda-2019.1.2 sources"
26 | pushd $folder
27 | if [ ! -f pycuda-2019.1.2.tar.gz ]; then
28 |   wget https://files.pythonhosted.org/packages/5e/3f/5658c38579b41866ba21ee1b5020b8225cec86fe717e4b1c5c972de0a33c/pycuda-2019.1.2.tar.gz
29 | fi
30 | 
31 | echo "** Build and install pycuda-2019.1.2"
32 | CPU_CORES=$(nproc)
33 | echo "** cpu cores available: " $CPU_CORES
34 | tar xzvf pycuda-2019.1.2.tar.gz
35 | cd pycuda-2019.1.2
36 | python3 ./configure.py --python-exe=/usr/bin/python3 --cuda-root=/usr/local/cuda --cudadrv-lib-dir=/usr/lib/${arch}-linux-gnu --boost-inc-dir=/usr/include --boost-lib-dir=/usr/lib/${arch}-linux-gnu --boost-python-libname=${boost_pyname} --boost-thread-libname=boost_thread --no-use-shipped-boost
37 | make -j$CPU_CORES
38 | python3 setup.py build
39 | sudo python3 setup.py install
40 | 
41 | popd
42 | 
43 | python3 -c "import pycuda; print('pycuda version:', pycuda.VERSION)"
44 | 


--------------------------------------------------------------------------------
/docs/results.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/docs/results.gif


--------------------------------------------------------------------------------
/docs/yolov4_custom_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/docs/yolov4_custom_1.png


--------------------------------------------------------------------------------
/docs/yolov4_custom_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/docs/yolov4_custom_2.png


--------------------------------------------------------------------------------
/docs/yolov4_custom_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/docs/yolov4_custom_3.png


--------------------------------------------------------------------------------
/launch/video_source.launch:
--------------------------------------------------------------------------------
 1 | <launch>
 2 | 	<arg name="input" default="csi://0"/>
 3 | 	<arg name="input_width" default="0"/>
 4 | 	<arg name="input_height" default="0"/>
 5 | 	<arg name="input_codec" default="unknown"/>
 6 | 	<arg name="input_loop" default="0"/>
 7 | 
 8 | 	<node pkg="yolov4_trt_ros" type="video_source" name="video_source" output="screen">
 9 | 		<param name="resource" value="$(arg input)"/>
10 | 		<param name="width" value="$(arg input_width)"/>
11 | 		<param name="height" value="$(arg input_height)"/>
12 | 		<param name="loop" value="$(arg input_loop)"/>
13 | 	</node>
14 | 
15 | </launch>
16 | 


--------------------------------------------------------------------------------
/launch/yolov3_trt.launch:
--------------------------------------------------------------------------------
 1 | <launch>
 2 | 	<!-- yolov4_trt_node -->
 3 | 	
 4 | 
 5 | 	<node pkg="yolov4_trt_ros" type="trt_yolo_v3.py" name="yolov4_trt_node" output="screen">
 6 | 		<param name="topic_name" type="string" value="/video_source/raw" />
 7 | 		<param name="model" type="string" value="yolov3" />	
 8 | 		<param name="model_path" type="string" value="$(find yolov4_trt_ros)/yolo/" />
 9 | 		<param name="input_shape" type="string" value="416" />
10 | 		<param name="category_number" type="int" value="80" />
11 | 		<param name="confidence_threshold" type="double" value="0.5" />
12 | 		<param name="show_image" type="bool" value="True" />
13 | 	</node>
14 | </launch>
15 | 


--------------------------------------------------------------------------------
/launch/yolov4_trt.launch:
--------------------------------------------------------------------------------
 1 | <launch>
 2 | 	<!-- yolov4_trt_node -->
 3 | 	<node pkg="yolov4_trt_ros" type="trt_yolo_v4.py" name="yolov4_trt_node" output="screen">		
 4 | 		<param name="video_topic" type="string" value="/video_source/raw" />
 5 | 		<param name="model" type="string" value="yolov4" />	
 6 | 		<param name="model_path" type="string" value="$(find yolov4_trt_ros)/yolo/" />
 7 | 		<param name="input_shape" type="string" value="416" />
 8 | 		<param name="category_number" type="int" value="80" />
 9 | 		<param name="confidence_threshold" type="double" value="0.5" />
10 | 		<param name="show_image" type="bool" value="True" />
11 | 	</node>
12 | </launch>
13 | 


--------------------------------------------------------------------------------
/launch/yolov4_trt_batch.launch:
--------------------------------------------------------------------------------
 1 | <launch>
 2 | 	<!-- yolov4_trt_node -->
 3 | 	<node pkg="yolov4_trt_ros" type="trt_yolo_v4_batch.py" name="yolov4_trt_node_batch" output="screen">
 4 | 		<param name="video_topic1" type="string" value="/zed/zed_node/left_raw/image_raw_color" />
 5 | 		<param name="video_topic2" type="string" value="/zed/zed_node/right_raw/image_raw_color" />
 6 | 		<param name="model" type="string" value="yolov4" />	
 7 | 		<param name="model_path" type="string" value="$(find yolov4_trt_ros)/yolo/" />
 8 | 		<param name="input_shape" type="string" value="416" />
 9 | 		<param name="category_number" type="int" value="80" />
10 | 		<param name="confidence_threshold" type="double" value="0.5" />
11 | 		<param name="show_image" type="bool" value="True" />
12 | 	</node>
13 | </launch>
14 | 


--------------------------------------------------------------------------------
/msg/BoundingBox2D.msg:
--------------------------------------------------------------------------------
 1 | # A 2D bounding box that can be rotated about its center.
 2 | # All dimensions are in pixels, but represented using floating-point
 3 | #   values to allow sub-pixel precision. If an exact pixel crop is required
 4 | #   for a rotated bounding box, it can be calculated using Bresenham's line
 5 | #   algorithm.
 6 | 
 7 | # The 2D position (in pixels) and orientation of the bounding box center.
 8 | geometry_msgs/Pose2D center
 9 | 
10 | # The size (in pixels) of the bounding box surrounding the object relative
11 | #   to the pose of its center.
12 | float64 size_x
13 | float64 size_y
14 | 


--------------------------------------------------------------------------------
/msg/Detector2D.msg:
--------------------------------------------------------------------------------
 1 | # Defines a 2D detection result.
 2 | #
 3 | # This is similar to a 2D classification, but includes position information,
 4 | #   allowing a classification result for a specific crop or image point to
 5 | #   to be located in the larger image.
 6 | 
 7 | Header header
 8 | 
 9 | # Class probabilities
10 | ObjectHypothesis results
11 | 
12 | # 2D bounding box surrounding the object.
13 | BoundingBox2D bbox
14 | 


--------------------------------------------------------------------------------
/msg/Detector2DArray.msg:
--------------------------------------------------------------------------------
1 | # A list of 2D detections, for a multi-object 2D detector.
2 | 
3 | Header header
4 | 
5 | # A list of the detected proposals. A multi-proposal detector might generate
6 | #   this list with many candidate detections generated from a single input.
7 | Detector2D[] detections
8 | 


--------------------------------------------------------------------------------
/msg/ObjectHypothesis.msg:
--------------------------------------------------------------------------------
 1 | # An object hypothesis that contains no position information.
 2 | 
 3 | # The unique numeric ID of object detected. To get additional information about
 4 | #   this ID, such as its human-readable name, listeners should perform a lookup
 5 | #   in a metadata database. See vision_msgs/VisionInfo.msg for more detail.
 6 | int64 id
 7 | 
 8 | # The probability or confidence value of the detected object. By convention,
 9 | #   this value should lie in the range [0-1].
10 | float64 score
11 | 


--------------------------------------------------------------------------------
/package.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <package format="2">
 3 |   <name>yolov4_trt_ros</name>
 4 |   <version>0.0.0</version>
 5 |   <description>The yolov4_trt_ros package</description>
 6 | 
 7 |   <maintainer email="e0175651@u.nus.edu">indra</maintainer>
 8 | 
 9 |   <license>BSD</license>
10 |   <buildtool_depend>catkin</buildtool_depend>
11 | 
12 |   <build_depend>cv_bridge</build_depend>
13 |   <build_depend>image_transport</build_depend>
14 |   <build_depend>rospy</build_depend>
15 |   <build_depend>sensor_msgs</build_depend>
16 |   <build_depend>vision_msgs</build_depend>
17 |   <build_depend>nodelet</build_depend>
18 |   <build_depend>message_generation</build_depend>
19 | 
20 |   <build_export_depend>cv_bridge</build_export_depend>
21 |   <build_export_depend>image_transport</build_export_depend>
22 |   <build_export_depend>rospy</build_export_depend>
23 |   <build_export_depend>sensor_msgs</build_export_depend>
24 |   <build_export_depend>vision_msgs</build_export_depend>
25 |   <build_export_depend>nodelet</build_export_depend>
26 |   
27 |   <exec_depend>cv_bridge</exec_depend>
28 |   <exec_depend>image_transport</exec_depend>
29 |   <exec_depend>rospy</exec_depend>
30 |   <exec_depend>sensor_msgs</exec_depend>
31 |   <exec_depend>vision_msgs</exec_depend>
32 |   <exec_depend>nodelet</exec_depend>
33 |   <exec_depend>message_runtime</exec_depend>
34 | </package>
35 | 


--------------------------------------------------------------------------------
/plugins/Makefile:
--------------------------------------------------------------------------------
 1 | CC=g++
 2 | LD=ld
 3 | CXXFLAGS=-Wall -std=c++11 -g -O
 4 | 
 5 | NVCC=nvcc
 6 | 
 7 | cpu_arch=$(shell uname -m)
 8 | ifeq ($(cpu_arch), aarch64)  # Jetson
 9 |   chip_id=$(shell cat /sys/module/tegra_fuse/parameters/tegra_chip_id)
10 |   ifeq ($(chip_id), 33)       # Nano and TX1
11 |     compute=53
12 |   else ifeq ($(chip_id), 24)  # TX2
13 |     compute=62
14 |   else ifeq ($(chip_id), 25)  # Xavier NX
15 |     compute=72
16 |   else
17 |     $(error Cannot determine cuda compute automatically, so please modify Makefile manually)
18 |   endif
19 | else ifeq ($(cpu_arch), x86_64)  # x86_64 PC
20 |   $(warning "compute=75" is for GeForce RTX-2080 Ti.  Please make sure CUDA compute is set correctly for your system in the Makefile.)
21 |   compute=75
22 | else
23 |   $(error Unkown CPU architecture: neither "aarch" nor "x86_64")
24 | endif
25 | 
26 | NVCCFLAGS=-m64 -gencode arch=compute_$(compute),code=sm_$(compute) \
27 |                -gencode arch=compute_$(compute),code=compute_$(compute)
28 | 
29 | # These are the directories where I installed TensorRT on my x86_64 PC.
30 | TENSORRT_INCS=-I"/usr/local/TensorRT-7.1.3.4/include"
31 | TENSORRT_LIBS=-L"/usr/local/TensorRT-7.1.3.4/lib"
32 | 
33 | # INCS and LIBS
34 | INCS=-I"/usr/local/cuda/include" $(TENSORRT_INCS) -I"/usr/local/include" -I"plugin"
35 | LIBS=-L"/usr/local/cuda/lib64" $(TENSORRT_LIBS) -L"/usr/local/lib" -Wl,--start-group -lnvinfer -lnvparsers -lnvinfer_plugin -lcudnn -lcublas -lcudart_static -lnvToolsExt -lcudart -lrt -ldl -lpthread -Wl,--end-group
36 | 
37 | .PHONY: all clean
38 | 
39 | all: libyolo_layer.so
40 | 
41 | clean:
42 | 	rm -f *.so *.o
43 | 
44 | libyolo_layer.so: yolo_layer.o
45 | 	$(CC) -shared -o $@ $< $(LIBS)
46 | 
47 | yolo_layer.o: yolo_layer.cu yolo_layer.h
48 | 	$(NVCC) -ccbin $(CC) $(INCS) $(NVCCFLAGS) -Xcompiler -fPIC -c -o $@ $<
49 | 


--------------------------------------------------------------------------------
/plugins/README.md:
--------------------------------------------------------------------------------
1 | The "yolo_layer.h" and "yolo_layer.cu" were taken and modified from [wang-xinyu/tensorrtx/yolov4](https://github.com/wang-xinyu/tensorrtx/tree/master/yolov4).  The original code is under [MIT License](https://github.com/wang-xinyu/tensorrtx/blob/master/LICENSE).
2 | 


--------------------------------------------------------------------------------
/plugins/libyolo_layer.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/plugins/libyolo_layer.so


--------------------------------------------------------------------------------
/plugins/yolo_layer.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * yolo_layer.cu
  3 |  *
  4 |  * This code was originally written by wang-xinyu under MIT license.
  5 |  * I took it from:
  6 |  *
  7 |  *     https://github.com/wang-xinyu/tensorrtx/tree/master/yolov4
  8 |  *
  9 |  * and made necessary modifications.
 10 |  *
 11 |  * - JK Jung
 12 |  */
 13 | 
 14 | #include "yolo_layer.h"
 15 | 
 16 | using namespace Yolo;
 17 | 
 18 | namespace
 19 | {
 20 | // Write values into buffer
 21 | template <typename T>
 22 | void write(char*& buffer, const T& val)
 23 | {
 24 |     *reinterpret_cast<T*>(buffer) = val;
 25 |     buffer += sizeof(T);
 26 | }
 27 | 
 28 | // Read values from buffer
 29 | template <typename T>
 30 | void read(const char*& buffer, T& val)
 31 | {
 32 |     val = *reinterpret_cast<const T*>(buffer);
 33 |     buffer += sizeof(T);
 34 | }
 35 | } // namespace
 36 | 
 37 | namespace nvinfer1
 38 | {
 39 |     YoloLayerPlugin::YoloLayerPlugin(int yolo_width, int yolo_height, int num_anchors, float* anchors, int num_classes, int input_width, int input_height, float scale_x_y)
 40 |     {
 41 |         mYoloWidth   = yolo_width;
 42 |         mYoloHeight  = yolo_height;
 43 |         mNumAnchors  = num_anchors;
 44 |         memcpy(mAnchorsHost, anchors, num_anchors * 2 * sizeof(float));
 45 |         mNumClasses  = num_classes;
 46 |         mInputWidth  = input_width;
 47 |         mInputHeight = input_height;
 48 |         mScaleXY     = scale_x_y;
 49 | 
 50 |         CHECK(cudaMalloc(&mAnchors, MAX_ANCHORS * 2 * sizeof(float)));
 51 |         CHECK(cudaMemcpy(mAnchors, mAnchorsHost, mNumAnchors * 2 * sizeof(float), cudaMemcpyHostToDevice));
 52 |     }
 53 | 
 54 |     YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length)
 55 |     {
 56 |         const char *d = reinterpret_cast<const char *>(data), *a = d;
 57 |         read(d, mThreadCount);
 58 |         read(d, mYoloWidth);
 59 |         read(d, mYoloHeight);
 60 |         read(d, mNumAnchors);
 61 |         memcpy(mAnchorsHost, d, MAX_ANCHORS * 2 * sizeof(float));
 62 |         d += MAX_ANCHORS * 2 * sizeof(float);
 63 |         read(d, mNumClasses);
 64 |         read(d, mInputWidth);
 65 |         read(d, mInputHeight);
 66 |         read(d, mScaleXY);
 67 | 
 68 |         CHECK(cudaMalloc(&mAnchors, MAX_ANCHORS * 2 * sizeof(float)));
 69 |         CHECK(cudaMemcpy(mAnchors, mAnchorsHost, mNumAnchors * 2 * sizeof(float), cudaMemcpyHostToDevice));
 70 | 
 71 |         assert(d == a + length);
 72 |     }
 73 | 
 74 |     void YoloLayerPlugin::serialize(void* buffer) const
 75 |     {
 76 |         char* d = static_cast<char*>(buffer), *a = d;
 77 |         write(d, mThreadCount);
 78 |         write(d, mYoloWidth);
 79 |         write(d, mYoloHeight);
 80 |         write(d, mNumAnchors);
 81 |         memcpy(d, mAnchorsHost, MAX_ANCHORS * 2 * sizeof(float));
 82 |         d += MAX_ANCHORS * 2 * sizeof(float);
 83 |         write(d, mNumClasses);
 84 |         write(d, mInputWidth);
 85 |         write(d, mInputHeight);
 86 |         write(d, mScaleXY);
 87 | 
 88 |         assert(d == a + getSerializationSize());
 89 |     }
 90 | 
 91 |     size_t YoloLayerPlugin::getSerializationSize() const
 92 |     {
 93 |         return sizeof(mThreadCount) + \
 94 |                sizeof(mYoloWidth) + sizeof(mYoloHeight) + \
 95 |                sizeof(mNumAnchors) + MAX_ANCHORS * 2 * sizeof(float) + \
 96 |                sizeof(mNumClasses) + \
 97 |                sizeof(mInputWidth) + sizeof(mInputHeight) + \
 98 |                sizeof(mScaleXY);
 99 |     }
100 | 
101 |     int YoloLayerPlugin::initialize()
102 |     {
103 |         return 0;
104 |     }
105 | 
106 |     void YoloLayerPlugin::terminate()
107 |     {
108 |         CHECK(cudaFree(mAnchors));
109 |     }
110 | 
111 |     Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
112 |     {
113 |         //output the result to channel
114 |         int totalsize = mYoloWidth * mYoloHeight * mNumAnchors * sizeof(Detection) / sizeof(float);
115 |         return Dims3(totalsize, 1, 1);
116 |     }
117 | 
118 |     void YoloLayerPlugin::setPluginNamespace(const char* pluginNamespace)
119 |     {
120 |         mPluginNamespace = pluginNamespace;
121 |     }
122 | 
123 |     const char* YoloLayerPlugin::getPluginNamespace() const
124 |     {
125 |         return mPluginNamespace;
126 |     }
127 | 
128 |     // Return the DataType of the plugin output at the requested index
129 |     DataType YoloLayerPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const
130 |     {
131 |         return DataType::kFLOAT;
132 |     }
133 | 
134 |     // Return true if output tensor is broadcast across a batch.
135 |     bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const
136 |     {
137 |         return false;
138 |     }
139 | 
140 |     // Return true if plugin can use input that is broadcast across batch without replication.
141 |     bool YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const
142 |     {
143 |         return false;
144 |     }
145 | 
146 |     void YoloLayerPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
147 |     {
148 |     }
149 | 
150 |     // Attach the plugin object to an execution context and grant the plugin the access to some context resource.
151 |     void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)
152 |     {
153 |     }
154 | 
155 |     // Detach the plugin object from its execution context.
156 |     void YoloLayerPlugin::detachFromContext()
157 |     {
158 |     }
159 | 
160 |     const char* YoloLayerPlugin::getPluginType() const
161 |     {
162 |         return "YoloLayer_TRT";
163 |     }
164 | 
165 |     const char* YoloLayerPlugin::getPluginVersion() const
166 |     {
167 |         return "1";
168 |     }
169 | 
170 |     void YoloLayerPlugin::destroy()
171 |     {
172 |         delete this;
173 |     }
174 | 
175 |     // Clone the plugin
176 |     IPluginV2IOExt* YoloLayerPlugin::clone() const
177 |     {
178 |         YoloLayerPlugin *p = new YoloLayerPlugin(mYoloWidth, mYoloHeight, mNumAnchors, (float*) mAnchorsHost, mNumClasses, mInputWidth, mInputHeight, mScaleXY);
179 |         p->setPluginNamespace(mPluginNamespace);
180 |         return p;
181 |     }
182 | 
183 |     inline __device__ float sigmoidGPU(float x) { return 1.0f / (1.0f + __expf(-x)); }
184 | 
185 |     inline __device__ float scale_sigmoidGPU(float x, float scale)
186 |     {
187 |         return scale * sigmoidGPU(x) - (scale - 1.0f) * 0.5f;
188 |     }
189 | 
190 |     // CalDetection(): This kernel processes 1 yolo layer calculation.  It
191 |     // distributes calculations so that 1 GPU thread would be responsible
192 |     // for each grid/anchor combination.
193 |     // NOTE: The output (x, y, w, h) are between 0.0 and 1.0
194 |     //       (relative to orginal image width and height).
195 |     __global__ void CalDetection(const float *input, float *output, int yolo_width, int yolo_height, int num_anchors,
196 |                                  const float *anchors, int num_classes, int input_w, int input_h, float scale_x_y)
197 |     {
198 |         int idx = threadIdx.x + blockDim.x * blockIdx.x;
199 |         Detection* det = ((Detection*) output) + idx;
200 |         int total_grids = yolo_width * yolo_height;
201 |         if (idx >= total_grids * num_anchors) return;
202 | 
203 |         int anchor_idx = idx / total_grids;
204 |         idx = idx - total_grids * anchor_idx;
205 |         int info_len = 5 + num_classes;
206 |         const float* cur_input = input + anchor_idx * (info_len * total_grids);
207 | 
208 |         int class_id;
209 |         float max_cls_logit = -CUDART_INF_F;  // minus infinity
210 |         for (int i = 5; i < info_len; ++i) {
211 |             float l = cur_input[idx + i * total_grids];
212 |             if (l > max_cls_logit) {
213 |                 max_cls_logit = l;
214 |                 class_id = i - 5;
215 |             }
216 |         }
217 |         float max_cls_prob = sigmoidGPU(max_cls_logit);
218 |         float box_prob = sigmoidGPU(cur_input[idx + 4 * total_grids]);
219 |         //if (max_cls_prob < IGNORE_THRESH || box_prob < IGNORE_THRESH)
220 |         //    return;
221 | 
222 |         int row = idx / yolo_width;
223 |         int col = idx % yolo_width;
224 | 
225 |         det->bbox[0] = (col + scale_sigmoidGPU(cur_input[idx + 0 * total_grids], scale_x_y)) / yolo_width;   // [0, 1]
226 |         det->bbox[1] = (row + scale_sigmoidGPU(cur_input[idx + 1 * total_grids], scale_x_y)) / yolo_height;  // [0, 1]
227 |         det->bbox[2] = __expf(cur_input[idx + 2 * total_grids]) * anchors[2 * anchor_idx] / input_w;         // [0, 1]
228 |         det->bbox[3] = __expf(cur_input[idx + 3 * total_grids]) * anchors[2 * anchor_idx + 1] / input_h;     // [0, 1]
229 | 
230 |         det->bbox[0] -= det->bbox[2] / 2;  // shift from center to top-left
231 |         det->bbox[1] -= det->bbox[3] / 2;
232 | 
233 |         det->det_confidence = box_prob;
234 |         det->class_id = class_id;
235 |         det->class_confidence = max_cls_prob;
236 |     }
237 | 
238 |     void YoloLayerPlugin::forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int batchSize)
239 |     {
240 |         int num_elements = batchSize * mNumAnchors * mYoloWidth * mYoloHeight;
241 | 
242 |         //CHECK(cudaMemset(output, 0, num_elements * sizeof(Detection)));
243 | 
244 |         CalDetection<<<(num_elements + mThreadCount - 1) / mThreadCount, mThreadCount>>>
245 |             (inputs[0], output, mYoloWidth, mYoloHeight, mNumAnchors, (const float*) mAnchors, mNumClasses, mInputWidth, mInputHeight, mScaleXY);
246 |     }
247 | 
248 |     int YoloLayerPlugin::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream)
249 |     {
250 |         forwardGpu((const float* const*)inputs, (float*)outputs[0], stream, batchSize);
251 |         return 0;
252 |     }
253 | 
254 |     YoloPluginCreator::YoloPluginCreator()
255 |     {
256 |         mPluginAttributes.clear();
257 | 
258 |         mFC.nbFields = mPluginAttributes.size();
259 |         mFC.fields = mPluginAttributes.data();
260 |     }
261 | 
262 |     const char* YoloPluginCreator::getPluginName() const
263 |     {
264 |         return "YoloLayer_TRT";
265 |     }
266 | 
267 |     const char* YoloPluginCreator::getPluginVersion() const
268 |     {
269 |         return "1";
270 |     }
271 | 
272 |     const PluginFieldCollection* YoloPluginCreator::getFieldNames()
273 |     {
274 |         return &mFC;
275 |     }
276 | 
277 |     IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
278 |     {
279 |         assert(!strcmp(name, getPluginName()));
280 |         const PluginField* fields = fc->fields;
281 |         int yolo_width, yolo_height, num_anchors = 0;
282 |         float anchors[MAX_ANCHORS * 2];
283 |         int num_classes;
284 |         int input_width, input_height;
285 |         float scale_x_y = 1.0;
286 | 
287 |         for (int i = 0; i < fc->nbFields; ++i)
288 |         {
289 |             const char* attrName = fields[i].name;
290 |             if (!strcmp(attrName, "yoloWidth"))
291 |             {
292 |                 assert(fields[i].type == PluginFieldType::kINT32);
293 |                 yolo_width = *(static_cast<const int*>(fields[i].data));
294 |             }
295 |             else if (!strcmp(attrName, "yoloHeight"))
296 |             {
297 |                 assert(fields[i].type == PluginFieldType::kINT32);
298 |                 yolo_height = *(static_cast<const int*>(fields[i].data));
299 |             }
300 |             else if (!strcmp(attrName, "numAnchors"))
301 |             {
302 |                 assert(fields[i].type == PluginFieldType::kINT32);
303 |                 num_anchors = *(static_cast<const int*>(fields[i].data));
304 |             }
305 |             else if (!strcmp(attrName, "numClasses"))
306 |             {
307 |                 assert(fields[i].type == PluginFieldType::kINT32);
308 |                 num_classes = *(static_cast<const int*>(fields[i].data));
309 |             }
310 |             else if (!strcmp(attrName, "inputWidth"))
311 |             {
312 |                 assert(fields[i].type == PluginFieldType::kINT32);
313 |                 input_width = *(static_cast<const int*>(fields[i].data));
314 |             }
315 |             else if (!strcmp(attrName, "inputHeight"))
316 |             {
317 |                 assert(fields[i].type == PluginFieldType::kINT32);
318 |                 input_height = *(static_cast<const int*>(fields[i].data));
319 |             }
320 |             else if (!strcmp(attrName, "anchors")){
321 |                 assert(num_anchors > 0 && num_anchors <= MAX_ANCHORS);
322 |                 assert(fields[i].type == PluginFieldType::kFLOAT32);
323 |                 memcpy(anchors, static_cast<const float*>(fields[i].data), num_anchors * 2 * sizeof(float));
324 |             }
325 |             else if (!strcmp(attrName, "scaleXY"))
326 |             {
327 |                 assert(fields[i].type == PluginFieldType::kFLOAT32);
328 |                 scale_x_y = *(static_cast<const float*>(fields[i].data));
329 |             }
330 |         }
331 |         assert(yolo_width > 0 && yolo_height > 0);
332 |         assert(anchors[0] > 0.0f && anchors[1] > 0.0f);
333 |         assert(num_classes > 0);
334 |         assert(input_width > 0 && input_height > 0);
335 |         assert(scale_x_y >= 1.0);
336 | 
337 |         YoloLayerPlugin* obj = new YoloLayerPlugin(yolo_width, yolo_height, num_anchors, anchors, num_classes, input_width, input_height, scale_x_y);
338 |         obj->setPluginNamespace(mNamespace.c_str());
339 |         return obj;
340 |     }
341 | 
342 |     IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
343 |     {
344 |         YoloLayerPlugin* obj = new YoloLayerPlugin(serialData, serialLength);
345 |         obj->setPluginNamespace(mNamespace.c_str());
346 |         return obj;
347 |     }
348 | 
349 |     PluginFieldCollection YoloPluginCreator::mFC{};
350 |     std::vector<PluginField> YoloPluginCreator::mPluginAttributes;
351 |     REGISTER_TENSORRT_PLUGIN(YoloPluginCreator);
352 | } // namespace nvinfer1
353 | 


--------------------------------------------------------------------------------
/plugins/yolo_layer.h:
--------------------------------------------------------------------------------
  1 | #ifndef _YOLO_LAYER_H
  2 | #define _YOLO_LAYER_H
  3 | 
  4 | #include <cassert>
  5 | #include <vector>
  6 | #include <string>
  7 | #include <iostream>
  8 | #include "math_constants.h"
  9 | #include "NvInfer.h"
 10 | 
 11 | #define MAX_ANCHORS 6
 12 | 
 13 | #define CHECK(status)                                           \
 14 |     do {                                                        \
 15 |         auto ret = status;                                      \
 16 |         if (ret != 0) {                                         \
 17 |             std::cerr << "Cuda failure in file '" << __FILE__   \
 18 |                       << "' line " << __LINE__                  \
 19 |                       << ": " << ret << std::endl;              \
 20 |             abort();                                            \
 21 |         }                                                       \
 22 |     } while (0)
 23 | 
 24 | namespace Yolo
 25 | {
 26 |     static constexpr float IGNORE_THRESH = 0.01f;
 27 | 
 28 |     struct alignas(float) Detection {
 29 |         float bbox[4];  // x, y, w, h
 30 |         float det_confidence;
 31 |         float class_id;
 32 |         float class_confidence;
 33 |     };
 34 | }
 35 | 
 36 | namespace nvinfer1
 37 | {
 38 |     class YoloLayerPlugin: public IPluginV2IOExt
 39 |     {
 40 |         public:
 41 |             YoloLayerPlugin(int yolo_width, int yolo_height, int num_anchors, float* anchors, int num_classes, int input_width, int input_height, float scale_x_y);
 42 |             YoloLayerPlugin(const void* data, size_t length);
 43 | 
 44 |             ~YoloLayerPlugin() override = default;
 45 | 
 46 |             int getNbOutputs() const override
 47 |             {
 48 |                 return 1;
 49 |             }
 50 | 
 51 |             Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override;
 52 | 
 53 |             int initialize() override;
 54 | 
 55 |             void terminate() override;
 56 | 
 57 |             virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0;}
 58 | 
 59 |             virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override;
 60 | 
 61 |             virtual size_t getSerializationSize() const override;
 62 | 
 63 |             virtual void serialize(void* buffer) const override;
 64 | 
 65 |             bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override {
 66 |                 return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
 67 |             }
 68 | 
 69 |             const char* getPluginType() const override;
 70 | 
 71 |             const char* getPluginVersion() const override;
 72 | 
 73 |             void destroy() override;
 74 | 
 75 |             IPluginV2IOExt* clone() const override;
 76 | 
 77 |             void setPluginNamespace(const char* pluginNamespace) override;
 78 | 
 79 |             const char* getPluginNamespace() const override;
 80 | 
 81 |             DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override;
 82 | 
 83 |             bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override;
 84 | 
 85 |             bool canBroadcastInputAcrossBatch(int inputIndex) const override;
 86 | 
 87 |             void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override;
 88 | 
 89 |             void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override TRTNOEXCEPT;
 90 | 
 91 |             void detachFromContext() override;
 92 | 
 93 |         private:
 94 |             void forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int batchSize = 1);
 95 | 
 96 |             int mThreadCount = 64;
 97 |             int mYoloWidth, mYoloHeight, mNumAnchors;
 98 |             float mAnchorsHost[MAX_ANCHORS * 2];
 99 |             float *mAnchors;  // allocated on GPU
100 |             int mNumClasses;
101 |             int mInputWidth, mInputHeight;
102 |             float mScaleXY;
103 | 
104 |             const char* mPluginNamespace;
105 |     };
106 | 
107 |     class YoloPluginCreator : public IPluginCreator
108 |     {
109 |         public:
110 |             YoloPluginCreator();
111 | 
112 |             ~YoloPluginCreator() override = default;
113 | 
114 |             const char* getPluginName() const override;
115 | 
116 |             const char* getPluginVersion() const override;
117 | 
118 |             const PluginFieldCollection* getFieldNames() override;
119 | 
120 |             IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override;
121 | 
122 |             IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override;
123 | 
124 |             void setPluginNamespace(const char* libNamespace) override
125 |             {
126 |                 mNamespace = libNamespace;
127 |             }
128 | 
129 |             const char* getPluginNamespace() const override
130 |             {
131 |                 return mNamespace.c_str();
132 |             }
133 | 
134 |         private:
135 |             static PluginFieldCollection mFC;
136 |             static std::vector<nvinfer1::PluginField> mPluginAttributes;
137 |             std::string mNamespace;
138 |     };
139 | };
140 | 
141 | #endif
142 | 


--------------------------------------------------------------------------------
/plugins/yolo_layer.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/plugins/yolo_layer.o


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | ## ! DO NOT MANUALLY INVOKE THIS setup.py, USE CATKIN INSTEAD
 2 | 
 3 | from distutils.core import setup
 4 | from catkin_pkg.python_setup import generate_distutils_setup
 5 | 
 6 | # fetch values from package.xml
 7 | setup_args = generate_distutils_setup(
 8 |     packages=["yolov4_trt"], package_dir={"": "src"}
 9 | )
10 | 
11 | setup(**setup_args)
12 | 


--------------------------------------------------------------------------------
/src/image_converter.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and associated documentation files (the "Software"),
  6 |  * to deal in the Software without restriction, including without limitation
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 |  * and/or sell copies of the Software, and to permit persons to whom the
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #include "image_converter.h"
 24 | 
 25 | #include <jetson-utils/cudaColorspace.h>
 26 | #include <jetson-utils/cudaMappedMemory.h>
 27 | 
 28 | 
 29 | 
 30 | static imageFormat imageFormatFromEncoding( const std::string& encoding )
 31 | {
 32 | 	if( encoding == sensor_msgs::image_encodings::BGR8 )
 33 | 		return IMAGE_BGR8;
 34 | 	else if( encoding == sensor_msgs::image_encodings::BGRA8 )
 35 | 		return IMAGE_BGRA8;
 36 | 	else if( encoding == sensor_msgs::image_encodings::RGB8 )
 37 | 		return IMAGE_RGB8;
 38 | 	else if( encoding == sensor_msgs::image_encodings::RGBA8 )
 39 | 		return IMAGE_RGBA8;
 40 | 	else if( encoding == sensor_msgs::image_encodings::MONO8 )
 41 | 		return IMAGE_GRAY8;
 42 | 	else if( encoding == sensor_msgs::image_encodings::YUV422 )
 43 | 		return IMAGE_UYVY;
 44 | 	else if( encoding == sensor_msgs::image_encodings::BAYER_RGGB8 )
 45 | 		return IMAGE_BAYER_RGGB;
 46 | 	else if( encoding == sensor_msgs::image_encodings::BAYER_BGGR8 )
 47 | 		return IMAGE_BAYER_BGGR;
 48 | 	else if( encoding == sensor_msgs::image_encodings::BAYER_GBRG8 )
 49 | 		return IMAGE_BAYER_GBRG;
 50 | 	else if( encoding == sensor_msgs::image_encodings::BAYER_GRBG8 )
 51 | 		return IMAGE_BAYER_GRBG;
 52 | 
 53 | 	return IMAGE_UNKNOWN;
 54 | }
 55 | 
 56 | static std::string imageFormatToEncoding( imageFormat fmt )
 57 | {
 58 | 	switch(fmt)
 59 | 	{
 60 | 		case IMAGE_BGR8:		return sensor_msgs::image_encodings::BGR8;
 61 | 		case IMAGE_BGRA8:		return sensor_msgs::image_encodings::BGRA8;
 62 | 		case IMAGE_RGB8:		return sensor_msgs::image_encodings::RGB8;
 63 | 		case IMAGE_RGBA8:		return sensor_msgs::image_encodings::RGBA8;
 64 | 		case IMAGE_GRAY8:		return sensor_msgs::image_encodings::MONO8;
 65 | 		case IMAGE_UYVY:		return sensor_msgs::image_encodings::YUV422;
 66 | 		case IMAGE_BAYER_RGGB:	return sensor_msgs::image_encodings::BAYER_RGGB8;
 67 | 		case IMAGE_BAYER_BGGR:	return sensor_msgs::image_encodings::BAYER_BGGR8;
 68 | 		case IMAGE_BAYER_GBRG:	return sensor_msgs::image_encodings::BAYER_GBRG8;
 69 | 		case IMAGE_BAYER_GRBG:	return sensor_msgs::image_encodings::BAYER_GRBG8;
 70 | 	}
 71 | 
 72 | 	return "invalid";
 73 | }
 74 | 
 75 | 
 76 | // constructor
 77 | imageConverter::imageConverter()
 78 | {
 79 | 	mWidth  	  = 0;
 80 | 	mHeight 	  = 0;
 81 | 	mSizeInput  = 0;
 82 | 	mSizeOutput = 0;
 83 | 
 84 | 	mInputCPU = NULL;
 85 | 	mInputGPU = NULL;
 86 | 
 87 | 	mOutputCPU = NULL;
 88 | 	mOutputGPU = NULL;
 89 | }
 90 | 
 91 | 
 92 | // destructor
 93 | imageConverter::~imageConverter()
 94 | {
 95 | 	Free();	
 96 | }
 97 | 
 98 | 
 99 | // Free
100 | void imageConverter::Free()
101 | {
102 | 	if( mInputCPU != NULL )
103 | 	{
104 | 		CUDA(cudaFreeHost(mInputCPU));
105 | 
106 | 		mInputCPU = NULL;
107 | 		mInputGPU = NULL;
108 | 	}
109 | 
110 | 	if( mOutputCPU != NULL )
111 | 	{
112 | 		CUDA(cudaFreeHost(mOutputCPU));
113 | 
114 | 		mOutputCPU = NULL;
115 | 		mOutputGPU = NULL;
116 | 	}
117 | }
118 | 
119 | // Convert
120 | bool imageConverter::Convert( const sensor_msgs::ImageConstPtr& input )
121 | {
122 | 	ROS_DEBUG("converting %ux%u %s image", input->width, input->height, input->encoding.c_str());
123 | 
124 | 	// parse the input format
125 | 	const imageFormat input_format = imageFormatFromEncoding(input->encoding);
126 | 
127 | 	if( input_format == IMAGE_UNKNOWN )
128 | 	{
129 | 		ROS_ERROR("image encoding %s is not a compatible format to use with ros_deep_learning", input->encoding.c_str());
130 | 		return false;
131 | 	}
132 | 
133 | 	// assure memory allocation
134 | 	if( !Resize(input->width, input->height, input_format) )
135 | 		return false;
136 | 	
137 | 	// copy input to shared memory
138 | 	memcpy(mInputCPU, input->data.data(), imageFormatSize(input_format, input->width, input->height));			
139 | 	
140 | 	// convert image format
141 | 	if( CUDA_FAILED(cudaConvertColor(mInputGPU, input_format, mOutputGPU, InternalFormat, input->width, input->height)) )
142 | 	{
143 | 		ROS_ERROR("failed to convert %ux%u image (from %s to %s) with CUDA", mWidth, mHeight, imageFormatToStr(input_format), imageFormatToStr(InternalFormat));
144 | 		return false;
145 | 	}
146 | 
147 | 	return true;
148 | }
149 | 
150 | 
151 | // Convert
152 | bool imageConverter::Convert( sensor_msgs::Image& msg, imageFormat format )
153 | {
154 | 	return Convert(msg, format, ImageGPU());
155 | }
156 | 
157 | 
158 | // Convert
159 | bool imageConverter::Convert( sensor_msgs::Image& msg, imageFormat format, PixelType* imageGPU )
160 | {
161 | 	if( !mInputCPU || !imageGPU || mWidth == 0 || mHeight == 0 || mSizeInput == 0 || mSizeOutput == 0 )
162 | 		return false;
163 | 	
164 | 	// perform colorspace conversion into the desired encoding
165 | 	// in this direction, we reverse use of input/output pointers
166 | 	if( CUDA_FAILED(cudaConvertColor(imageGPU, InternalFormat, mInputGPU, format, mWidth, mHeight)) )
167 | 	{
168 | 		ROS_ERROR("failed to convert %ux%u image (from %s to %s) with CUDA", mWidth, mHeight, imageFormatToStr(InternalFormat), imageFormatToStr(format));
169 | 		return false;
170 | 	}
171 | 
172 | 	// calculate size of the msg
173 | 	const size_t msg_size = imageFormatSize(format, mWidth, mHeight);
174 | 
175 | 	// allocate msg storage
176 | 	msg.data.resize(msg_size);
177 | 
178 | 	// copy the converted image into the msg
179 | 	memcpy(msg.data.data(), mInputCPU, msg_size);
180 | 
181 | 	// populate metadata
182 | 	msg.width  = mWidth;
183 | 	msg.height = mHeight;
184 | 	msg.step   = (mWidth * imageFormatDepth(format)) / 8;
185 | 
186 | 	msg.encoding     = imageFormatToEncoding(format);
187 | 	msg.is_bigendian = false;
188 | 	
189 | 	return true;
190 | }
191 | 
192 | 
193 | // Resize
194 | bool imageConverter::Resize( uint32_t width, uint32_t height, imageFormat inputFormat )
195 | {
196 | 	const size_t input_size  = imageFormatSize(inputFormat, width, height);
197 | 	const size_t output_size = imageFormatSize(InternalFormat, width, height);
198 | 
199 | 	if( input_size != mSizeInput || output_size != mSizeOutput || mWidth != width || mHeight != height )
200 | 	{
201 | 		Free();
202 | 
203 | 		if( !cudaAllocMapped((void**)&mInputCPU, (void**)&mInputGPU, input_size) ||
204 | 		    !cudaAllocMapped((void**)&mOutputCPU, (void**)&mOutputGPU, output_size) )
205 | 		{
206 | 			ROS_ERROR("failed to allocate memory for %ux%u image conversion", width, height);
207 | 			return false;
208 | 		}
209 | 
210 | 		ROS_INFO("allocated CUDA memory for %ux%u image conversion", width, height);
211 | 
212 | 		mWidth      = width;
213 | 		mHeight     = height;
214 | 		mSizeInput  = input_size;
215 | 		mSizeOutput = output_size;		
216 | 	}
217 | 
218 | 	return true;
219 | }
220 | 
221 | 
222 | 


--------------------------------------------------------------------------------
/src/image_converter.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and associated documentation files (the "Software"),
  6 |  * to deal in the Software without restriction, including without limitation
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 |  * and/or sell copies of the Software, and to permit persons to whom the
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #ifndef __ROS_DEEP_LEARNING_IMAGE_CONVERTER_H_
 24 | #define __ROS_DEEP_LEARNING_IMAGE_CONVERTER_H_
 25 | 
 26 | #include <jetson-utils/cudaUtility.h>
 27 | #include <jetson-utils/imageFormat.h>
 28 | 
 29 | #include "ros_compat.h"
 30 | 
 31 | 
 32 | /**
 33 |  * GPU image conversion
 34 |  */
 35 | class imageConverter
 36 | {
 37 | public:
 38 | 	/**
 39 | 	 * Output image pixel type
 40 | 	 */
 41 | 	typedef uchar3 PixelType;
 42 | 
 43 | 	/**
 44 | 	 * Image format used for internal CUDA processing
 45 | 	 */
 46 | 	static const imageFormat InternalFormat = IMAGE_RGB8;
 47 | 
 48 | 	/**
 49 | 	 * Image format used for outputting ROS image messages
 50 | 	 */
 51 | 	static const imageFormat ROSOutputFormat = IMAGE_BGR8;
 52 | 
 53 | 	/**
 54 | 	 * Constructor
 55 | 	 */
 56 | 	imageConverter();
 57 | 
 58 | 	/**
 59 | 	 * Destructor
 60 | 	 */
 61 | 	~imageConverter();
 62 | 
 63 | 	/**
 64 | 	 * Free the memory
 65 | 	 */
 66 | 	void Free();
 67 | 
 68 | 	/**
 69 | 	 * Convert to 32-bit RGBA floating point
 70 | 	 */
 71 | 	bool Convert( const sensor_msgs::ImageConstPtr& input );
 72 | 
 73 | 	/**
 74 | 	 * Convert to ROS sensor_msgs::Image message
 75 | 	 */
 76 | 	bool Convert( sensor_msgs::Image& msg_out, imageFormat outputFormat );
 77 | 
 78 | 	/**
 79 | 	 * Convert to ROS sensor_msgs::Image message
 80 | 	 */
 81 | 	bool Convert( sensor_msgs::Image& msg_out, imageFormat outputFormat, PixelType* imageGPU );
 82 | 
 83 | 	/**
 84 | 	 * Resize the memory (if necessary)
 85 | 	 */
 86 | 	bool Resize( uint32_t width, uint32_t height, imageFormat inputFormat );
 87 | 
 88 | 	/**
 89 | 	 * Retrieve the converted image width
 90 | 	 */
 91 | 	inline uint32_t GetWidth() const		{ return mWidth; }
 92 | 
 93 | 	/**
 94 | 	 * Retrieve the converted image height
 95 | 	 */
 96 | 	inline uint32_t GetHeight() const		{ return mHeight; }
 97 | 
 98 | 	/**
 99 | 	 * Retrieve the GPU pointer of the converted image
100 | 	 */
101 | 	inline PixelType* ImageGPU() const		{ return mOutputGPU; }
102 | 
103 | private:
104 | 
105 | 	uint32_t mWidth;
106 | 	uint32_t mHeight;	
107 | 	size_t   mSizeInput;
108 | 	size_t   mSizeOutput;
109 | 
110 | 	void* mInputCPU;
111 | 	void* mInputGPU;
112 | 
113 | 	PixelType* mOutputCPU;
114 | 	PixelType* mOutputGPU;
115 | };
116 | 
117 | #endif
118 | 
119 | 


--------------------------------------------------------------------------------
/src/node_video_source.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and associated documentation files (the "Software"),
  6 |  * to deal in the Software without restriction, including without limitation
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 |  * and/or sell copies of the Software, and to permit persons to whom the
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #include "ros_compat.h"
 24 | #include "image_converter.h"
 25 | 
 26 | #include <jetson-utils/videoSource.h>
 27 | 
 28 | 
 29 | 
 30 | // globals	
 31 | videoSource* stream = NULL;
 32 | imageConverter* image_cvt = NULL;
 33 | Publisher<sensor_msgs::Image> image_pub = NULL;
 34 | 
 35 | 
 36 | // aquire and publish camera frame
 37 | bool aquireFrame()
 38 | {
 39 | 	imageConverter::PixelType* nextFrame = NULL;
 40 | 
 41 | 	// get the latest frame
 42 | 	if( !stream->Capture(&nextFrame, 1000) )
 43 | 	{
 44 | 		ROS_ERROR("failed to capture next frame");
 45 | 		return false;
 46 | 	}
 47 | 
 48 | 	// assure correct image size
 49 | 	if( !image_cvt->Resize(stream->GetWidth(), stream->GetHeight(), imageConverter::ROSOutputFormat) )
 50 | 	{
 51 | 		ROS_ERROR("failed to resize camera image converter");
 52 | 		return false;
 53 | 	}
 54 | 
 55 | 	// populate the message
 56 | 	sensor_msgs::Image msg;
 57 | 
 58 | 	if( !image_cvt->Convert(msg, imageConverter::ROSOutputFormat, nextFrame) )
 59 | 	{
 60 | 		ROS_ERROR("failed to convert video stream frame to sensor_msgs::Image");
 61 | 		return false;
 62 | 	}
 63 | 
 64 | 	// populate timestamp in header field
 65 | 	msg.header.stamp = ROS_TIME_NOW();
 66 | 
 67 | 	// publish the message
 68 | 	image_pub->publish(msg);
 69 | 	ROS_DEBUG("published %ux%u video frame", stream->GetWidth(), stream->GetHeight());
 70 | 	
 71 | 	return true;
 72 | }
 73 | 
 74 | 
 75 | // node main loop
 76 | int main(int argc, char **argv)
 77 | {
 78 | 	/*
 79 | 	 * create node instance
 80 | 	 */
 81 | 	ROS_CREATE_NODE("video_source");
 82 | 
 83 | 	/*
 84 | 	 * declare parameters
 85 | 	 */
 86 | 	videoOptions video_options;
 87 | 
 88 | 	std::string resource_str;
 89 | 	std::string codec_str;
 90 | 
 91 | 	int video_width = video_options.width;
 92 | 	int video_height = video_options.height;
 93 | 
 94 | 	ROS_DECLARE_PARAMETER("resource", resource_str);
 95 | 	ROS_DECLARE_PARAMETER("codec", codec_str);
 96 | 	ROS_DECLARE_PARAMETER("width", video_width);
 97 | 	ROS_DECLARE_PARAMETER("height", video_height);
 98 | 	ROS_DECLARE_PARAMETER("framerate", video_options.frameRate);
 99 | 	ROS_DECLARE_PARAMETER("loop", video_options.loop);
100 | 	
101 | 	/*
102 | 	 * retrieve parameters
103 | 	 */
104 | 	ROS_GET_PARAMETER("resource", resource_str);
105 | 	ROS_GET_PARAMETER("codec", codec_str);
106 | 	ROS_GET_PARAMETER("width", video_width);
107 | 	ROS_GET_PARAMETER("height", video_height);
108 | 	ROS_GET_PARAMETER("framerate", video_options.frameRate);
109 | 	ROS_GET_PARAMETER("loop", video_options.loop);
110 | 	
111 | 	if( resource_str.size() == 0 )
112 | 	{
113 | 		ROS_ERROR("resource param wasn't set - please set the node's resource parameter to the input device/filename/URL");
114 | 		return 0;
115 | 	}
116 | 
117 | 	if( codec_str.size() != 0 )
118 | 		video_options.codec = videoOptions::CodecFromStr(codec_str.c_str());
119 | 
120 | 	video_options.width = video_width;
121 | 	video_options.height = video_height;
122 | 
123 | 	ROS_INFO("opening video source: %s", resource_str.c_str());
124 | 
125 | 	/*
126 | 	 * open video source
127 | 	 */
128 | 	stream = videoSource::Create(resource_str.c_str(), video_options);
129 | 
130 | 	if( !stream )
131 | 	{
132 | 		ROS_ERROR("failed to open video source");
133 | 		return 0;
134 | 	}
135 | 
136 | 
137 | 	/*
138 | 	 * create image converter
139 | 	 */
140 | 	image_cvt = new imageConverter();
141 | 
142 | 	if( !image_cvt )
143 | 	{
144 | 		ROS_ERROR("failed to create imageConverter");
145 | 		return 0;
146 | 	}
147 | 
148 | 
149 | 	/*
150 | 	 * advertise publisher topics
151 | 	 */
152 | 	ROS_CREATE_PUBLISHER(sensor_msgs::Image, "raw", 2, image_pub);
153 | 
154 | 
155 | 	/*
156 | 	 * start the camera streaming
157 | 	 */
158 | 	if( !stream->Open() )
159 | 	{
160 | 		ROS_ERROR("failed to start streaming video source");
161 | 		return 0;
162 | 	}
163 | 
164 | 
165 | 	/*
166 | 	 * start publishing video frames
167 | 	 */
168 | 	while( ROS_OK() )
169 | 	{
170 | 		if( !aquireFrame() )
171 | 		{
172 | 			if( !stream->IsStreaming() )
173 | 			{
174 | 				ROS_INFO("stream is closed or reached EOS, exiting node...");
175 | 				break;
176 | 			}
177 | 		}
178 | 
179 | 		if( ROS_OK() )
180 | 			ROS_SPIN_ONCE();
181 | 	}
182 | 
183 | 
184 | 	/*
185 | 	 * free resources
186 | 	 */
187 | 	delete stream;
188 | 	delete image_cvt;
189 | 
190 | 	return 0;
191 | }
192 | 
193 | 


--------------------------------------------------------------------------------
/src/ros_compat.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and associated documentation files (the "Software"),
 6 |  * to deal in the Software without restriction, including without limitation
 7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 8 |  * and/or sell copies of the Software, and to permit persons to whom the
 9 |  * Software is furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 |  * DEALINGS IN THE SOFTWARE.
21 |  */
22 | 
23 | #include "ros_compat.h"
24 | 
25 | #ifdef ROS2
26 | 
27 | std::string __node_name_;
28 | rclcpp::Clock::SharedPtr __global_clock_;
29 | 
30 | #endif
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/src/ros_compat.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a
  5 |  * copy of this software and associated documentation files (the "Software"),
  6 |  * to deal in the Software without restriction, including without limitation
  7 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 |  * and/or sell copies of the Software, and to permit persons to whom the
  9 |  * Software is furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 |  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 20 |  * DEALINGS IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #ifndef __ROS_DEEP_LEARNING_COMPATABILITY_H_
 24 | #define __ROS_DEEP_LEARNING_COMPATABILITY_H_
 25 | 
 26 | #ifdef ROS1
 27 | 
 28 | #include <ros/ros.h>
 29 | #include <sensor_msgs/Image.h>
 30 | #include <sensor_msgs/image_encodings.h>
 31 | #include <vision_msgs/Classification2D.h>
 32 | #include <vision_msgs/Detection2DArray.h>
 33 | #include <vision_msgs/VisionInfo.h>
 34 | 
 35 | #define ROS_CREATE_NODE(name)				\
 36 | 		ros::init(argc, argv, name); 		\
 37 |  		ros::NodeHandle nh; 			\
 38 | 		ros::NodeHandle private_nh("~");
 39 | 
 40 | template<class MessageType>
 41 | using Publisher = ros::Publisher*;
 42 | 
 43 | #define ROS_CREATE_PUBLISHER(msg, topic, queue, ptr)				ros::Publisher __publisher_##ptr = private_nh.advertise<msg>(topic, queue); ptr = &__publisher_##ptr
 44 | #define ROS_CREATE_PUBLISHER_STATUS(msg, topic, queue, callback, ptr)	ros::Publisher __publisher_##ptr = private_nh.advertise<msg>(topic, queue, [&](const ros::SingleSubscriberPublisher& connect_msg){callback();}); ptr = &__publisher_##ptr
 45 | 
 46 | #define ROS_CREATE_SUBSCRIBER(msg, topic, queue, callback)			private_nh.subscribe(topic, queue, callback)
 47 | #define ROS_SUBSCRIBER_TOPIC(subscriber)						subscriber.getTopic()
 48 | 
 49 | #define ROS_NUM_SUBSCRIBERS(publisher)							publisher->getNumSubscribers()
 50 | #define ROS_GET_NAMESPACE()									private_nh.getNamespace()
 51 | #define ROS_GET_PARAMETER(name, val)							private_nh.getParam(name, val)
 52 | #define ROS_GET_PARAMETER_OR(name, val, alt)						private_nh.param(name, val, alt)
 53 | #define ROS_SET_PARAMETER(name, val)							private_nh.setParam(name, val)
 54 | 
 55 | template<typename T> static void __ros_declare_parameter( ros::NodeHandle& nh, const std::string& name, const T& default_value )
 56 | {
 57 | 	if( !nh.hasParam(name) )
 58 | 		nh.setParam(name, default_value);
 59 | }
 60 | 
 61 | #define ROS_DECLARE_PARAMETER(name, default_value)				__ros_declare_parameter(private_nh, name, default_value)
 62 | 
 63 | #define ROS_TIME_NOW()										ros::Time::now()
 64 | #define ROS_SPIN()											ros::spin()
 65 | #define ROS_SPIN_ONCE()										ros::spinOnce()
 66 | #define ROS_OK()											ros::ok()
 67 | #define ROS_SHUTDOWN() 										ros::shutdown()
 68 | 
 69 | #elif ROS2
 70 | 
 71 | #include <rclcpp/rclcpp.hpp>
 72 | #include <sensor_msgs/msg/image.hpp>
 73 | #include <sensor_msgs/image_encodings.hpp>
 74 | #include <vision_msgs/msg/classification2_d.hpp>
 75 | #include <vision_msgs/msg/detection2_d_array.hpp>
 76 | #include <vision_msgs/msg/vision_info.hpp>
 77 | 
 78 | namespace vision_msgs
 79 | {
 80 | 	typedef msg::Classification2D Classification2D;
 81 | 	typedef msg::Detection2D		Detection2D;
 82 | 	typedef msg::Detection2DArray Detection2DArray;
 83 | 	typedef msg::ObjectHypothesis ObjectHypothesis;
 84 | 	typedef msg::ObjectHypothesisWithPose ObjectHypothesisWithPose;
 85 | 	typedef msg::VisionInfo 		VisionInfo;
 86 | }
 87 | 
 88 | namespace sensor_msgs
 89 | {
 90 | 	typedef msg::Image Image;
 91 | 	typedef msg::Image::SharedPtr ImagePtr;
 92 | 	typedef msg::Image::ConstSharedPtr ImageConstPtr;
 93 | }
 94 | 
 95 | namespace ros = rclcpp;
 96 | 
 97 | extern std::string __node_name_;
 98 | 
 99 | #define ROS_INFO(...)	RCUTILS_LOG_INFO_NAMED(__node_name_.c_str(), __VA_ARGS__)
100 | #define ROS_DEBUG(...)	RCUTILS_LOG_DEBUG_NAMED(__node_name_.c_str(), __VA_ARGS__)
101 | #define ROS_ERROR(...)   RCUTILS_LOG_ERROR_NAMED(__node_name_.c_str(), __VA_ARGS__)
102 | 
103 | #define ROS_CREATE_NODE(name)							\
104 | 		rclcpp::init(argc, argv);					\
105 | 		auto node = rclcpp::Node::make_shared(name, "/" name); \
106 | 		__node_name_ = name; \
107 | 		__global_clock_ = std::make_shared<rclcpp::Clock>(RCL_ROS_TIME);
108 | 
109 | template<class MessageType>
110 | using Publisher = std::shared_ptr<ros::Publisher<MessageType>>;
111 | 
112 | #define ROS_CREATE_PUBLISHER(msg, topic, queue, ptr)				ptr = node->create_publisher<msg>(topic, queue)
113 | #define ROS_CREATE_PUBLISHER_STATUS(msg, topic, queue, callback, ptr)	ptr = node->create_publisher<msg>(topic, queue); rclcpp::TimerBase::SharedPtr __timer_publisher_##ptr = node->create_wall_timer(std::chrono::milliseconds(500), \
114 | 														[&](){ static int __subscribers_##ptr=0; const size_t __subscription_count=ptr->get_subscription_count(); if(__subscribers_##ptr != __subscription_count) { if(__subscription_count > __subscribers_##ptr) callback(); __subscribers_##ptr=__subscription_count; }}) 
115 | 
116 | #define ROS_CREATE_SUBSCRIBER(msg, topic, queue, callback)			node->create_subscription<msg>(topic, queue, callback)
117 | #define ROS_SUBSCRIBER_TOPIC(subscriber)						subscriber->get_topic_name();
118 | 
119 | #define ROS_NUM_SUBSCRIBERS(publisher)							publisher->get_subscription_count()
120 | #define ROS_GET_NAMESPACE()									node->get_namespace()
121 | #define ROS_GET_PARAMETER(name, val)							node->get_parameter(name, val)
122 | #define ROS_GET_PARAMETER_OR(name, val, alt)						node->get_parameter_or(name, val, alt)	// TODO set undefined params in param server
123 | #define ROS_SET_PARAMETER(name, val)							node->set_parameter(rclcpp::Parameter(name, val))
124 | 
125 | #define ROS_DECLARE_PARAMETER(name, default_value)				node->declare_parameter(name, default_value)
126 | 
127 | extern rclcpp::Clock::SharedPtr __global_clock_;
128 | 
129 | #define ROS_TIME_NOW()										__global_clock_->now()
130 | #define ROS_SPIN()											rclcpp::spin(node)
131 | #define ROS_SPIN_ONCE()										rclcpp::spin_some(node)
132 | #define ROS_OK()											rclcpp::ok()
133 | #define ROS_SHUTDOWN() 										rclcpp::shutdown()
134 | 
135 | #endif
136 | #endif
137 | 
138 | 


--------------------------------------------------------------------------------
/trt_yolo_v3.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import os
  4 | import time
  5 | 
  6 | import cv2
  7 | import pycuda.autoinit  # For initializing CUDA driver
  8 | import pycuda.driver as cuda
  9 | 
 10 | from utils.yolo_classes import get_cls_dict
 11 | from utils.display import open_window, set_display, show_fps
 12 | from utils.visualization import BBoxVisualization
 13 | from utils.yolo_with_plugins import TrtYOLO
 14 | 
 15 | import rospy
 16 | import rospkg
 17 | from yolov4_trt_ros.msg import Detector2DArray
 18 | from yolov4_trt_ros.msg import Detector2D
 19 | from vision_msgs.msg import BoundingBox2D
 20 | from sensor_msgs.msg import Image
 21 | from cv_bridge import CvBridge, CvBridgeError
 22 | 
 23 | 
 24 | class yolov4(object):
 25 |     def __init__(self):
 26 |         """ Constructor """
 27 | 
 28 |         self.bridge = CvBridge()
 29 |         self.init_params()
 30 |         self.init_yolo()
 31 |         self.cuda_ctx = cuda.Device(0).make_context()
 32 |         self.trt_yolo = TrtYOLO(
 33 |             (self.model_path + self.model), (self.h, self.w), self.category_num)
 34 | 
 35 |     def __del__(self):
 36 |         """ Destructor """
 37 | 
 38 |         self.cuda_ctx.pop()
 39 |         del self.trt_yolo
 40 |         del self.cuda_ctx
 41 | 
 42 |     def clean_up(self):
 43 |         """ Backup destructor: Release cuda memory """
 44 | 
 45 |         if self.trt_yolo is not None:
 46 |             self.cuda_ctx.pop()
 47 |             del self.trt_yolo
 48 |             del self.cuda_ctx
 49 | 
 50 |     def init_params(self):
 51 |         """ Initializes ros parameters """
 52 |         
 53 |         rospack = rospkg.RosPack()
 54 |         package_path = rospack.get_path("yolov4_trt_ros")
 55 |         self.video_topic = rospy.get_param("/video_topic", "/video_source/raw")
 56 |         self.model = rospy.get_param("/model", "yolov3")
 57 |         self.model_path = rospy.get_param(
 58 |             "/model_path", package_path + "/yolo/")
 59 |         self.input_shape = rospy.get_param("/input_shape", "416")
 60 |         self.category_num = rospy.get_param("/category_number", 80)
 61 |         self.conf_th = rospy.get_param("/confidence_threshold", 0.5)
 62 |         self.show_img = rospy.get_param("/show_image", True)
 63 |         self.image_sub = rospy.Subscriber(
 64 |             self.video_topic, Image, self.img_callback, queue_size=1, buff_size=1920*1080*3)
 65 |         self.detection_pub = rospy.Publisher(
 66 |             "detections", Detector2DArray, queue_size=1)
 67 |         self.overlay_pub = rospy.Publisher(
 68 |             "/result/overlay", Image, queue_size=1)
 69 | 
 70 |     def init_yolo(self):
 71 |         """ Initialises yolo parameters required for trt engine """
 72 | 
 73 |         if self.model.find('-') == -1:
 74 |             self.model = self.model + "-" + self.input_shape
 75 |             
 76 |         yolo_dim = self.model.split('-')[-1]
 77 | 
 78 |         if 'x' in yolo_dim:
 79 |             dim_split = yolo_dim.split('x')
 80 |             if len(dim_split) != 2:
 81 |                 raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
 82 |             self.w, self.h = int(dim_split[0]), int(dim_split[1])
 83 |         else:
 84 |             self.h = self.w = int(yolo_dim)
 85 |         if self.h % 32 != 0 or self.w % 32 != 0:
 86 |             raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
 87 | 
 88 |         cls_dict = get_cls_dict(self.category_num)
 89 |         self.vis = BBoxVisualization(cls_dict)
 90 | 
 91 | 
 92 |     def img_callback(self, ros_img):
 93 |         """Continuously capture images from camera and do object detection """
 94 | 
 95 |         tic = time.time()
 96 | 
 97 |         # converts from ros_img to cv_img for processing
 98 |         try:
 99 |             cv_img = self.bridge.imgmsg_to_cv2(
100 |                 ros_img, desired_encoding="bgr8")
101 |             rospy.logdebug("ROS Image converted for processing")
102 |         except CvBridgeError as e:
103 |             rospy.loginfo("Failed to convert image %s", str(e))
104 | 
105 |         if cv_img is not None:
106 |             boxes, confs, clss = self.trt_yolo.detect(cv_img, self.conf_th)
107 | 
108 |             cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss)
109 |             toc = time.time()
110 |             fps = 1.0 / (toc - tic)
111 | 
112 |             self.publisher(boxes, confs, clss)
113 | 
114 |             if self.show_img:
115 |                 cv_img = show_fps(cv_img, fps)
116 |                 cv2.imshow("YOLOv4 DETECTION RESULTS", cv_img)
117 |                 cv2.waitKey(1)
118 | 
119 |         # converts back to ros_img type for publishing
120 |         try:
121 |             overlay_img = self.bridge.cv2_to_imgmsg(
122 |                 cv_img, encoding="passthrough")
123 |             rospy.logdebug("CV Image converted for publishing")
124 |             self.overlay_pub.publish(overlay_img)
125 |         except CvBridgeError as e:
126 |             rospy.loginfo("Failed to convert image %s", str(e))
127 | 
128 |     def publisher(self, boxes, confs, clss):
129 |         """ Publishes to detector_msgs
130 | 
131 |         Parameters:
132 |         boxes (List(List(int))) : Bounding boxes of all objects
133 |         confs (List(double))	: Probability scores of all objects
134 |         clss  (List(int))	: Class ID of all classes
135 |         """
136 |         detection2d = Detector2DArray()
137 |         detection = Detector2D()
138 |         detection2d.header.stamp = rospy.Time.now()
139 |         
140 |         for i in range(len(boxes)):
141 |             # boxes : xmin, ymin, xmax, ymax
142 |             for _ in boxes:
143 |                 detection.header.stamp = rospy.Time.now()
144 |                 detection.header.frame_id = "camera" # change accordingly
145 |                 detection.results.id = clss[i]
146 |                 detection.results.score = confs[i]
147 | 
148 |                 detection.bbox.center.x = boxes[i][0] + (boxes[i][2] - boxes[i][0])/2
149 |                 detection.bbox.center.y = boxes[i][1] + (boxes[i][3] - boxes[i][1])/2
150 |                 detection.bbox.center.theta = 0.0  # change if required
151 | 
152 |                 detection.bbox.size_x = abs(boxes[i][0] - boxes[i][2])
153 |                 detection.bbox.size_y = abs(boxes[i][1] - boxes[i][3])
154 | 
155 |             detection2d.detections.append(detection)
156 |         
157 |         self.detection_pub.publish(detection2d)
158 | 
159 | 
160 | def main():
161 |     yolo = yolov4()
162 |     rospy.init_node('yolov4_detection', anonymous=True)
163 |     try:
164 |         rospy.spin()
165 |     except KeyboardInterrupt:
166 |         rospy.on_shutdown(yolo.clean_up())
167 |         print("Shutting down")
168 | 
169 | 
170 | if __name__ == '__main__':
171 |     main()
172 | 


--------------------------------------------------------------------------------
/trt_yolo_v4.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import os
  4 | import time
  5 | 
  6 | import cv2
  7 | import pycuda.autoinit  # For initializing CUDA driver
  8 | import pycuda.driver as cuda
  9 | 
 10 | from utils.yolo_classes import get_cls_dict
 11 | from utils.display import open_window, set_display, show_fps
 12 | from utils.visualization import BBoxVisualization
 13 | from utils.yolo_with_plugins import TrtYOLO
 14 | 
 15 | import rospy
 16 | import rospkg
 17 | from yolov4_trt_ros.msg import Detector2DArray
 18 | from yolov4_trt_ros.msg import Detector2D
 19 | from vision_msgs.msg import BoundingBox2D
 20 | from sensor_msgs.msg import Image
 21 | from cv_bridge import CvBridge, CvBridgeError
 22 | 
 23 | 
 24 | class yolov4(object):
 25 |     def __init__(self):
 26 |         """ Constructor """
 27 | 
 28 |         self.bridge = CvBridge()
 29 |         self.init_params()
 30 |         self.init_yolo()
 31 |         self.cuda_ctx = cuda.Device(0).make_context()
 32 |         self.trt_yolo = TrtYOLO(
 33 |             (self.model_path + self.model), (self.h, self.w), self.category_num)
 34 | 
 35 |     def __del__(self):
 36 |         """ Destructor """
 37 | 
 38 |         self.cuda_ctx.pop()
 39 |         del self.trt_yolo
 40 |         del self.cuda_ctx
 41 | 
 42 |     def clean_up(self):
 43 |         """ Backup destructor: Release cuda memory """
 44 | 
 45 |         if self.trt_yolo is not None:
 46 |             self.cuda_ctx.pop()
 47 |             del self.trt_yolo
 48 |             del self.cuda_ctx
 49 | 
 50 |     def init_params(self):
 51 |         """ Initializes ros parameters """
 52 |         
 53 |         rospack = rospkg.RosPack()
 54 |         package_path = rospack.get_path("yolov4_trt_ros")
 55 |         self.video_topic = rospy.get_param("/video_topic", "/video_source/raw")
 56 |         self.model = rospy.get_param("/model", "yolov4Custom")
 57 |         self.model_path = rospy.get_param(
 58 |             "/model_path", package_path + "/yolo/")
 59 |         self.category_num = rospy.get_param("/category_number", 10)
 60 |         self.input_shape = rospy.get_param("/input_shape", "416")
 61 |         self.conf_th = rospy.get_param("/confidence_threshold", 0.5)
 62 |         self.show_img = rospy.get_param("/show_image", True)
 63 |         self.image_sub = rospy.Subscriber(
 64 |             self.video_topic, Image, self.img_callback, queue_size=1, buff_size=1920*1080*3)
 65 |         self.detection_pub = rospy.Publisher(
 66 |             "detections", Detector2DArray, queue_size=1)
 67 |         self.overlay_pub = rospy.Publisher(
 68 |             "/result/overlay", Image, queue_size=1)
 69 | 
 70 |     def init_yolo(self):
 71 |         """ Initialises yolo parameters required for trt engine """
 72 | 
 73 |         if self.model.find('-') == -1:
 74 |             self.model = self.model + "-" + self.input_shape
 75 |             
 76 |         yolo_dim = self.model.split('-')[-1]
 77 | 
 78 |         if 'x' in yolo_dim:
 79 |             dim_split = yolo_dim.split('x')
 80 |             if len(dim_split) != 2:
 81 |                 raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
 82 |             self.w, self.h = int(dim_split[0]), int(dim_split[1])
 83 |         else:
 84 |             self.h = self.w = int(yolo_dim)
 85 |         if self.h % 32 != 0 or self.w % 32 != 0:
 86 |             raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
 87 | 
 88 |         cls_dict = get_cls_dict(self.category_num)
 89 |         self.vis = BBoxVisualization(cls_dict)
 90 | 
 91 | 
 92 |     def img_callback(self, ros_img):
 93 |         """Continuously capture images from camera and do object detection """
 94 | 
 95 |         tic = time.time()
 96 | 
 97 |         # converts from ros_img to cv_img for processing
 98 |         try:
 99 |             cv_img = self.bridge.imgmsg_to_cv2(
100 |                 ros_img, desired_encoding="bgr8")
101 |             rospy.logdebug("ROS Image converted for processing")
102 |         except CvBridgeError as e:
103 |             rospy.loginfo("Failed to convert image %s", str(e))
104 | 
105 |         if cv_img is not None:
106 |             boxes, confs, clss = self.trt_yolo.detect(cv_img, self.conf_th)
107 | 
108 |             cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss)
109 |             toc = time.time()
110 |             fps = 1.0 / (toc - tic)
111 | 
112 |             self.publisher(boxes, confs, clss)
113 | 
114 |             if self.show_img:
115 |                 cv_img = show_fps(cv_img, fps)
116 |                 cv2.imshow("YOLOv4 DETECTION RESULTS", cv_img)
117 |                 cv2.waitKey(1)
118 | 
119 |         # converts back to ros_img type for publishing
120 |         try:
121 |             overlay_img = self.bridge.cv2_to_imgmsg(
122 |                 cv_img, encoding="passthrough")
123 |             rospy.logdebug("CV Image converted for publishing")
124 |             self.overlay_pub.publish(overlay_img)
125 |         except CvBridgeError as e:
126 |             rospy.loginfo("Failed to convert image %s", str(e))
127 | 
128 |     def publisher(self, boxes, confs, clss):
129 |         """ Publishes to detector_msgs
130 | 
131 |         Parameters:
132 |         boxes (List(List(int))) : Bounding boxes of all objects
133 |         confs (List(double))	: Probability scores of all objects
134 |         clss  (List(int))	: Class ID of all classes
135 |         """
136 |         detection2d = Detector2DArray()
137 |         detection = Detector2D()
138 |         detection2d.header.stamp = rospy.Time.now()
139 | 	detection2d.header.frame_id = "camera" # change accordingly
140 |         
141 |         for i in range(len(boxes)):
142 |             # boxes : xmin, ymin, xmax, ymax
143 |             for _ in boxes:
144 | 		detection.header.stamp = rospy.Time.now()
145 |                 detection.header.frame_id = "camera" # change accordingly
146 |                 detection.results.id = clss[i]
147 |                 detection.results.score = confs[i]
148 | 
149 |                 detection.bbox.center.x = boxes[i][0] + (boxes[i][2] - boxes[i][0])/2
150 |                 detection.bbox.center.y = boxes[i][1] + (boxes[i][3] - boxes[i][1])/2
151 |                 detection.bbox.center.theta = 0.0  # change if required
152 | 
153 |                 detection.bbox.size_x = abs(boxes[i][0] - boxes[i][2])
154 |                 detection.bbox.size_y = abs(boxes[i][1] - boxes[i][3])
155 | 
156 |             detection2d.detections.append(detection)
157 |         
158 |         self.detection_pub.publish(detection2d)
159 | 
160 | 
161 | def main():
162 |     yolo = yolov4()
163 |     rospy.init_node('yolov4_trt_ros', anonymous=True)
164 |     try:
165 |         rospy.spin()
166 |     except KeyboardInterrupt:
167 |         rospy.on_shutdown(yolo.clean_up())
168 |         print("Shutting down")
169 | 
170 | 
171 | if __name__ == '__main__':
172 |     main()
173 | 


--------------------------------------------------------------------------------
/trt_yolo_v4_batch.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import os
  4 | import time
  5 | 
  6 | import cv2
  7 | import pycuda.autoinit  # For initializing CUDA driver
  8 | import pycuda.driver as cuda
  9 | 
 10 | from utils.yolo_classes import get_cls_dict
 11 | from utils.display import open_window, set_display, show_fps
 12 | from utils.visualization import BBoxVisualization
 13 | from utils.yolo_with_plugins_batch import TrtYOLO
 14 | 
 15 | import rospy
 16 | import rospkg
 17 | from yolov4_trt_ros.msg import Detector2DArray
 18 | from yolov4_trt_ros.msg import Detector2D
 19 | from vision_msgs.msg import BoundingBox2D
 20 | from sensor_msgs.msg import Image
 21 | from cv_bridge import CvBridge, CvBridgeError
 22 | 
 23 | 
 24 | class yolov4(object):
 25 |     def __init__(self):
 26 |         """ Constructor """
 27 | 
 28 |         self.bridge = CvBridge()
 29 |         self.init_params()
 30 |         self.init_yolo()
 31 |         self.cuda_ctx = cuda.Device(0).make_context()
 32 |         self.trt_yolo = TrtYOLO(
 33 |             (self.model_path + self.model), (self.h, self.w), self.category_num)
 34 | 
 35 |     def __del__(self):
 36 |         """ Destructor """
 37 | 
 38 |         self.cuda_ctx.pop()
 39 |         del self.trt_yolo
 40 |         del self.cuda_ctx
 41 | 
 42 |     def clean_up(self):
 43 |         """ Backup destructor: Release cuda memory """
 44 | 
 45 |         if self.trt_yolo is not None:
 46 |             self.cuda_ctx.pop()
 47 |             del self.trt_yolo
 48 |             del self.cuda_ctx
 49 | 
 50 |     def init_params(self):
 51 |         """ Initializes ros parameters """
 52 | 
 53 |         rospack = rospkg.RosPack()
 54 |         package_path = rospack.get_path("yolov4_trt_ros")
 55 |         #self.video_topic1 = rospy.get_param(
 56 |         #    "/video_topic1", "/zed/zed_node/left_raw/image_raw_color")
 57 |         self.video_topic1 = rospy.get_param(
 58 |             "/video_topic1", "video_source/raw")
 59 |         self.video_topic2 = rospy.get_param(
 60 |             "/video_topic2", "/zed/zed_node/left_raw/image_raw_color")
 61 |         self.video_topic3 = rospy.get_param(
 62 |             "/video_topic3", "/zed/zed_node/right_raw/image_raw_color")
 63 |         self.video_topic4 = rospy.get_param(
 64 |             "/video_topic4", "/zed/zed_node/right_raw/image_raw_color")
 65 |         self.model = rospy.get_param("/model", "yolov4Custom")
 66 |         self.model_path = rospy.get_param(
 67 |             "/model_path", package_path + "/yolo/")
 68 |         self.category_num = rospy.get_param("/category_number", 10)
 69 |         self.input_shape = rospy.get_param("/input_shape", "416")
 70 |         self.conf_th = rospy.get_param("/confidence_threshold", 0.5)
 71 |         self.show_img = rospy.get_param("/show_image", True)
 72 |         self.image_sub1 = rospy.Subscriber(
 73 |             self.video_topic1, Image, self.img_callback1, queue_size=1, buff_size=2**26)
 74 |         self.image_sub2 = rospy.Subscriber(
 75 |             self.video_topic2, Image, self.img_callback2, queue_size=1, buff_size=2**26)
 76 |         self.image_sub3 = rospy.Subscriber(
 77 |             self.video_topic3, Image, self.img_callback3, queue_size=1, buff_size=2**26)
 78 |         self.image_sub4 = rospy.Subscriber(
 79 |             self.video_topic4, Image, self.img_callback4, queue_size=1, buff_size=2**26)
 80 |         self.detection_pub = rospy.Publisher(
 81 |             "detections", Detector2DArray, queue_size=1)
 82 |         self.overlay_pub = rospy.Publisher(
 83 |             "/result/overlay", Image, queue_size=1)
 84 | 
 85 |     def init_yolo(self):
 86 |         """ Initialises yolo parameters required for trt engine """
 87 | 
 88 |         if self.model.find('-') == -1:
 89 |             self.model = self.model + "-" + self.input_shape
 90 | 
 91 |         yolo_dim = self.model.split('-')[-1]
 92 | 
 93 |         if 'x' in yolo_dim:
 94 |             dim_split = yolo_dim.split('x')
 95 |             if len(dim_split) != 2:
 96 |                 raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
 97 |             self.w, self.h = int(dim_split[0]), int(dim_split[1])
 98 |         else:
 99 |             self.h = self.w = int(yolo_dim)
100 |         if self.h % 32 != 0 or self.w % 32 != 0:
101 |             raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
102 | 
103 |         cls_dict = get_cls_dict(self.category_num)
104 |         self.vis = BBoxVisualization(cls_dict)
105 | 
106 |     def img_callback1(self, ros_img):
107 |         """Continuously capture images from camera and do object detection """
108 | 
109 |         # converts from ros_img to cv_img for processing
110 |         try:
111 |             cv_img = self.bridge.imgmsg_to_cv2(
112 |                 ros_img, desired_encoding="bgr8")
113 |             rospy.logdebug("ROS Image converted for processing")
114 |         except CvBridgeError as e:
115 |             rospy.loginfo("Failed to convert image %s", str(e))
116 | 
117 |         tic = time.time()
118 |         boxes, confs, clss = self.trt_yolo.detect1(cv_img, self.conf_th)
119 |         cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss)
120 |         toc = time.time()
121 |         fps = 1.0 / (toc - tic)
122 |         print("Cam1: " + str(fps) + "\n")
123 |         self.publisher(boxes, confs, clss, "Cam1")
124 |         
125 |         if self.show_img:
126 |             cv_img = show_fps(cv_img, fps)
127 |             cv2.imshow("Cam 1", cv_img)
128 |             cv2.waitKey(1)
129 |         
130 |         # converts back to ros_img type for publishing
131 |         try:
132 |             overlay_img = self.bridge.cv2_to_imgmsg(
133 |                 cv_img, encoding="passthrough")
134 |             rospy.logdebug("CV Image converted for publishing")
135 |             self.overlay_pub.publish(overlay_img)
136 |         except CvBridgeError as e:
137 |             rospy.loginfo("Failed to convert image %s", str(e))
138 | 
139 |     def img_callback2(self, ros_img):
140 |         """Continuously capture images from camera and do object detection """
141 | 
142 |         # converts from ros_img to cv_img for processing
143 |         # time both cv_bridge fns
144 |         try:
145 |             cv_img = self.bridge.imgmsg_to_cv2(
146 |                 ros_img, desired_encoding="bgr8")
147 |             rospy.logdebug("ROS Image converted for processing")
148 |         except CvBridgeError as e:
149 |             rospy.loginfo("Failed to convert image %s", str(e))
150 | 
151 |         tic = time.time()
152 |         boxes, confs, clss = self.trt_yolo.detect2(cv_img, self.conf_th)
153 |         cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss)
154 |         toc = time.time()
155 |         fps = 1.0 / (toc - tic)
156 |         print("Cam2: " + str(fps) + "\n")
157 |         self.publisher(boxes, confs, clss, "Cam2")
158 | 
159 |         # time 1
160 |         """
161 |         if self.show_img:
162 |             cv_img = show_fps(cv_img, fps)
163 |             cv2.imshow("Cam 2", cv_img)
164 |             # time 2
165 |             cv2.waitKey(1)
166 |         """
167 |         
168 |         # converts back to ros_img type for publishing
169 |         try:
170 |             overlay_img = self.bridge.cv2_to_imgmsg(
171 |                 cv_img, encoding="passthrough")
172 |             rospy.logdebug("CV Image converted for publishing")
173 |             self.overlay_pub.publish(overlay_img)
174 |         except CvBridgeError as e:
175 |             rospy.loginfo("Failed to convert image %s", str(e))
176 | 
177 |     def img_callback3(self, ros_img):
178 |         """Continuously capture images from camera and do object detection """
179 | 
180 |         # converts from ros_img to cv_img for processing
181 |         try:
182 |             cv_img = self.bridge.imgmsg_to_cv2(
183 |                 ros_img, desired_encoding="bgr8")
184 |             rospy.logdebug("ROS Image converted for processing")
185 |         except CvBridgeError as e:
186 |             rospy.loginfo("Failed to convert image %s", str(e))
187 | 
188 |         tic = time.time()
189 |         boxes, confs, clss = self.trt_yolo.detect3(cv_img, self.conf_th)
190 |         cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss)
191 |         toc = time.time()
192 |         fps = 1.0 / (toc - tic)
193 |         print("Cam3: " + str(fps) + "\n")
194 |         self.publisher(boxes, confs, clss, "Cam3")
195 |         """
196 |             if self.show_img:
197 |                 cv_img = show_fps(cv_img, fps)
198 |                 cv2.imshow("Cam 3", cv_img)
199 |                 cv2.waitKey(1)
200 |         """
201 |         # converts back to ros_img type for publishing
202 |         try:
203 |             overlay_img = self.bridge.cv2_to_imgmsg(
204 |                 cv_img, encoding="passthrough")
205 |             rospy.logdebug("CV Image converted for publishing")
206 |             self.overlay_pub.publish(overlay_img)
207 |         except CvBridgeError as e:
208 |             rospy.loginfo("Failed to convert image %s", str(e))
209 | 
210 |     def img_callback4(self, ros_img):
211 |         """Continuously capture images from camera and do object detection """
212 | 
213 |         # converts from ros_img to cv_img for processing
214 |         try:
215 |             cv_img = self.bridge.imgmsg_to_cv2(
216 |                 ros_img, desired_encoding="bgr8")
217 |             rospy.logdebug("ROS Image converted for processing")
218 |         except CvBridgeError as e:
219 |             rospy.loginfo("Failed to convert image %s", str(e))
220 | 
221 |         tic = time.time()
222 |         boxes, confs, clss = self.trt_yolo.detect4(cv_img, self.conf_th)
223 |         cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss)
224 |         toc = time.time()
225 |         fps = 1.0 / (toc - tic)
226 |         print("Cam4: " + str(fps) + "\n")
227 |         self.publisher(boxes, confs, clss, "Cam4")
228 |         """
229 |             if self.show_img:
230 |                 cv_img = show_fps(cv_img, fps)
231 |                 cv2.imshow("Cam 3", cv_img)
232 |                 cv2.waitKey(1)
233 |         """
234 |         # converts back to ros_img type for publishing
235 |         try:
236 |             overlay_img = self.bridge.cv2_to_imgmsg(
237 |                 cv_img, encoding="passthrough")
238 |             rospy.logdebug("CV Image converted for publishing")
239 |             self.overlay_pub.publish(overlay_img)
240 |         except CvBridgeError as e:
241 |             rospy.loginfo("Failed to convert image %s", str(e))
242 | 
243 |     def publisher(self, boxes, confs, clss, frame):
244 |         """ Publishes to detector_msgs
245 | 
246 |         Parameters:
247 |         boxes (List(List(int))) : Bounding boxes of all objects
248 |         confs (List(double))	: Probability scores of all objects
249 |         clss  (List(int))	: Class ID of all classes
250 |         """
251 |         detection2d = Detector2DArray()
252 |         detection = Detector2D()
253 |         detection2d.header.stamp = rospy.Time.now()
254 |         detection2d.header.frame_id = "camera"  # change accordingly
255 | 
256 |         for i in range(len(boxes)):
257 |             # boxes : xmin, ymin, xmax, ymax
258 |             for _ in boxes:
259 |                 detection.header.stamp = rospy.Time.now()
260 |                 detection.header.frame_id = frame  # change accordingly
261 |                 detection.results.id = clss[i]
262 |                 detection.results.score = confs[i]
263 | 
264 |                 detection.bbox.center.x = boxes[i][0] + \
265 |                     (boxes[i][2] - boxes[i][0])/2
266 |                 detection.bbox.center.y = boxes[i][1] + \
267 |                     (boxes[i][3] - boxes[i][1])/2
268 |                 detection.bbox.center.theta = 0.0  # change if required
269 | 
270 |                 detection.bbox.size_x = abs(boxes[i][0] - boxes[i][2])
271 |                 detection.bbox.size_y = abs(boxes[i][1] - boxes[i][3])
272 | 
273 |             detection2d.detections.append(detection)
274 | 
275 |         self.detection_pub.publish(detection2d)
276 | 
277 | 
278 | def main():
279 |     yolo = yolov4()
280 |     rospy.init_node('yolov4_trt_ros', anonymous=True)
281 |     try:
282 |         rospy.spin()
283 |     except KeyboardInterrupt:
284 |         del yolo
285 |         rospy.on_shutdown(yolo.clean_up())
286 |         print("Shutting down")
287 | 
288 | 
289 | if __name__ == '__main__':
290 |     main()
291 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/utils/__init__.py


--------------------------------------------------------------------------------
/utils/display.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """display.py
 3 | """
 4 | 
 5 | 
 6 | import cv2
 7 | 
 8 | 
 9 | def open_window(window_name, title, width=None, height=None):
10 |     """Open the display window."""
11 |     cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
12 |     cv2.setWindowTitle(window_name, title)
13 |     if width and height:
14 |         cv2.resizeWindow(window_name, width, height)
15 | 
16 | 
17 | def show_help_text(img, help_text):
18 |     """Draw help text on image."""
19 |     cv2.putText(img, help_text, (11, 20), cv2.FONT_HERSHEY_PLAIN, 1.0,
20 |                 (32, 32, 32), 4, cv2.LINE_AA)
21 |     cv2.putText(img, help_text, (10, 20), cv2.FONT_HERSHEY_PLAIN, 1.0,
22 |                 (240, 240, 240), 1, cv2.LINE_AA)
23 |     return img
24 | 
25 | 
26 | def show_fps(img, fps):
27 |     """Draw fps number at top-left corner of the image."""
28 |     font = cv2.FONT_HERSHEY_PLAIN
29 |     line = cv2.LINE_AA
30 |     fps_text = 'FPS: {:.2f}'.format(fps)
31 |     cv2.putText(img, fps_text, (11, 20), font, 1.0, (32, 32, 32), 4, line)
32 |     cv2.putText(img, fps_text, (10, 20), font, 1.0, (240, 240, 240), 1, line)
33 |     return img
34 | 
35 | 
36 | def set_display(window_name, full_scrn):
37 |     """Set disply window to either full screen or normal."""
38 |     if full_scrn:
39 |         cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
40 |                               cv2.WINDOW_FULLSCREEN)
41 |     else:
42 |         cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
43 |                               cv2.WINDOW_NORMAL)
44 | 


--------------------------------------------------------------------------------
/utils/visualization.py:
--------------------------------------------------------------------------------
  1 | """visualization.py
  2 | 
  3 | The BBoxVisualization class implements drawing of nice looking
  4 | bounding boxes based on object detection results.
  5 | """
  6 | 
  7 | 
  8 | import numpy as np
  9 | import cv2
 10 | 
 11 | 
 12 | # Constants
 13 | ALPHA = 0.5
 14 | FONT = cv2.FONT_HERSHEY_PLAIN
 15 | TEXT_SCALE = 1.0
 16 | TEXT_THICKNESS = 1
 17 | BLACK = (0, 0, 0)
 18 | WHITE = (255, 255, 255)
 19 | 
 20 | 
 21 | def gen_colors(num_colors):
 22 |     """Generate different colors.
 23 | 
 24 |     # Arguments
 25 |       num_colors: total number of colors/classes.
 26 | 
 27 |     # Output
 28 |       bgrs: a list of (B, G, R) tuples which correspond to each of
 29 |             the colors/classes.
 30 |     """
 31 |     import random
 32 |     import colorsys
 33 | 
 34 |     hsvs = [[float(x) / num_colors, 1., 0.7] for x in range(num_colors)]
 35 |     random.seed(1234)
 36 |     random.shuffle(hsvs)
 37 |     rgbs = list(map(lambda x: list(colorsys.hsv_to_rgb(*x)), hsvs))
 38 |     bgrs = [(int(rgb[2] * 255), int(rgb[1] * 255),  int(rgb[0] * 255))
 39 |             for rgb in rgbs]
 40 |     return bgrs
 41 | 
 42 | 
 43 | def draw_boxed_text(img, text, topleft, color):
 44 |     """Draw a transluent boxed text in white, overlayed on top of a
 45 |     colored patch surrounded by a black border. FONT, TEXT_SCALE,
 46 |     TEXT_THICKNESS and ALPHA values are constants (fixed) as defined
 47 |     on top.
 48 | 
 49 |     # Arguments
 50 |       img: the input image as a numpy array.
 51 |       text: the text to be drawn.
 52 |       topleft: XY coordinate of the topleft corner of the boxed text.
 53 |       color: color of the patch, i.e. background of the text.
 54 | 
 55 |     # Output
 56 |       img: note the original image is modified inplace.
 57 |     """
 58 |     assert img.dtype == np.uint8
 59 |     img_h, img_w, _ = img.shape
 60 |     if topleft[0] >= img_w or topleft[1] >= img_h:
 61 |         return img
 62 |     margin = 3
 63 |     size = cv2.getTextSize(text, FONT, TEXT_SCALE, TEXT_THICKNESS)
 64 |     w = size[0][0] + margin * 2
 65 |     h = size[0][1] + margin * 2
 66 |     # the patch is used to draw boxed text
 67 |     patch = np.zeros((h, w, 3), dtype=np.uint8)
 68 |     patch[...] = color
 69 |     cv2.putText(patch, text, (margin+1, h-margin-2), FONT, TEXT_SCALE,
 70 |                 WHITE, thickness=TEXT_THICKNESS, lineType=cv2.LINE_8)
 71 |     cv2.rectangle(patch, (0, 0), (w-1, h-1), BLACK, thickness=1)
 72 |     w = min(w, img_w - topleft[0])  # clip overlay at image boundary
 73 |     h = min(h, img_h - topleft[1])
 74 |     # Overlay the boxed text onto region of interest (roi) in img
 75 |     roi = img[topleft[1]:topleft[1]+h, topleft[0]:topleft[0]+w, :]
 76 |     cv2.addWeighted(patch[0:h, 0:w, :], ALPHA, roi, 1 - ALPHA, 0, roi)
 77 |     return img
 78 | 
 79 | 
 80 | class BBoxVisualization():
 81 |     """BBoxVisualization class implements nice drawing of boudning boxes.
 82 | 
 83 |     # Arguments
 84 |       cls_dict: a dictionary used to translate class id to its name.
 85 |     """
 86 | 
 87 |     def __init__(self, cls_dict):
 88 |         self.cls_dict = cls_dict
 89 |         self.colors = gen_colors(len(cls_dict))
 90 | 
 91 |     def draw_bboxes(self, img, boxes, confs, clss):
 92 |         """Draw detected bounding boxes on the original image."""
 93 |         for bb, cf, cl in zip(boxes, confs, clss):
 94 |             cl = int(cl)
 95 |             x_min, y_min, x_max, y_max = bb[0], bb[1], bb[2], bb[3]
 96 |             color = self.colors[cl]
 97 |             cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, 2)
 98 |             txt_loc = (max(x_min+2, 0), max(y_min+2, 0))
 99 |             cls_name = self.cls_dict.get(cl, 'CLS{}'.format(cl))
100 |             txt = '{} {:.2f}'.format(cls_name, cf)
101 |             img = draw_boxed_text(img, txt, txt_loc, color)
102 |         return img
103 | 


--------------------------------------------------------------------------------
/utils/yolo_classes.py:
--------------------------------------------------------------------------------
 1 | CLASSES_LIST = [
 2 |     'bike',
 3 |     'bus',
 4 |     'car',
 5 |     'motor',
 6 |     'person',
 7 |     'rider',
 8 |     'traffic light',
 9 |     'traffic sign',
10 |     'train',
11 |     'truck',
12 | ]
13 | 
14 | def get_cls_dict(category_num):
15 |     """Get the class ID to name translation dictionary."""
16 |     if category_num == len(CLASSES_LIST):
17 |         return {i: n for i, n in enumerate(CLASSES_LIST)}
18 |     else:
19 |         return {i: 'CLS%d' % i for i in range(category_num)}
20 | 


--------------------------------------------------------------------------------
/utils/yolo_with_plugins.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import ctypes
  4 | 
  5 | import rospkg
  6 | import numpy as np
  7 | import cv2
  8 | import tensorrt as trt
  9 | import pycuda.driver as cuda
 10 | import pycuda.autoinit
 11 | 
 12 | rospack = rospkg.RosPack()
 13 | plugin_path = rospack.get_path("yolov4_trt_ros") + "/plugins/libyolo_layer.so"
 14 | 
 15 | try:
 16 |     ctypes.cdll.LoadLibrary(plugin_path)
 17 | except OSError as e:
 18 |     raise SystemExit('ERROR: failed to load ./plugins/libyolo_layer.so.  '
 19 |                      'Did you forget to do a "make" in the "./plugins/" '
 20 |                      'subdirectory?')
 21 | 
 22 | 
 23 | def _preprocess_yolo(img, input_shape):
 24 |     """Preprocess an image before TRT YOLO inferencing.
 25 | 
 26 |     # Args
 27 |         img: int8 numpy array of shape (img_h, img_w, 3)
 28 |         input_shape: a tuple of (H, W)
 29 | 
 30 |     # Returns
 31 |         preprocessed img: float32 numpy array of shape (3, H, W)
 32 |     """
 33 |     img = cv2.resize(img, (input_shape[1], input_shape[0]))
 34 |     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
 35 |     img = img.transpose((2, 0, 1)).astype(np.float32)
 36 |     img /= 255.0
 37 |     return img
 38 | 
 39 | 
 40 | def _nms_boxes(detections, nms_threshold):
 41 |     """Apply the Non-Maximum Suppression (NMS) algorithm on the bounding
 42 |     boxes with their confidence scores and return an array with the
 43 |     indexes of the bounding boxes we want to keep.
 44 | 
 45 |     # Args
 46 |         detections: Nx7 numpy arrays of
 47 |                     [[x, y, w, h, box_confidence, class_id, class_prob],
 48 |                      ......]
 49 |     """
 50 |     x_coord = detections[:, 0]
 51 |     y_coord = detections[:, 1]
 52 |     width = detections[:, 2]
 53 |     height = detections[:, 3]
 54 |     box_confidences = detections[:, 4]
 55 | 
 56 |     areas = width * height
 57 |     ordered = box_confidences.argsort()[::-1]
 58 | 
 59 |     keep = list()
 60 |     while ordered.size > 0:
 61 |         # Index of the current element:
 62 |         i = ordered[0]
 63 |         keep.append(i)
 64 |         xx1 = np.maximum(x_coord[i], x_coord[ordered[1:]])
 65 |         yy1 = np.maximum(y_coord[i], y_coord[ordered[1:]])
 66 |         xx2 = np.minimum(x_coord[i] + width[i],
 67 |                          x_coord[ordered[1:]] + width[ordered[1:]])
 68 |         yy2 = np.minimum(y_coord[i] + height[i],
 69 |                          y_coord[ordered[1:]] + height[ordered[1:]])
 70 | 
 71 |         width1 = np.maximum(0.0, xx2 - xx1 + 1)
 72 |         height1 = np.maximum(0.0, yy2 - yy1 + 1)
 73 |         intersection = width1 * height1
 74 |         union = (areas[i] + areas[ordered[1:]] - intersection)
 75 |         iou = intersection / union
 76 |         indexes = np.where(iou <= nms_threshold)[0]
 77 |         ordered = ordered[indexes + 1]
 78 | 
 79 |     keep = np.array(keep)
 80 |     return keep
 81 | 
 82 | 
 83 | def _postprocess_yolo(trt_outputs, img_w, img_h, conf_th, nms_threshold=0.5):
 84 |     """Postprocess TensorRT outputs.
 85 | 
 86 |     # Args
 87 |         trt_outputs: a list of 2 or 3 tensors, where each tensor
 88 |                     contains a multiple of 7 float32 numbers in
 89 |                     the order of [x, y, w, h, box_confidence, class_id, class_prob]
 90 |         conf_th: confidence threshold
 91 | 
 92 |     # Returns
 93 |         boxes, scores, classes (after NMS)
 94 |     """
 95 |     # concatenate outputs of all yolo layers
 96 |     detections = np.concatenate(
 97 |         [o.reshape(-1, 7) for o in trt_outputs], axis=0)
 98 | 
 99 |     # drop detections with score lower than conf_th
100 |     box_scores = detections[:, 4] * detections[:, 6]
101 |     pos = np.where(box_scores >= conf_th)
102 |     detections = detections[pos]
103 | 
104 |     # scale x, y, w, h from [0, 1] to pixel values
105 |     detections[:, 0] *= img_w
106 |     detections[:, 1] *= img_h
107 |     detections[:, 2] *= img_w
108 |     detections[:, 3] *= img_h
109 | 
110 |     # NMS
111 |     nms_detections = np.zeros((0, 7), dtype=detections.dtype)
112 |     for class_id in set(detections[:, 5]):
113 |         idxs = np.where(detections[:, 5] == class_id)
114 |         cls_detections = detections[idxs]
115 |         keep = _nms_boxes(cls_detections, nms_threshold)
116 |         nms_detections = np.concatenate(
117 |             [nms_detections, cls_detections[keep]], axis=0)
118 |     if len(nms_detections) == 0:
119 |         boxes = np.zeros((0, 4), dtype=np.int)
120 |         scores = np.zeros((0, 1), dtype=np.float32)
121 |         classes = np.zeros((0, 1), dtype=np.float32)
122 |     else:
123 |         xx = nms_detections[:, 0].reshape(-1, 1)
124 |         yy = nms_detections[:, 1].reshape(-1, 1)
125 |         ww = nms_detections[:, 2].reshape(-1, 1)
126 |         hh = nms_detections[:, 3].reshape(-1, 1)
127 |         boxes = np.concatenate([xx, yy, xx+ww, yy+hh], axis=1) + 0.5
128 |         boxes = boxes.astype(np.int)
129 |         scores = nms_detections[:, 4] * nms_detections[:, 6]
130 |         classes = nms_detections[:, 5]
131 |     return boxes, scores, classes
132 | 
133 | 
134 | class HostDeviceMem(object):
135 |     """Simple helper data class that's a little nicer to use than a 2-tuple."""
136 | 
137 |     def __init__(self, host_mem, device_mem):
138 |         self.host = host_mem
139 |         self.device = device_mem
140 | 
141 |     def __str__(self):
142 |         return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
143 | 
144 |     def __repr__(self):
145 |         return self.__str__()
146 | 
147 | 
148 | def allocate_buffers(engine, grid_sizes):
149 |     """Allocates all host/device in/out buffers required for an engine.
150 |        Checked!
151 |     """
152 | 
153 |     inputs = []
154 |     outputs = []
155 |     bindings = []
156 |     output_idx = 0
157 |     stream = cuda.Stream()
158 |     for binding in engine:
159 |         size = trt.volume(engine.get_binding_shape(binding)) * \
160 |             engine.max_batch_size
161 |         dtype = trt.nptype(engine.get_binding_dtype(binding))
162 |         # Allocate host and device buffers
163 |         host_mem = cuda.pagelocked_empty(size, dtype)
164 |         device_mem = cuda.mem_alloc(host_mem.nbytes)
165 | 
166 |         # Append the device buffer to device bindings.
167 |         bindings.append(int(device_mem))
168 | 
169 |         # Append to the appropriate list.
170 |         if engine.binding_is_input(binding):
171 |             inputs.append(HostDeviceMem(host_mem, device_mem))
172 | 
173 |         else:
174 |             # each grid has 3 anchors, each anchor generates a detection
175 |             # output of 7 float32 values
176 |             assert size == grid_sizes[output_idx] * \
177 |                 3 * 7 * engine.max_batch_size
178 |             outputs.append(HostDeviceMem(host_mem, device_mem))
179 |             output_idx += 1
180 | 
181 |     return inputs, outputs, bindings, stream
182 | 
183 | 
184 | def get_yolo_grid_sizes(model_name, h, w):
185 |     """Get grid sizes (w*h) for all yolo layers in the model."""
186 |     if 'yolov3' in model_name:
187 |         if 'tiny' in model_name:
188 |             return [(h // 32) * (w // 32), (h // 16) * (w // 16)]
189 |         else:
190 |             return [(h // 32) * (w // 32), (h // 16) * (w // 16), (h // 8) * (w // 8)]
191 |     elif 'yolov4' in model_name:
192 |         if 'tiny' in model_name:
193 |             return [(h // 32) * (w // 32), (h // 16) * (w // 16)]
194 |         else:
195 |             return [(h // 8) * (w // 8), (h // 16) * (w // 16), (w // 32) * (h // 32)]
196 |     else:
197 |         raise ValueError('ERROR: unknown model (%s)!' % args.model)
198 | 
199 | 
200 | class TrtYOLO(object):
201 |     """TrtYOLO class encapsulates things needed to run TRT YOLO."""
202 | 
203 |     def _load_engine(self):
204 |         TRTbin = '%s.trt' % self.model
205 |         with open(TRTbin, 'rb') as f, trt.Runtime(self.trt_logger) as runtime:
206 |             return runtime.deserialize_cuda_engine(f.read())
207 | 
208 |     def __init__(self, model, input_shape, category_num=80, cuda_ctx=None):
209 |         """Initialize TensorRT plugins, engine and context."""
210 |         self.model = model
211 |         self.input_shape = input_shape
212 |         self.category_num = category_num
213 |         self.cuda_ctx = cuda.Device(0).make_context()
214 |         self.trt_logger = trt.Logger(trt.Logger.INFO)
215 |         self.engine = self._load_engine()
216 |         self.context = self.engine.create_execution_context()
217 |         grid_sizes = get_yolo_grid_sizes(
218 |             self.model, self.input_shape[0], self.input_shape[1])
219 |         self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers(
220 |             self.engine, grid_sizes)
221 | 
222 |     def __del__(self):
223 |         """Free CUDA memories."""
224 |         del self.outputs
225 |         del self.inputs
226 |         del self.stream
227 | 
228 |     def allocate_buffers(self, engine, grid_sizes):
229 |         """Allocates all host/device in/out buffers required for an engine.
230 |            Checked!
231 |         """
232 | 
233 |         inputs = []
234 |         outputs = []
235 |         bindings = []
236 |         output_idx = 0
237 |         stream = cuda.Stream()
238 | 
239 |         for binding in engine:
240 |             size = trt.volume(engine.get_binding_shape(
241 |                 binding)) * engine.max_batch_size
242 |             dtype = trt.nptype(engine.get_binding_dtype(binding))
243 |             # Allocate host and device buffers
244 |             host_mem = cuda.pagelocked_empty(size, dtype)
245 |             device_mem = cuda.mem_alloc(host_mem.nbytes)
246 | 
247 |             # Append the device buffer to device bindings.
248 |             bindings.append(int(device_mem))
249 | 
250 |             # Append to the appropriate list.
251 |             if engine.binding_is_input(binding):
252 |                 inputs.append(HostDeviceMem(host_mem, device_mem))
253 | 
254 |             else:
255 |                 # each grid has 3 anchors, each anchor generates a detection
256 |                 # output of 7 float32 values
257 |                 assert size == grid_sizes[output_idx] * \
258 |                     3 * 7 * engine.max_batch_size
259 |                 outputs.append(HostDeviceMem(host_mem, device_mem))
260 |                 output_idx += 1
261 | 
262 |         return inputs, outputs, bindings, stream
263 | 
264 |     def detect(self, img, conf_th=0.3):
265 |         """ Detect objects in the input image """
266 | 
267 |         img_resized = _preprocess_yolo(img, self.input_shape)
268 |         trt_outputs = []
269 | 
270 |         # Set host input to the image. The do_inference() function
271 |         # will copy the input to the GPU before executing.
272 |         self.inputs[0].host = np.ascontiguousarray(img_resized)
273 | 
274 |         if self.cuda_ctx:
275 |             self.cuda_ctx.push()
276 | 
277 |         self.bindings = [int(i) for i in self.bindings]
278 | 
279 |         context = self.context
280 |         bindings = self.bindings
281 |         inputs = self.inputs
282 |         outputs = self.outputs
283 |         stream = self.stream
284 | 
285 |         if trt.__version__[0] < '7':
286 |             # Transfer input data to the GPU.
287 |             [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
288 |             # Run inference.
289 |             context.execute_async(batch_size=1,
290 |                                 bindings=bindings,
291 |                                 stream_handle=stream.handle)
292 |             # Transfer predictions back from the GPU.
293 |             [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
294 |             # Synchronize the stream
295 |             stream.synchronize()
296 |             # Return only the host outputs.
297 |         else:
298 |             # Transfer input data to the GPU.
299 |             [cuda.memcpy_htod_async(inp.device, inp.host, stream)
300 |             for inp in inputs]
301 |             # Run inference.
302 |             context.execute_async_v2(
303 |                 bindings=self.bindings, stream_handle=stream.handle)
304 |             # Transfer predictions back from the GPU.
305 |             [cuda.memcpy_dtoh_async(out.host, out.device, stream)
306 |             for out in outputs]
307 |             # Synchronize the stream
308 |             stream.synchronize()
309 | 
310 |         trt_outputs = [out.host for out in outputs]
311 | 
312 |         del context
313 | 
314 |         if self.cuda_ctx:
315 |             self.cuda_ctx.pop()
316 | 
317 |         boxes, scores, classes = _postprocess_yolo(
318 |             trt_outputs, img.shape[1], img.shape[0], conf_th)
319 | 
320 |         # clip x1, y1, x2, y2 within original image
321 |         boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1)
322 |         boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1)
323 |         return boxes, scores, classes
324 | 


--------------------------------------------------------------------------------
/utils/yolo_with_plugins_batch.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import ctypes
  4 | 
  5 | import rospkg
  6 | import numpy as np
  7 | import cv2
  8 | import tensorrt as trt
  9 | import pycuda.driver as cuda
 10 | import pycuda.autoinit
 11 | from multiprocessing import Process
 12 | 
 13 | rospack = rospkg.RosPack()
 14 | plugin_path = rospack.get_path("yolov4_trt_ros") + "/plugins/libyolo_layer.so"
 15 | 
 16 | try:
 17 |     ctypes.cdll.LoadLibrary(plugin_path)
 18 | except OSError as e:
 19 |     raise SystemExit('ERROR: failed to load ./plugins/libyolo_layer.so.  '
 20 |                      'Did you forget to do a "make" in the "./plugins/" '
 21 |                      'subdirectory?')
 22 | 
 23 | 
 24 | def _preprocess_yolo(img, input_shape):
 25 |     """Preprocess an image before TRT YOLO inferencing.
 26 | 
 27 |     # Args
 28 |         img: int8 numpy array of shape (img_h, img_w, 3)
 29 |         input_shape: a tuple of (H, W)
 30 | 
 31 |     # Returns
 32 |         preprocessed img: float32 numpy array of shape (3, H, W)
 33 |     """
 34 |     img = cv2.resize(img, (input_shape[1], input_shape[0]))
 35 |     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
 36 |     img = img.transpose((2, 0, 1)).astype(np.float32)
 37 |     img /= 255.0
 38 |     return img
 39 | 
 40 | 
 41 | def _nms_boxes(detections, nms_threshold):
 42 |     """Apply the Non-Maximum Suppression (NMS) algorithm on the bounding
 43 |     boxes with their confidence scores and return an array with the
 44 |     indexes of the bounding boxes we want to keep.
 45 | 
 46 |     # Args
 47 |         detections: Nx7 numpy arrays of
 48 |                     [[x, y, w, h, box_confidence, class_id, class_prob],
 49 |                      ......]
 50 |     """
 51 |     x_coord = detections[:, 0]
 52 |     y_coord = detections[:, 1]
 53 |     width = detections[:, 2]
 54 |     height = detections[:, 3]
 55 |     box_confidences = detections[:, 4]
 56 | 
 57 |     areas = width * height
 58 |     ordered = box_confidences.argsort()[::-1]
 59 | 
 60 |     keep = list()
 61 |     while ordered.size > 0:
 62 |         # Index of the current element:
 63 |         i = ordered[0]
 64 |         keep.append(i)
 65 |         xx1 = np.maximum(x_coord[i], x_coord[ordered[1:]])
 66 |         yy1 = np.maximum(y_coord[i], y_coord[ordered[1:]])
 67 |         xx2 = np.minimum(x_coord[i] + width[i],
 68 |                          x_coord[ordered[1:]] + width[ordered[1:]])
 69 |         yy2 = np.minimum(y_coord[i] + height[i],
 70 |                          y_coord[ordered[1:]] + height[ordered[1:]])
 71 | 
 72 |         width1 = np.maximum(0.0, xx2 - xx1 + 1)
 73 |         height1 = np.maximum(0.0, yy2 - yy1 + 1)
 74 |         intersection = width1 * height1
 75 |         union = (areas[i] + areas[ordered[1:]] - intersection)
 76 |         iou = intersection / union
 77 |         indexes = np.where(iou <= nms_threshold)[0]
 78 |         ordered = ordered[indexes + 1]
 79 | 
 80 |     keep = np.array(keep)
 81 |     return keep
 82 | 
 83 | 
 84 | def _postprocess_yolo(trt_outputs, img_w, img_h, conf_th, nms_threshold=0.5):
 85 |     """Postprocess TensorRT outputs.
 86 | 
 87 |     # Args
 88 |         trt_outputs: a list of 2 or 3 tensors, where each tensor
 89 |                     contains a multiple of 7 float32 numbers in
 90 |                     the order of [x, y, w, h, box_confidence, class_id, class_prob]
 91 |         conf_th: confidence threshold
 92 | 
 93 |     # Returns
 94 |         boxes, scores, classes (after NMS)
 95 |     """
 96 |     # concatenate outputs of all yolo layers
 97 |     detections = np.concatenate(
 98 |         [o.reshape(-1, 7) for o in trt_outputs], axis=0)
 99 | 
100 |     # drop detections with score lower than conf_th
101 |     box_scores = detections[:, 4] * detections[:, 6]
102 |     pos = np.where(box_scores >= conf_th)
103 |     detections = detections[pos]
104 | 
105 |     # scale x, y, w, h from [0, 1] to pixel values
106 |     detections[:, 0] *= img_w
107 |     detections[:, 1] *= img_h
108 |     detections[:, 2] *= img_w
109 |     detections[:, 3] *= img_h
110 | 
111 |     # NMS
112 |     nms_detections = np.zeros((0, 7), dtype=detections.dtype)
113 |     for class_id in set(detections[:, 5]):
114 |         idxs = np.where(detections[:, 5] == class_id)
115 |         cls_detections = detections[idxs]
116 |         keep = _nms_boxes(cls_detections, nms_threshold)
117 |         nms_detections = np.concatenate(
118 |             [nms_detections, cls_detections[keep]], axis=0)
119 |     if len(nms_detections) == 0:
120 |         boxes = np.zeros((0, 4), dtype=np.int)
121 |         scores = np.zeros((0, 1), dtype=np.float32)
122 |         classes = np.zeros((0, 1), dtype=np.float32)
123 |     else:
124 |         xx = nms_detections[:, 0].reshape(-1, 1)
125 |         yy = nms_detections[:, 1].reshape(-1, 1)
126 |         ww = nms_detections[:, 2].reshape(-1, 1)
127 |         hh = nms_detections[:, 3].reshape(-1, 1)
128 |         boxes = np.concatenate([xx, yy, xx+ww, yy+hh], axis=1) + 0.5
129 |         boxes = boxes.astype(np.int)
130 |         scores = nms_detections[:, 4] * nms_detections[:, 6]
131 |         classes = nms_detections[:, 5]
132 |     return boxes, scores, classes
133 | 
134 | 
135 | class HostDeviceMem(object):
136 |     """Simple helper data class that's a little nicer to use than a 2-tuple."""
137 | 
138 |     def __init__(self, host_mem, device_mem):
139 |         self.host = host_mem
140 |         self.device = device_mem
141 | 
142 |     def __str__(self):
143 |         return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
144 | 
145 |     def __repr__(self):
146 |         return self.__str__()
147 | 
148 | 
149 | def allocate_buffers(engine, grid_sizes):
150 |     """Allocates all host/device in/out buffers required for an engine.
151 |        Checked!
152 |     """
153 | 
154 |     inputs = []
155 |     outputs = []
156 |     bindings = []
157 |     output_idx = 0
158 |     stream = cuda.Stream()
159 |     for binding in engine:
160 |         size = trt.volume(engine.get_binding_shape(binding)) * \
161 |             engine.max_batch_size
162 |         dtype = trt.nptype(engine.get_binding_dtype(binding))
163 |         # Allocate host and device buffers
164 |         host_mem = cuda.pagelocked_empty(size, dtype)
165 |         device_mem = cuda.mem_alloc(host_mem.nbytes)
166 | 
167 |         # Append the device buffer to device bindings.
168 |         bindings.append(int(device_mem))
169 | 
170 |         # Append to the appropriate list.
171 |         if engine.binding_is_input(binding):
172 |             inputs.append(HostDeviceMem(host_mem, device_mem))
173 | 
174 |         else:
175 |             # each grid has 3 anchors, each anchor generates a detection
176 |             # output of 7 float32 values
177 |             assert size == grid_sizes[output_idx] * \
178 |                 3 * 7 * engine.max_batch_size
179 |             outputs.append(HostDeviceMem(host_mem, device_mem))
180 |             output_idx += 1
181 | 
182 |     return inputs, outputs, bindings, stream
183 | 
184 | 
185 | def get_yolo_grid_sizes(model_name, h, w):
186 |     """Get grid sizes (w*h) for all yolo layers in the model."""
187 |     if 'yolov3' in model_name:
188 |         if 'tiny' in model_name:
189 |             return [(h // 32) * (w // 32), (h // 16) * (w // 16)]
190 |         else:
191 |             return [(h // 32) * (w // 32), (h // 16) * (w // 16), (h // 8) * (w // 8)]
192 |     elif 'yolov4' in model_name:
193 |         if 'tiny' in model_name:
194 |             return [(h // 32) * (w // 32), (h // 16) * (w // 16)]
195 |         else:
196 |             return [(h // 8) * (w // 8), (h // 16) * (w // 16), (w // 32) * (h // 32)]
197 |     else:
198 |         raise ValueError('ERROR: unknown model (%s)!' % args.model)
199 | 
200 | 
201 | class TrtYOLO(object):
202 |     """TrtYOLO class encapsulates things needed to run TRT YOLO."""
203 | 
204 |     def _load_engine(self):
205 |         TRTbin = '%s.trt' % self.model
206 |         with open(TRTbin, 'rb') as f, trt.Runtime(self.trt_logger) as runtime:
207 |             return runtime.deserialize_cuda_engine(f.read())
208 | 
209 |     def __init__(self, model, input_shape, category_num=80, cuda_ctx=None):
210 |         """Initialize TensorRT plugins, engine and context."""
211 |         self.model = model
212 |         self.input_shape = input_shape
213 |         self.category_num = category_num
214 |         self.cuda_ctx = cuda.Device(0).make_context()
215 |         self.trt_logger = trt.Logger(trt.Logger.INFO)
216 |         self.engine = self._load_engine()
217 |         self.context = self.engine.create_execution_context()
218 |         grid_sizes = get_yolo_grid_sizes(
219 |             self.model, self.input_shape[0], self.input_shape[1])
220 |         self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers(
221 |             self.engine, grid_sizes)
222 |         self.overall_boxes = []
223 |         self.overall_scores = []
224 |         self.overall_classes = []
225 | 
226 |     def __del__(self):
227 |         """Free CUDA memories."""
228 |         del self.outputs
229 |         del self.inputs
230 |         del self.stream
231 | 
232 |     def allocate_buffers(self, engine, grid_sizes):
233 |         """Allocates all host/device in/out buffers required for an engine.
234 |            Checked!
235 |         """
236 | 
237 |         inputs = []
238 |         outputs = []
239 |         bindings = []
240 |         output_idx = 0
241 |         stream = cuda.Stream()
242 | 
243 |         for binding in engine:
244 |             size = trt.volume(engine.get_binding_shape(
245 |                 binding)) * engine.max_batch_size
246 |             dtype = trt.nptype(engine.get_binding_dtype(binding))
247 |             # Allocate host and device buffers
248 |             host_mem = cuda.pagelocked_empty(size, dtype)
249 |             device_mem = cuda.mem_alloc(host_mem.nbytes)
250 | 
251 |             # Append the device buffer to device bindings.
252 |             bindings.append(int(device_mem))
253 | 
254 |             # Append to the appropriate list.
255 |             if engine.binding_is_input(binding):
256 |                 inputs.append(HostDeviceMem(host_mem, device_mem))
257 | 
258 |             else:
259 |                 # each grid has 3 anchors, each anchor generates a detection
260 |                 # output of 7 float32 values
261 |                 assert size == grid_sizes[output_idx] * \
262 |                     3 * 7 * engine.max_batch_size
263 |                 outputs.append(HostDeviceMem(host_mem, device_mem))
264 |                 output_idx += 1
265 | 
266 |         return inputs, outputs, bindings, stream
267 | 
268 |     def detect1(self, img, conf_th=0.3):
269 |         """ Detect objects in the input image """
270 | 
271 |         img_resized = _preprocess_yolo(img, self.input_shape)
272 |         trt_outputs = []
273 | 
274 |         # Set host input to the image. The do_inference() function
275 |         # will copy the input to the GPU before executing.
276 |         self.inputs[0].host = np.ascontiguousarray(img_resized)
277 | 
278 |         # if self.cuda_ctx:
279 |         self.cuda_ctx.push()
280 | 
281 |         self.bindings = [int(i) for i in self.bindings]
282 | 
283 |         context = self.context
284 |         bindings = self.bindings
285 |         inputs = self.inputs
286 |         outputs = self.outputs
287 |         stream = self.stream
288 | 
289 |         # Transfer input data to the GPU.
290 |         [cuda.memcpy_htod_async(inp.device, inp.host, stream)
291 |          for inp in inputs]
292 |         # Run inference.
293 |         context.execute_async_v2(
294 |             bindings=self.bindings, stream_handle=stream.handle)
295 |         # Transfer predictions back from the GPU.
296 |         [cuda.memcpy_dtoh_async(out.host, out.device, stream)
297 |          for out in outputs]
298 |         # Synchronize the stream
299 |         stream.synchronize()
300 | 
301 |         trt_outputs = [out.host for out in outputs]
302 | 
303 |         del context
304 | 
305 |         # if self.cuda_ctx:
306 |         self.cuda_ctx.pop()
307 | 
308 |         boxes, scores, classes = _postprocess_yolo(
309 |             trt_outputs, img.shape[1], img.shape[0], conf_th)
310 | 
311 |         # clip x1, y1, x2, y2 within original image
312 |         boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1)
313 |         boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1)
314 |         return boxes, scores, classes
315 | 
316 |     def detect2(self, img, conf_th=0.3):
317 |         """ Detect objects in the input image """
318 | 
319 |         img_resized = _preprocess_yolo(img, self.input_shape)
320 |         trt_outputs = []
321 | 
322 |         # Set host input to the image. The do_inference() function
323 |         # will copy the input to the GPU before executing.
324 |         self.inputs[0].host = np.ascontiguousarray(img_resized)
325 | 
326 |         # if self.cuda_ctx:
327 |         self.cuda_ctx.push()
328 | 
329 |         self.bindings = [int(i) for i in self.bindings]
330 | 
331 |         context = self.context
332 |         bindings = self.bindings
333 |         inputs = self.inputs
334 |         outputs = self.outputs
335 |         stream = self.stream
336 | 
337 |         # Transfer input data to the GPU.
338 |         [cuda.memcpy_htod_async(inp.device, inp.host, stream)
339 |          for inp in inputs]
340 |         # Run inference.
341 |         context.execute_async_v2(
342 |             bindings=self.bindings, stream_handle=stream.handle)
343 |         # Transfer predictions back from the GPU.
344 |         [cuda.memcpy_dtoh_async(out.host, out.device, stream)
345 |          for out in outputs]
346 |         # Synchronize the stream
347 |         stream.synchronize()
348 | 
349 |         trt_outputs = [out.host for out in outputs]
350 | 
351 |         del context
352 | 
353 |         # if self.cuda_ctx:
354 |         self.cuda_ctx.pop()
355 | 
356 |         boxes, scores, classes = _postprocess_yolo(
357 |             trt_outputs, img.shape[1], img.shape[0], conf_th)
358 | 
359 |         # clip x1, y1, x2, y2 within original image
360 |         boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1)
361 |         boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1)
362 |         return boxes, scores, classes
363 | 
364 |     def detect3(self, img, conf_th=0.3):
365 |         """ Detect objects in the input image """
366 | 
367 |         img_resized = _preprocess_yolo(img, self.input_shape)
368 |         trt_outputs = []
369 | 
370 |         # Set host input to the image. The do_inference() function
371 |         # will copy the input to the GPU before executing.
372 |         self.inputs[0].host = np.ascontiguousarray(img_resized)
373 | 
374 |         # if self.cuda_ctx:
375 |         self.cuda_ctx.push()
376 | 
377 |         self.bindings = [int(i) for i in self.bindings]
378 | 
379 |         context = self.context
380 |         bindings = self.bindings
381 |         inputs = self.inputs
382 |         outputs = self.outputs
383 |         stream = self.stream
384 | 
385 |         # Transfer input data to the GPU.
386 |         [cuda.memcpy_htod_async(inp.device, inp.host, stream)
387 |          for inp in inputs]
388 |         # Run inference.
389 |         context.execute_async_v2(
390 |             bindings=self.bindings, stream_handle=stream.handle)
391 |         # Transfer predictions back from the GPU.
392 |         [cuda.memcpy_dtoh_async(out.host, out.device, stream)
393 |          for out in outputs]
394 |         # Synchronize the stream
395 |         stream.synchronize()
396 | 
397 |         trt_outputs = [out.host for out in outputs]
398 | 
399 |         del context
400 | 
401 |         # if self.cuda_ctx:
402 |         self.cuda_ctx.pop()
403 | 
404 |         boxes, scores, classes = _postprocess_yolo(
405 |             trt_outputs, img.shape[1], img.shape[0], conf_th)
406 | 
407 |         # clip x1, y1, x2, y2 within original image
408 |         boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1)
409 |         boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1)
410 |         return boxes, scores, classes
411 | 
412 |     def detect4(self, img, conf_th=0.3):
413 |         """ Detect objects in the input image """
414 | 
415 |         img_resized = _preprocess_yolo(img, self.input_shape)
416 |         trt_outputs = []
417 | 
418 |         # Set host input to the image. The do_inference() function
419 |         # will copy the input to the GPU before executing.
420 |         self.inputs[0].host = np.ascontiguousarray(img_resized)
421 | 
422 |         # if self.cuda_ctx:
423 |         self.cuda_ctx.push()
424 | 
425 |         self.bindings = [int(i) for i in self.bindings]
426 | 
427 |         context = self.context
428 |         bindings = self.bindings
429 |         inputs = self.inputs
430 |         outputs = self.outputs
431 |         stream = self.stream
432 | 
433 |         # Transfer input data to the GPU.
434 |         [cuda.memcpy_htod_async(inp.device, inp.host, stream)
435 |          for inp in inputs]
436 |         # Run inference.
437 |         context.execute_async_v2(
438 |             bindings=self.bindings, stream_handle=stream.handle)
439 |         # Transfer predictions back from the GPU.
440 |         [cuda.memcpy_dtoh_async(out.host, out.device, stream)
441 |          for out in outputs]
442 |         # Synchronize the stream
443 |         stream.synchronize()
444 | 
445 |         trt_outputs = [out.host for out in outputs]
446 | 
447 |         del context
448 | 
449 |         # if self.cuda_ctx:
450 |         self.cuda_ctx.pop()
451 | 
452 |         boxes, scores, classes = _postprocess_yolo(
453 |             trt_outputs, img.shape[1], img.shape[0], conf_th)
454 | 
455 |         # clip x1, y1, x2, y2 within original image
456 |         boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1)
457 |         boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1)
458 |         return boxes, scores, classes
459 | 


--------------------------------------------------------------------------------
/yolo/__pycache__/plugins.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/yolo/__pycache__/plugins.cpython-36.pyc


--------------------------------------------------------------------------------
/yolo/convert_yolo_trt.sh:
--------------------------------------------------------------------------------
 1 | echo "YOLOv3 or YOLOv4. Input 3 or 4"
 2 | read model_type
 3 | 
 4 | echo "Do you want to download the model? [Y/N]"
 5 | read download
 6 | 
 7 | if [[ $download == 'Y' ]]
 8 | then
 9 |     if [[ $model_type == 3 ]]
10 |     then
11 | 	wget https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg -q --show-progress --no-clobber
12 | 	wget https://pjreddie.com/media/files/yolov3.weights -q --show-progress --no-clobber
13 |     else
14 | 	wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg -q --show-progress --no-clobber
15 | 	wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights -q --show-progress --no-clobber
16 |     fi
17 | fi
18 | 
19 | echo "What is the input shape? Input 288 or 416 or 608"
20 | read input_shape
21 | 
22 | if [[ $model_type == 3 ]]
23 | then
24 |     if [[ $input_shape == 288 ]]
25 |     then
26 |         echo "Creating yolov3-288.cfg and yolov3-288.weights"
27 |         cat yolov3.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=288/' | sed -e '8s/height=608/height=288/' > yolov3-288.cfg
28 |         ln -sf yolov3.weights yolov3-288.weights
29 |     fi
30 |     if [[ $input_shape == 416 ]]
31 |     then
32 |         echo "Creating yolov3-416.cfg and yolov3-416.weights"
33 |         cat yolov3.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=416/' | sed -e '8s/height=608/height=416/' > yolov3-416.cfg
34 |         ln -sf yolov3.weights yolov3-416.weights
35 |     fi
36 |     if [[ $input_shape == 608 ]]
37 |     then
38 |         echo "Creating yolov3-608.cfg and yolov3-608.weights"
39 |         cat yolov3.cfg | sed -e '2s/batch=64/batch=1/' > yolov3-608.cfg
40 |         ln -sf yolov3.weights yolov3-608.weights
41 |     fi
42 | else
43 |     if [[ $input_shape == 288 ]]
44 |     then
45 |         echo "Creating yolov4-288.cfg and yolov4-288.weights"
46 |         cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=288/' | sed -e '8s/height=608/height=288/' > yolov4-288.cfg
47 |         ln -sf yolov4.weights yolov4-288.weights
48 |     fi
49 |     if [[ $input_shape == 416 ]]
50 |     then
51 |         echo "Creating yolov4-416.cfg and yolov4-416.weights"
52 |         cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=416/' | sed -e '8s/height=608/height=416/' > yolov4-416.cfg
53 |         ln -sf yolov4.weights yolov4-416.weights
54 |     fi
55 |     if [[ $input_shape == 608 ]]
56 |     then
57 |         echo "Creating yolov4-608.cfg and yolov4-608.weights"
58 |         cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' > yolov4-608.cfg
59 |         ln -sf yolov4.weights yolov4-608.weights
60 |     fi
61 | fi
62 | 
63 | echo "How many categories are there?"
64 | read category_num
65 | model_name="yolov${model_type}-${input_shape}"
66 | 
67 | # convert from yolo to onnx
68 | python3 yolo_to_onnx.py -m $model_name -c $category_num
69 | 
70 | echo "Done converting to .onnx"
71 | echo "..."
72 | echo "Now converting to .trt"
73 | 
74 | # convert from onnx to trt
75 | python3 onnx_to_tensorrt.py -m $model_name -c $category_num --verbose
76 | 
77 | echo "Conversion from yolo to trt done!"
78 | 


--------------------------------------------------------------------------------
/yolo/download_yolo.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # yolov3-tiny
 6 | #wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny.cfg -q --show-progress --no-clobber
 7 | #wget https://pjreddie.com/media/files/yolov3-tiny.weights -q --show-progress --no-clobber
 8 | 
 9 | # yolov3
10 | #wget https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg -q --show-progress --no-clobber
11 | #wget https://pjreddie.com/media/files/yolov3.weights -q --show-progress --no-clobber
12 | 
13 | # yolov3-spp
14 | #wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-spp.cfg -q --show-progress --no-clobber
15 | #wget https://pjreddie.com/media/files/yolov3-spp.weights -q --show-progress --no-clobber
16 | 
17 | # yolov4-tiny
18 | #wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny.cfg -q --show-progress --no-clobber
19 | #wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights -q --show-progress --no-clobber
20 | 
21 | # yolov4
22 | wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg -q --show-progress --no-clobber
23 | wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights -q --show-progress --no-clobber
24 | 
25 | #echo
26 | #echo "Creating yolov3-tiny-288.cfg and yolov3-tiny-288.weights"
27 | #cat yolov3-tiny.cfg | sed -e '8s/width=416/width=288/' | sed -e '9s/height=416/height=288/' > yolov3-tiny-288.cfg
28 | #echo >> yolov3-tiny-288.cfg
29 | #ln -sf yolov3-tiny.weights yolov3-tiny-288.weights
30 | #echo "Creating yolov3-tiny-416.cfg and yolov3-tiny-416.weights"
31 | #cp yolov3-tiny.cfg yolov3-tiny-416.cfg
32 | #echo >> yolov3-tiny-416.cfg
33 | #ln -sf yolov3-tiny.weights yolov3-tiny-416.weights
34 | 
35 | #echo "Creating yolov3-288.cfg and yolov3-288.weights"
36 | #cat yolov3.cfg | sed -e '8s/width=608/width=288/' | sed -e '9s/height=608/height=288/' > yolov3-288.cfg
37 | #ln -sf yolov3.weights yolov3-288.weights
38 | #echo "Creating yolov3-416.cfg and yolov3-416.weights"
39 | #cat yolov3.cfg | sed -e '8s/width=608/width=416/' | sed -e '9s/height=608/height=416/' > yolov3-416.cfg
40 | #ln -sf yolov3.weights yolov3-416.weights
41 | #echo "Creating yolov3-608.cfg and yolov3-608.weights"
42 | #cp yolov3.cfg yolov3-608.cfg
43 | #ln -sf yolov3.weights yolov3-608.weights
44 | 
45 | #echo "Creating yolov3-spp-288.cfg and yolov3-spp-288.weights"
46 | #cat yolov3-spp.cfg | sed -e '8s/width=608/width=288/' | sed -e '9s/height=608/height=288/' > yolov3-spp-288.cfg
47 | #ln -sf yolov3-spp.weights yolov3-spp-288.weights
48 | #echo "Creating yolov3-spp-416.cfg and yolov3-spp-416.weights"
49 | #cat yolov3-spp.cfg | sed -e '8s/width=608/width=416/' | sed -e '9s/height=608/height=416/' > yolov3-spp-416.cfg
50 | #ln -sf yolov3-spp.weights yolov3-spp-416.weights
51 | #echo "Creating yolov3-spp-608.cfg and yolov3-spp-608.weights"
52 | #cp yolov3-spp.cfg yolov3-spp-608.cfg
53 | #ln -sf yolov3-spp.weights yolov3-spp-608.weights
54 | 
55 | #echo "Creating yolov4-tiny-288.cfg and yolov4-tiny-288.weights"
56 | #cat yolov4-tiny.cfg | sed -e '6s/batch=64/batch=1/' | sed -e '8s/width=416/width=288/' | sed -e '9s/height=416/height=288/' > yolov4-tiny-288.cfg
57 | #echo >> yolov4-tiny-288.cfg
58 | #ln -sf yolov4-tiny.weights yolov4-tiny-288.weights
59 | #echo "Creating yolov4-tiny-416.cfg and yolov4-tiny-416.weights"
60 | #cat yolov4-tiny.cfg | sed -e '6s/batch=64/batch=1/' > yolov4-tiny-416.cfg
61 | #echo >> yolov4-tiny-416.cfg
62 | #ln -sf yolov4-tiny.weights yolov4-tiny-416.weights
63 | 
64 | #echo "Creating yolov4-288.cfg and yolov4-288.weights"
65 | #cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=288/' | sed -e '8s/height=608/height=288/' > yolov4-288.cfg
66 | #ln -sf yolov4.weights yolov4-288.weights
67 | echo "Creating yolov4-416.cfg and yolov4-416.weights"
68 | cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=416/' | sed -e '8s/height=608/height=416/' > yolov4-416.cfg
69 | ln -sf yolov4.weights yolov4-416.weights
70 | #echo "Creating yolov4-608.cfg and yolov4-608.weights"
71 | #cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' > yolov4-608.cfg
72 | #ln -sf yolov4.weights yolov4-608.weights
73 | 
74 | #echo
75 | #echo "Done."
76 | 
77 | 


--------------------------------------------------------------------------------
/yolo/onnx_to_tensorrt.py:
--------------------------------------------------------------------------------
  1 | # onnx_to_tensorrt.py
  2 | #
  3 | # Copyright 1993-2019 NVIDIA Corporation.  All rights reserved.
  4 | #
  5 | # NOTICE TO LICENSEE:
  6 | #
  7 | # This source code and/or documentation ("Licensed Deliverables") are
  8 | # subject to NVIDIA intellectual property rights under U.S. and
  9 | # international Copyright laws.
 10 | #
 11 | # These Licensed Deliverables contained herein is PROPRIETARY and
 12 | # CONFIDENTIAL to NVIDIA and is being provided under the terms and
 13 | # conditions of a form of NVIDIA software license agreement by and
 14 | # between NVIDIA and Licensee ("License Agreement") or electronically
 15 | # accepted by Licensee.  Notwithstanding any terms or conditions to
 16 | # the contrary in the License Agreement, reproduction or disclosure
 17 | # of the Licensed Deliverables to any third party without the express
 18 | # written consent of NVIDIA is prohibited.
 19 | #
 20 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 21 | # LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 22 | # SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 23 | # PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 24 | # NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 25 | # DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 26 | # NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 27 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 28 | # LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 29 | # SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 30 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 31 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 32 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 33 | # OF THESE LICENSED DELIVERABLES.
 34 | #
 35 | # U.S. Government End Users.  These Licensed Deliverables are a
 36 | # "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 37 | # 1995), consisting of "commercial computer software" and "commercial
 38 | # computer software documentation" as such terms are used in 48
 39 | # C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 40 | # only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 41 | # 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 42 | # U.S. Government End Users acquire the Licensed Deliverables with
 43 | # only those rights set forth herein.
 44 | #
 45 | # Any use of the Licensed Deliverables in individual and commercial
 46 | # software must include, in the user documentation and internal
 47 | # comments to the code, the above Disclaimer and U.S. Government End
 48 | # Users Notice.
 49 | #
 50 | 
 51 | 
 52 | from __future__ import print_function
 53 | 
 54 | import os
 55 | import argparse
 56 | 
 57 | import tensorrt as trt
 58 | 
 59 | from plugins import add_yolo_plugins
 60 | 
 61 | 
 62 | EXPLICIT_BATCH = []
 63 | if trt.__version__[0] >= '7':
 64 |     EXPLICIT_BATCH.append(
 65 |         1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
 66 | 
 67 | 
 68 | def build_engine(onnx_file_path, category_num=80, verbose=False):
 69 |     """Build a TensorRT engine from an ONNX file."""
 70 |     TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
 71 |     with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
 72 |         builder.max_workspace_size = 1 << 28
 73 |         builder.max_batch_size = 1
 74 |         builder.fp16_mode = True
 75 |         #builder.strict_type_constraints = True
 76 | 
 77 |         # Parse model file
 78 |         print('Loading ONNX file from path {}...'.format(onnx_file_path))
 79 |         with open(onnx_file_path, 'rb') as model:
 80 |             if not parser.parse(model.read()):
 81 |                 print('ERROR: Failed to parse the ONNX file.')
 82 |                 for error in range(parser.num_errors):
 83 |                     print(parser.get_error(error))
 84 |                 return None
 85 |         if trt.__version__[0] >= '7':
 86 |             # The actual yolo*.onnx is generated with batch size 64.
 87 |             # Reshape input to batch size 1
 88 |             shape = list(network.get_input(0).shape)
 89 |             shape[0] = 1
 90 |             network.get_input(0).shape = shape
 91 | 
 92 |         print('Adding yolo_layer plugins...')
 93 |         model_name = onnx_file_path[:-5]
 94 |         network = add_yolo_plugins(
 95 |             network, model_name, category_num, TRT_LOGGER)
 96 | 
 97 |         print('Building an engine.  This would take a while...')
 98 |         print('(Use "--verbose" to enable verbose logging.)')
 99 |         engine = builder.build_cuda_engine(network)
100 |         print('Completed creating engine.')
101 |         return engine
102 | 
103 | 
104 | def main():
105 |     """Create a TensorRT engine for ONNX-based YOLO."""
106 |     parser = argparse.ArgumentParser()
107 |     parser.add_argument(
108 |         '-v', '--verbose', action='store_true',
109 |         help='enable verbose output (for debugging)')
110 |     parser.add_argument(
111 |         '-c', '--category_num', type=int, default=80,
112 |         help='number of object categories [80]')
113 |     parser.add_argument(
114 |         '-m', '--model', type=str, required=True,
115 |         help=('[yolov3|yolov3-tiny|yolov3-spp|yolov4|yolov4-tiny]-'
116 |               '[{dimension}], where dimension could be a single '
117 |               'number (e.g. 288, 416, 608) or WxH (e.g. 416x256)'))
118 |     args = parser.parse_args()
119 | 
120 |     onnx_file_path = '%s.onnx' % args.model
121 |     if not os.path.isfile(onnx_file_path):
122 |         raise SystemExit('ERROR: file (%s) not found!  You might want to run yolo_to_onnx.py first to generate it.' % onnx_file_path)
123 |     engine_file_path = '%s.trt' % args.model
124 |     engine = build_engine(onnx_file_path, args.category_num, args.verbose)
125 |     with open(engine_file_path, 'wb') as f:
126 |         f.write(engine.serialize())
127 |     print('Serialized the TensorRT engine to file: %s' % engine_file_path)
128 | 
129 | 
130 | if __name__ == '__main__':
131 |     main()
132 | 


--------------------------------------------------------------------------------
/yolo/plugins.py:
--------------------------------------------------------------------------------
  1 | """plugins.py
  2 | 
  3 | I referenced the code from https://github.com/dongfangduoshou123/YoloV3-TensorRT/blob/master/seralizeEngineFromPythonAPI.py
  4 | """
  5 | 
  6 | 
  7 | import ctypes
  8 | 
  9 | import numpy as np
 10 | import tensorrt as trt
 11 | 
 12 | try:
 13 |     ctypes.cdll.LoadLibrary('../plugins/libyolo_layer.so')
 14 | except OSError as e:
 15 |     raise SystemExit('ERROR: failed to load ../plugins/libyolo_layer.so.  '
 16 |                      'Did you forget to do a "make" in the "../plugins/" '
 17 |                      'subdirectory?')
 18 | 
 19 | 
 20 | def get_input_wh(model_name):
 21 |     """Get input_width and input_height of the model."""
 22 |     yolo_dim = model_name.split('-')[-1]
 23 |     if 'x' in yolo_dim:
 24 |         dim_split = yolo_dim.split('x')
 25 |         if len(dim_split) != 2:
 26 |             raise ValueError('ERROR: bad yolo_dim (%s)!' % yolo_dim)
 27 |         w, h = int(dim_split[0]), int(dim_split[1])
 28 |     else:
 29 |         h = w = int(yolo_dim)
 30 |     if h % 32 != 0 or w % 32 != 0:
 31 |         raise ValueError('ERROR: bad yolo_dim (%s)!' % yolo_dim)
 32 |     return w, h
 33 | 
 34 | 
 35 | def get_yolo_whs(model_name, w, h):
 36 |     """Get yolo_width and yolo_height for all yolo layers in the model."""
 37 |     if 'yolov3' in model_name:
 38 |         if 'tiny' in model_name:
 39 |             return [[w // 32, h // 32], [w // 16, h // 16]]
 40 |         else:
 41 |             return [[w // 32, h // 32], [w // 16, h // 16], [w // 8, h // 8]]
 42 |     elif 'yolov4' in model_name:
 43 |         if 'tiny' in model_name:
 44 |             return [[w // 32, h // 32], [w // 16, h // 16]]
 45 |         else:
 46 |             return [[w // 8, h // 8], [w // 16, h // 16], [w // 32, h // 32]]
 47 |     else:
 48 |         raise ValueError('ERROR: unknown model (%s)!' % args.model)
 49 | 
 50 | 
 51 | def verify_classes(model_name, num_classes):
 52 |     """Verify 'classes=??' in cfg matches user-specified num_classes."""
 53 |     cfg_file_path = model_name + '.cfg'
 54 |     with open(cfg_file_path, 'r') as f:
 55 |         cfg_lines = f.readlines()
 56 |     classes_lines = [l.strip() for l in cfg_lines if l.startswith('classes')]
 57 |     classes = [int(l.split('=')[-1]) for l in classes_lines]
 58 |     return all([c == num_classes for c in classes])
 59 | 
 60 | 
 61 | def get_anchors(model_name):
 62 |     """Get anchors of all yolo layers from the cfg file."""
 63 |     cfg_file_path = model_name + '.cfg'
 64 |     with open(cfg_file_path, 'r') as f:
 65 |         cfg_lines = f.readlines()
 66 |     yolo_lines = [l.strip() for l in cfg_lines if l.startswith('[yolo]')]
 67 |     mask_lines = [l.strip() for l in cfg_lines if l.startswith('mask')]
 68 |     anch_lines = [l.strip() for l in cfg_lines if l.startswith('anchors')]
 69 |     assert len(mask_lines) == len(yolo_lines)
 70 |     assert len(anch_lines) == len(yolo_lines)
 71 |     anchor_list = eval('[%s]' % anch_lines[0].split('=')[-1])
 72 |     mask_strs = [l.split('=')[-1] for l in mask_lines]
 73 |     masks = [eval('[%s]' % s)  for s in mask_strs]
 74 |     anchors = []
 75 |     for mask in masks:
 76 |         curr_anchors = []
 77 |         for m in mask:
 78 |             curr_anchors.append(anchor_list[m * 2])
 79 |             curr_anchors.append(anchor_list[m * 2 + 1])
 80 |         anchors.append(curr_anchors)
 81 |     return anchors
 82 | 
 83 | 
 84 | def get_scales(model_name):
 85 |     """Get scale_x_y's of all yolo layers from the cfg file."""
 86 |     cfg_file_path = model_name + '.cfg'
 87 |     with open(cfg_file_path, 'r') as f:
 88 |         cfg_lines = f.readlines()
 89 |     yolo_lines = [l.strip() for l in cfg_lines if l.startswith('[yolo]')]
 90 |     scale_lines = [l.strip() for l in cfg_lines if l.startswith('scale_x_y')]
 91 |     if len(scale_lines) == 0:
 92 |         return [1.0] * len(yolo_lines)
 93 |     else:
 94 |         assert len(scale_lines) == len(yolo_lines)
 95 |         return [float(l.split('=')[-1]) for l in scale_lines]
 96 | 
 97 | 
 98 | def get_plugin_creator(plugin_name, logger):
 99 |     """Get the TensorRT plugin creator."""
100 |     trt.init_libnvinfer_plugins(logger, '')
101 |     plugin_creator_list = trt.get_plugin_registry().plugin_creator_list
102 |     for c in plugin_creator_list:
103 |         if c.name == plugin_name:
104 |             return c
105 |     return None
106 | 
107 | 
108 | def add_yolo_plugins(network, model_name, num_classes, logger):
109 |     """Add yolo plugins into a TensorRT network."""
110 |     input_width, input_height = get_input_wh(model_name)
111 |     yolo_whs = get_yolo_whs(model_name, input_width, input_height)
112 |     if not verify_classes(model_name, num_classes):
113 |         raise ValueError('bad num_classes (%d)' % num_classes)
114 |     anchors = get_anchors(model_name)
115 |     if len(anchors) != len(yolo_whs):
116 |         raise ValueError('bad number of yolo layers: %d vs. %d' %
117 |                          (len(anchors), len(yolo_whs)))
118 |     if network.num_outputs != len(anchors):
119 |         raise ValueError('bad number of network outputs: %d vs. %d' %
120 |                          (network.num_outputs, len(anchors)))
121 |     scales = get_scales(model_name)
122 |     if any([s < 1.0 for s in scales]):
123 |         raise ValueError('bad scale_x_y: %s' % str(scales))
124 | 
125 |     plugin_creator = get_plugin_creator('YoloLayer_TRT', logger)
126 |     if not plugin_creator:
127 |         raise RuntimeError('cannot get YoloLayer_TRT plugin creator')
128 |     old_tensors = [network.get_output(i) for i in range(network.num_outputs)]
129 |     new_tensors = [None] * network.num_outputs
130 |     for i, old_tensor in enumerate(old_tensors):
131 |         new_tensors[i] = network.add_plugin_v2(
132 |             [old_tensor],
133 |             plugin_creator.create_plugin('YoloLayer_TRT', trt.PluginFieldCollection([
134 |                 trt.PluginField("yoloWidth", np.array(yolo_whs[i][0], dtype=np.int32), trt.PluginFieldType.INT32),
135 |                 trt.PluginField("yoloHeight", np.array(yolo_whs[i][1], dtype=np.int32), trt.PluginFieldType.INT32),
136 |                 trt.PluginField("inputWidth", np.array(input_width, dtype=np.int32), trt.PluginFieldType.INT32),
137 |                 trt.PluginField("inputHeight", np.array(input_height, dtype=np.int32), trt.PluginFieldType.INT32),
138 |                 trt.PluginField("numClasses", np.array(num_classes, dtype=np.int32), trt.PluginFieldType.INT32),
139 |                 trt.PluginField("numAnchors", np.array(len(anchors[i]) // 2, dtype=np.int32), trt.PluginFieldType.INT32),
140 |                 trt.PluginField("anchors", np.ascontiguousarray(anchors[i], dtype=np.float32), trt.PluginFieldType.FLOAT32),
141 |                 trt.PluginField("scaleXY", np.array(scales[i], dtype=np.float32), trt.PluginFieldType.FLOAT32),
142 |             ]))
143 |         ).get_output(0)
144 | 
145 |     for new_tensor in new_tensors:
146 |         network.mark_output(new_tensor)
147 |     for old_tensor in old_tensors:
148 |         network.unmark_output(old_tensor)
149 | 
150 |     return network
151 | 


--------------------------------------------------------------------------------
/yolo/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.15.1
2 | onnx==1.4.1
3 | pycuda>=2017.1.1
4 | 


--------------------------------------------------------------------------------
/yolo/yolo_to_onnx.py:
--------------------------------------------------------------------------------
  1 | # yolo_to_onnx.py
  2 | #
  3 | # Copyright 1993-2019 NVIDIA Corporation.  All rights reserved.
  4 | #
  5 | # NOTICE TO LICENSEE:
  6 | #
  7 | # This source code and/or documentation ("Licensed Deliverables") are
  8 | # subject to NVIDIA intellectual property rights under U.S. and
  9 | # international Copyright laws.
 10 | #
 11 | # These Licensed Deliverables contained herein is PROPRIETARY and
 12 | # CONFIDENTIAL to NVIDIA and is being provided under the terms and
 13 | # conditions of a form of NVIDIA software license agreement by and
 14 | # between NVIDIA and Licensee ("License Agreement") or electronically
 15 | # accepted by Licensee.  Notwithstanding any terms or conditions to
 16 | # the contrary in the License Agreement, reproduction or disclosure
 17 | # of the Licensed Deliverables to any third party without the express
 18 | # written consent of NVIDIA is prohibited.
 19 | #
 20 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 21 | # LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 22 | # SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 23 | # PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 24 | # NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 25 | # DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 26 | # NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 27 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 28 | # LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 29 | # SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 30 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 31 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 32 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 33 | # OF THESE LICENSED DELIVERABLES.
 34 | #
 35 | # U.S. Government End Users.  These Licensed Deliverables are a
 36 | # "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 37 | # 1995), consisting of "commercial computer software" and "commercial
 38 | # computer software documentation" as such terms are used in 48
 39 | # C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 40 | # only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 41 | # 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 42 | # U.S. Government End Users acquire the Licensed Deliverables with
 43 | # only those rights set forth herein.
 44 | #
 45 | # Any use of the Licensed Deliverables in individual and commercial
 46 | # software must include, in the user documentation and internal
 47 | # comments to the code, the above Disclaimer and U.S. Government End
 48 | # Users Notice.
 49 | #
 50 | 
 51 | 
 52 | from __future__ import print_function
 53 | 
 54 | import os
 55 | import sys
 56 | import hashlib
 57 | import argparse
 58 | from collections import OrderedDict
 59 | 
 60 | import numpy as np
 61 | import onnx
 62 | from onnx import helper, TensorProto
 63 | 
 64 | from plugins import verify_classes, get_input_wh
 65 | 
 66 | 
 67 | class DarkNetParser(object):
 68 |     """Definition of a parser for DarkNet-based YOLO model."""
 69 | 
 70 |     def __init__(self, supported_layers):
 71 |         """Initializes a DarkNetParser object.
 72 | 
 73 |         Keyword argument:
 74 |         supported_layers -- a string list of supported layers in DarkNet naming convention,
 75 |         parameters are only added to the class dictionary if a parsed layer is included.
 76 |         """
 77 | 
 78 |         # A list of YOLO layers containing dictionaries with all layer
 79 |         # parameters:
 80 |         self.layer_configs = OrderedDict()
 81 |         self.supported_layers = supported_layers
 82 |         self.layer_counter = 0
 83 | 
 84 |     def parse_cfg_file(self, cfg_file_path):
 85 |         """Takes the yolov?.cfg file and parses it layer by layer,
 86 |         appending each layer's parameters as a dictionary to layer_configs.
 87 | 
 88 |         Keyword argument:
 89 |         cfg_file_path
 90 |         """
 91 |         with open(cfg_file_path, 'r') as cfg_file:
 92 |             remainder = cfg_file.read()
 93 |             while remainder is not None:
 94 |                 layer_dict, layer_name, remainder = self._next_layer(remainder)
 95 |                 if layer_dict is not None:
 96 |                     self.layer_configs[layer_name] = layer_dict
 97 |         return self.layer_configs
 98 | 
 99 |     def _next_layer(self, remainder):
100 |         """Takes in a string and segments it by looking for DarkNet delimiters.
101 |         Returns the layer parameters and the remaining string after the last delimiter.
102 |         Example for the first Conv layer in yolo.cfg ...
103 | 
104 |         [convolutional]
105 |         batch_normalize=1
106 |         filters=32
107 |         size=3
108 |         stride=1
109 |         pad=1
110 |         activation=leaky
111 | 
112 |         ... becomes the following layer_dict return value:
113 |         {'activation': 'leaky', 'stride': 1, 'pad': 1, 'filters': 32,
114 |         'batch_normalize': 1, 'type': 'convolutional', 'size': 3}.
115 | 
116 |         '001_convolutional' is returned as layer_name, and all lines that follow in yolo.cfg
117 |         are returned as the next remainder.
118 | 
119 |         Keyword argument:
120 |         remainder -- a string with all raw text after the previously parsed layer
121 |         """
122 |         remainder = remainder.split('[', 1)
123 |         if len(remainder) == 2:
124 |             remainder = remainder[1]
125 |         else:
126 |             return None, None, None
127 |         remainder = remainder.split(']', 1)
128 |         if len(remainder) == 2:
129 |             layer_type, remainder = remainder
130 |         else:
131 |             return None, None, None
132 |         if remainder.replace(' ', '')[0] == '#':
133 |             remainder = remainder.split('\n', 1)[1]
134 | 
135 |         out = remainder.split('\n\n', 1)
136 |         if len(out) == 2:
137 |             layer_param_block, remainder = out[0], out[1]
138 |         else:
139 |             layer_param_block, remainder = out[0], ''
140 |         if layer_type == 'yolo':
141 |             layer_param_lines = []
142 |         else:
143 |             layer_param_lines = layer_param_block.split('\n')[1:]
144 |         layer_name = str(self.layer_counter).zfill(3) + '_' + layer_type
145 |         layer_dict = dict(type=layer_type)
146 |         if layer_type in self.supported_layers:
147 |             for param_line in layer_param_lines:
148 |                 if param_line[0] == '#':
149 |                     continue
150 |                 param_type, param_value = self._parse_params(param_line)
151 |                 layer_dict[param_type] = param_value
152 |         self.layer_counter += 1
153 |         return layer_dict, layer_name, remainder
154 | 
155 |     def _parse_params(self, param_line):
156 |         """Identifies the parameters contained in one of the cfg file and returns
157 |         them in the required format for each parameter type, e.g. as a list, an int or a float.
158 | 
159 |         Keyword argument:
160 |         param_line -- one parsed line within a layer block
161 |         """
162 |         param_line = param_line.replace(' ', '')
163 |         param_type, param_value_raw = param_line.split('=')
164 |         param_value = None
165 |         if param_type == 'layers':
166 |             layer_indexes = list()
167 |             for index in param_value_raw.split(','):
168 |                 layer_indexes.append(int(index))
169 |             param_value = layer_indexes
170 |         elif isinstance(param_value_raw, str) and not param_value_raw.isalpha():
171 |             condition_param_value_positive = param_value_raw.isdigit()
172 |             condition_param_value_negative = param_value_raw[0] == '-' and \
173 |                 param_value_raw[1:].isdigit()
174 |             if condition_param_value_positive or condition_param_value_negative:
175 |                 param_value = int(param_value_raw)
176 |             else:
177 |                 param_value = float(param_value_raw)
178 |         else:
179 |             param_value = str(param_value_raw)
180 |         return param_type, param_value
181 | 
182 | 
183 | class MajorNodeSpecs(object):
184 |     """Helper class used to store the names of ONNX output names,
185 |     corresponding to the output of a DarkNet layer and its output channels.
186 |     Some DarkNet layers are not created and there is no corresponding ONNX node,
187 |     but we still need to track them in order to set up skip connections.
188 |     """
189 | 
190 |     def __init__(self, name, channels):
191 |         """ Initialize a MajorNodeSpecs object.
192 | 
193 |         Keyword arguments:
194 |         name -- name of the ONNX node
195 |         channels -- number of output channels of this node
196 |         """
197 |         self.name = name
198 |         self.channels = channels
199 |         self.created_onnx_node = False
200 |         if name is not None and isinstance(channels, int) and channels > 0:
201 |             self.created_onnx_node = True
202 | 
203 | 
204 | class ConvParams(object):
205 |     """Helper class to store the hyper parameters of a Conv layer,
206 |     including its prefix name in the ONNX graph and the expected dimensions
207 |     of weights for convolution, bias, and batch normalization.
208 | 
209 |     Additionally acts as a wrapper for generating safe names for all
210 |     weights, checking on feasible combinations.
211 |     """
212 | 
213 |     def __init__(self, node_name, batch_normalize, conv_weight_dims):
214 |         """Constructor based on the base node name (e.g. 101_convolutional), the batch
215 |         normalization setting, and the convolutional weights shape.
216 | 
217 |         Keyword arguments:
218 |         node_name -- base name of this YOLO convolutional layer
219 |         batch_normalize -- bool value if batch normalization is used
220 |         conv_weight_dims -- the dimensions of this layer's convolutional weights
221 |         """
222 |         self.node_name = node_name
223 |         self.batch_normalize = batch_normalize
224 |         assert len(conv_weight_dims) == 4
225 |         self.conv_weight_dims = conv_weight_dims
226 | 
227 |     def generate_param_name(self, param_category, suffix):
228 |         """Generates a name based on two string inputs,
229 |         and checks if the combination is valid."""
230 |         assert suffix
231 |         assert param_category in ['bn', 'conv']
232 |         assert(suffix in ['scale', 'mean', 'var', 'weights', 'bias'])
233 |         if param_category == 'bn':
234 |             assert self.batch_normalize
235 |             assert suffix in ['scale', 'bias', 'mean', 'var']
236 |         elif param_category == 'conv':
237 |             assert suffix in ['weights', 'bias']
238 |             if suffix == 'bias':
239 |                 assert not self.batch_normalize
240 |         param_name = self.node_name + '_' + param_category + '_' + suffix
241 |         return param_name
242 | 
243 | class UpsampleParams(object):
244 |     #Helper class to store the scale parameter for an Upsample node.
245 | 
246 |     def __init__(self, node_name, value):
247 |         """Constructor based on the base node name (e.g. 86_Upsample),
248 |         and the value of the scale input tensor.
249 | 
250 |         Keyword arguments:
251 |         node_name -- base name of this YOLO Upsample layer
252 |         value -- the value of the scale input to the Upsample layer as a numpy array
253 |         """
254 |         self.node_name = node_name
255 |         self.value = value
256 | 
257 |     def generate_param_name(self):
258 |         """Generates the scale parameter name for the Upsample node."""
259 |         param_name = self.node_name + '_' + 'scale'
260 |         return param_name
261 | 
262 | class WeightLoader(object):
263 |     """Helper class used for loading the serialized weights of a binary file stream
264 |     and returning the initializers and the input tensors required for populating
265 |     the ONNX graph with weights.
266 |     """
267 | 
268 |     def __init__(self, weights_file_path):
269 |         """Initialized with a path to the YOLO .weights file.
270 | 
271 |         Keyword argument:
272 |         weights_file_path -- path to the weights file.
273 |         """
274 |         self.weights_file = self._open_weights_file(weights_file_path)
275 | 
276 |     def load_upsample_scales(self, upsample_params):
277 |         """Returns the initializers with the value of the scale input
278 |         tensor given by upsample_params.
279 | 
280 |         Keyword argument:
281 |         upsample_params -- a UpsampleParams object
282 |         """
283 |         initializer = list()
284 |         inputs = list()
285 |         name = upsample_params.generate_param_name()
286 |         shape = upsample_params.value.shape
287 |         data = upsample_params.value
288 |         scale_init = helper.make_tensor(
289 |             name, TensorProto.FLOAT, shape, data)
290 |         scale_input = helper.make_tensor_value_info(
291 |             name, TensorProto.FLOAT, shape)
292 |         initializer.append(scale_init)
293 |         inputs.append(scale_input)
294 |         return initializer, inputs
295 | 
296 | 
297 |     def load_conv_weights(self, conv_params):
298 |         """Returns the initializers with weights from the weights file and
299 |         the input tensors of a convolutional layer for all corresponding ONNX nodes.
300 | 
301 |         Keyword argument:
302 |         conv_params -- a ConvParams object
303 |         """
304 |         initializer = list()
305 |         inputs = list()
306 |         if conv_params.batch_normalize:
307 |             bias_init, bias_input = self._create_param_tensors(
308 |                 conv_params, 'bn', 'bias')
309 |             bn_scale_init, bn_scale_input = self._create_param_tensors(
310 |                 conv_params, 'bn', 'scale')
311 |             bn_mean_init, bn_mean_input = self._create_param_tensors(
312 |                 conv_params, 'bn', 'mean')
313 |             bn_var_init, bn_var_input = self._create_param_tensors(
314 |                 conv_params, 'bn', 'var')
315 |             initializer.extend(
316 |                 [bn_scale_init, bias_init, bn_mean_init, bn_var_init])
317 |             inputs.extend([bn_scale_input, bias_input,
318 |                            bn_mean_input, bn_var_input])
319 |         else:
320 |             bias_init, bias_input = self._create_param_tensors(
321 |                 conv_params, 'conv', 'bias')
322 |             initializer.append(bias_init)
323 |             inputs.append(bias_input)
324 |         conv_init, conv_input = self._create_param_tensors(
325 |             conv_params, 'conv', 'weights')
326 |         initializer.append(conv_init)
327 |         inputs.append(conv_input)
328 |         return initializer, inputs
329 | 
330 |     def _open_weights_file(self, weights_file_path):
331 |         """Opens a YOLO DarkNet file stream and skips the header.
332 | 
333 |         Keyword argument:
334 |         weights_file_path -- path to the weights file.
335 |         """
336 |         weights_file = open(weights_file_path, 'rb')
337 |         length_header = 5
338 |         np.ndarray(shape=(length_header, ), dtype='int32',
339 |                    buffer=weights_file.read(length_header * 4))
340 |         return weights_file
341 | 
342 |     def _create_param_tensors(self, conv_params, param_category, suffix):
343 |         """Creates the initializers with weights from the weights file together with
344 |         the input tensors.
345 | 
346 |         Keyword arguments:
347 |         conv_params -- a ConvParams object
348 |         param_category -- the category of parameters to be created ('bn' or 'conv')
349 |         suffix -- a string determining the sub-type of above param_category (e.g.,
350 |         'weights' or 'bias')
351 |         """
352 |         param_name, param_data, param_data_shape = self._load_one_param_type(
353 |             conv_params, param_category, suffix)
354 | 
355 |         initializer_tensor = helper.make_tensor(
356 |             param_name, TensorProto.FLOAT, param_data_shape, param_data)
357 |         input_tensor = helper.make_tensor_value_info(
358 |             param_name, TensorProto.FLOAT, param_data_shape)
359 |         return initializer_tensor, input_tensor
360 | 
361 |     def _load_one_param_type(self, conv_params, param_category, suffix):
362 |         """Deserializes the weights from a file stream in the DarkNet order.
363 | 
364 |         Keyword arguments:
365 |         conv_params -- a ConvParams object
366 |         param_category -- the category of parameters to be created ('bn' or 'conv')
367 |         suffix -- a string determining the sub-type of above param_category (e.g.,
368 |         'weights' or 'bias')
369 |         """
370 |         param_name = conv_params.generate_param_name(param_category, suffix)
371 |         channels_out, channels_in, filter_h, filter_w = conv_params.conv_weight_dims
372 |         if param_category == 'bn':
373 |             param_shape = [channels_out]
374 |         elif param_category == 'conv':
375 |             if suffix == 'weights':
376 |                 param_shape = [channels_out, channels_in, filter_h, filter_w]
377 |                 #print(param_shape)
378 |             elif suffix == 'bias':
379 |                 param_shape = [channels_out]
380 |         param_size = np.product(np.array(param_shape))
381 |         param_data = np.ndarray(
382 |             shape=param_shape,
383 |             dtype='float32',
384 |             buffer=self.weights_file.read(param_size * 4))
385 |         param_data = param_data.flatten().astype(float)
386 |         return param_name, param_data, param_shape
387 | 
388 | 
389 | class GraphBuilderONNX(object):
390 |     """Class for creating an ONNX graph from a previously generated list of layer dictionaries."""
391 | 
392 |     def __init__(self, model_name, output_tensors):
393 |         """Initialize with all DarkNet default parameters used creating
394 |         YOLO, and specify the output tensors as an OrderedDict for their
395 |         output dimensions with their names as keys.
396 | 
397 |         Keyword argument:
398 |         output_tensors -- the output tensors as an OrderedDict containing the keys'
399 |         output dimensions
400 |         """
401 |         self.model_name = model_name
402 |         self.output_tensors = output_tensors
403 |         self._nodes = list()
404 |         self.graph_def = None
405 |         self.input_tensor = None
406 |         self.epsilon_bn = 1e-5
407 |         self.momentum_bn = 0.99
408 |         self.alpha_lrelu = 0.1
409 |         self.param_dict = OrderedDict()
410 |         self.major_node_specs = list()
411 |         self.batch_size = 1
412 |         self.route_spec = 0  # keeping track of the current active 'route'
413 | 
414 |     def build_onnx_graph(
415 |             self,
416 |             layer_configs,
417 |             weights_file_path,
418 |             verbose=True):
419 |         """Iterate over all layer configs (parsed from the DarkNet
420 |         representation of YOLO), create an ONNX graph, populate it with
421 |         weights from the weights file and return the graph definition.
422 | 
423 |         Keyword arguments:
424 |         layer_configs -- an OrderedDict object with all parsed layers' configurations
425 |         weights_file_path -- location of the weights file
426 |         verbose -- toggles if the graph is printed after creation (default: True)
427 |         """
428 |         for layer_name in layer_configs.keys():
429 |             layer_dict = layer_configs[layer_name]
430 |             major_node_specs = self._make_onnx_node(layer_name, layer_dict)
431 |             if major_node_specs.name is not None:
432 |                 self.major_node_specs.append(major_node_specs)
433 |         # remove dummy 'route' and 'yolo' nodes
434 |         self.major_node_specs = [node for node in self.major_node_specs
435 |                                       if 'dummy' not in node.name]
436 |         outputs = list()
437 |         for tensor_name in self.output_tensors.keys():
438 |             output_dims = [self.batch_size, ] + \
439 |                 self.output_tensors[tensor_name]
440 |             output_tensor = helper.make_tensor_value_info(
441 |                 tensor_name, TensorProto.FLOAT, output_dims)
442 |             outputs.append(output_tensor)
443 |         inputs = [self.input_tensor]
444 |         weight_loader = WeightLoader(weights_file_path)
445 |         initializer = list()
446 |         # If a layer has parameters, add them to the initializer and input lists.
447 |         for layer_name in self.param_dict.keys():
448 |             _, layer_type = layer_name.split('_', 1)
449 |             params = self.param_dict[layer_name]
450 |             if layer_type == 'convolutional':
451 |                 #print('%s  ' % layer_name, end='')
452 |                 initializer_layer, inputs_layer = weight_loader.load_conv_weights(
453 |                     params)
454 |                 initializer.extend(initializer_layer)
455 |                 inputs.extend(inputs_layer)
456 |             elif layer_type == 'upsample':
457 |                 initializer_layer, inputs_layer = weight_loader.load_upsample_scales(
458 |                     params)
459 |                 initializer.extend(initializer_layer)
460 |                 inputs.extend(inputs_layer)
461 |         del weight_loader
462 |         self.graph_def = helper.make_graph(
463 |             nodes=self._nodes,
464 |             name=self.model_name,
465 |             inputs=inputs,
466 |             outputs=outputs,
467 |             initializer=initializer
468 |         )
469 |         if verbose:
470 |             print(helper.printable_graph(self.graph_def))
471 |         model_def = helper.make_model(self.graph_def,
472 |                                       producer_name='NVIDIA TensorRT sample')
473 |         return model_def
474 | 
475 |     def _make_onnx_node(self, layer_name, layer_dict):
476 |         """Take in a layer parameter dictionary, choose the correct function for
477 |         creating an ONNX node and store the information important to graph creation
478 |         as a MajorNodeSpec object.
479 | 
480 |         Keyword arguments:
481 |         layer_name -- the layer's name (also the corresponding key in layer_configs)
482 |         layer_dict -- a layer parameter dictionary (one element of layer_configs)
483 |         """
484 |         layer_type = layer_dict['type']
485 |         if self.input_tensor is None:
486 |             if layer_type == 'net':
487 |                 major_node_output_name, major_node_output_channels = self._make_input_tensor(
488 |                     layer_name, layer_dict)
489 |                 major_node_specs = MajorNodeSpecs(major_node_output_name,
490 |                                                   major_node_output_channels)
491 |             else:
492 |                 raise ValueError('The first node has to be of type "net".')
493 |         else:
494 |             node_creators = dict()
495 |             node_creators['convolutional'] = self._make_conv_node
496 |             node_creators['maxpool'] = self._make_maxpool_node
497 |             node_creators['shortcut'] = self._make_shortcut_node
498 |             node_creators['route'] = self._make_route_node
499 |             node_creators['upsample'] = self._make_upsample_node
500 |             node_creators['yolo'] = self._make_yolo_node
501 | 
502 |             if layer_type in node_creators.keys():
503 |                 major_node_output_name, major_node_output_channels = \
504 |                     node_creators[layer_type](layer_name, layer_dict)
505 |                 major_node_specs = MajorNodeSpecs(major_node_output_name,
506 |                                                   major_node_output_channels)
507 |             else:
508 |                 print(
509 |                     'Layer of type %s not supported, skipping ONNX node generation.' %
510 |                     layer_type)
511 |                 major_node_specs = MajorNodeSpecs(layer_name,
512 |                                                   None)
513 |         return major_node_specs
514 | 
515 |     def _make_input_tensor(self, layer_name, layer_dict):
516 |         """Create an ONNX input tensor from a 'net' layer and store the batch size.
517 | 
518 |         Keyword arguments:
519 |         layer_name -- the layer's name (also the corresponding key in layer_configs)
520 |         layer_dict -- a layer parameter dictionary (one element of layer_configs)
521 |         """
522 |         #batch_size = layer_dict['batch']
523 |         channels = layer_dict['channels']
524 |         height = layer_dict['height']
525 |         width = layer_dict['width']
526 |         #self.batch_size = batch_size
527 |         input_tensor = helper.make_tensor_value_info(
528 |             str(layer_name), TensorProto.FLOAT, [
529 |                 self.batch_size, channels, height, width])
530 |         self.input_tensor = input_tensor
531 |         return layer_name, channels
532 | 
533 |     def _get_previous_node_specs(self, target_index=0):
534 |         """Get a previously ONNX node.
535 | 
536 |         Target index can be passed for jumping to a specific index.
537 | 
538 |         Keyword arguments:
539 |         target_index -- optional for jumping to a specific index,
540 |                         default: 0 for the previous element, while
541 |                         taking 'route' spec into account
542 |         """
543 |         if target_index == 0:
544 |             if self.route_spec != 0:
545 |                 previous_node = self.major_node_specs[self.route_spec]
546 |                 assert 'dummy' not in previous_node.name
547 |                 self.route_spec = 0
548 |             else:
549 |                 previous_node = self.major_node_specs[-1]
550 |         else:
551 |             previous_node = self.major_node_specs[target_index]
552 |         assert previous_node.created_onnx_node
553 |         return previous_node
554 | 
555 |     def _make_conv_node(self, layer_name, layer_dict):
556 |         """Create an ONNX Conv node with optional batch normalization and
557 |         activation nodes.
558 | 
559 |         Keyword arguments:
560 |         layer_name -- the layer's name (also the corresponding key in layer_configs)
561 |         layer_dict -- a layer parameter dictionary (one element of layer_configs)
562 |         """
563 |         previous_node_specs = self._get_previous_node_specs()
564 |         inputs = [previous_node_specs.name]
565 |         previous_channels = previous_node_specs.channels
566 |         kernel_size = layer_dict['size']
567 |         stride = layer_dict['stride']
568 |         filters = layer_dict['filters']
569 |         batch_normalize = False
570 |         if 'batch_normalize' in layer_dict.keys(
571 |         ) and layer_dict['batch_normalize'] == 1:
572 |             batch_normalize = True
573 | 
574 |         kernel_shape = [kernel_size, kernel_size]
575 |         weights_shape = [filters, previous_channels] + kernel_shape
576 |         conv_params = ConvParams(layer_name, batch_normalize, weights_shape)
577 | 
578 |         strides = [stride, stride]
579 |         dilations = [1, 1]
580 |         weights_name = conv_params.generate_param_name('conv', 'weights')
581 |         inputs.append(weights_name)
582 |         if not batch_normalize:
583 |             bias_name = conv_params.generate_param_name('conv', 'bias')
584 |             inputs.append(bias_name)
585 | 
586 |         conv_node = helper.make_node(
587 |             'Conv',
588 |             inputs=inputs,
589 |             outputs=[layer_name],
590 |             kernel_shape=kernel_shape,
591 |             strides=strides,
592 |             auto_pad='SAME_LOWER',
593 |             dilations=dilations,
594 |             name=layer_name
595 |         )
596 |         self._nodes.append(conv_node)
597 |         inputs = [layer_name]
598 |         layer_name_output = layer_name
599 | 
600 |         if batch_normalize:
601 |             layer_name_bn = layer_name + '_bn'
602 |             bn_param_suffixes = ['scale', 'bias', 'mean', 'var']
603 |             for suffix in bn_param_suffixes:
604 |                 bn_param_name = conv_params.generate_param_name('bn', suffix)
605 |                 inputs.append(bn_param_name)
606 |             batchnorm_node = helper.make_node(
607 |                 'BatchNormalization',
608 |                 inputs=inputs,
609 |                 outputs=[layer_name_bn],
610 |                 epsilon=self.epsilon_bn,
611 |                 momentum=self.momentum_bn,
612 |                 name=layer_name_bn
613 |             )
614 |             self._nodes.append(batchnorm_node)
615 |             inputs = [layer_name_bn]
616 |             layer_name_output = layer_name_bn
617 | 
618 |         if layer_dict['activation'] == 'leaky':
619 |             layer_name_lrelu = layer_name + '_lrelu'
620 | 
621 |             lrelu_node = helper.make_node(
622 |                 'LeakyRelu',
623 |                 inputs=inputs,
624 |                 outputs=[layer_name_lrelu],
625 |                 name=layer_name_lrelu,
626 |                 alpha=self.alpha_lrelu
627 |             )
628 |             self._nodes.append(lrelu_node)
629 |             inputs = [layer_name_lrelu]
630 |             layer_name_output = layer_name_lrelu
631 |         elif layer_dict['activation'] == 'mish':
632 |             layer_name_softplus = layer_name + '_softplus'
633 |             layer_name_tanh = layer_name + '_tanh'
634 |             layer_name_mish = layer_name + '_mish'
635 | 
636 |             softplus_node = helper.make_node(
637 |                 'Softplus',
638 |                 inputs=inputs,
639 |                 outputs=[layer_name_softplus],
640 |                 name=layer_name_softplus,
641 |             )
642 |             self._nodes.append(softplus_node)
643 |             tanh_node = helper.make_node(
644 |                 'Tanh',
645 |                 inputs=[layer_name_softplus],
646 |                 outputs=[layer_name_tanh],
647 |                 name=layer_name_tanh,
648 |             )
649 |             self._nodes.append(tanh_node)
650 | 
651 |             inputs.append(layer_name_tanh)
652 |             mish_node = helper.make_node(
653 |                 'Mul',
654 |                 inputs=inputs,
655 |                 outputs=[layer_name_mish],
656 |                 name=layer_name_mish,
657 |             )
658 |             self._nodes.append(mish_node)
659 | 
660 |             inputs = [layer_name_mish]
661 |             layer_name_output = layer_name_mish
662 |         elif layer_dict['activation'] == 'linear':
663 |             pass
664 |         else:
665 |             print('Activation not supported.')
666 | 
667 |         self.param_dict[layer_name] = conv_params
668 |         return layer_name_output, filters
669 | 
670 |     def _make_shortcut_node(self, layer_name, layer_dict):
671 |         """Create an ONNX Add node with the shortcut properties from
672 |         the DarkNet-based graph.
673 | 
674 |         Keyword arguments:
675 |         layer_name -- the layer's name (also the corresponding key in layer_configs)
676 |         layer_dict -- a layer parameter dictionary (one element of layer_configs)
677 |         """
678 |         shortcut_index = layer_dict['from']
679 |         activation = layer_dict['activation']
680 |         assert activation == 'linear'
681 | 
682 |         first_node_specs = self._get_previous_node_specs()
683 |         second_node_specs = self._get_previous_node_specs(
684 |             target_index=shortcut_index)
685 |         assert first_node_specs.channels == second_node_specs.channels
686 |         channels = first_node_specs.channels
687 |         inputs = [first_node_specs.name, second_node_specs.name]
688 |         shortcut_node = helper.make_node(
689 |             'Add',
690 |             inputs=inputs,
691 |             outputs=[layer_name],
692 |             name=layer_name,
693 |         )
694 |         self._nodes.append(shortcut_node)
695 |         return layer_name, channels
696 | 
697 |     def _make_route_node(self, layer_name, layer_dict):
698 |         """If the 'layers' parameter from the DarkNet configuration is only one index, continue
699 |         node creation at the indicated (negative) index. Otherwise, create an ONNX Concat node
700 |         with the route properties from the DarkNet-based graph.
701 | 
702 |         Keyword arguments:
703 |         layer_name -- the layer's name (also the corresponding key in layer_configs)
704 |         layer_dict -- a layer parameter dictionary (one element of layer_configs)
705 |         """
706 |         route_node_indexes = layer_dict['layers']
707 |         if len(route_node_indexes) == 1:
708 |             if 'groups' in layer_dict.keys():
709 |                 # for CSPNet-kind of architecture
710 |                 assert 'group_id' in layer_dict.keys()
711 |                 groups = layer_dict['groups']
712 |                 group_id = int(layer_dict['group_id'])
713 |                 assert group_id < groups
714 |                 index = route_node_indexes[0]
715 |                 if index > 0:
716 |                     # +1 for input node (same reason as below)
717 |                     index += 1
718 |                 route_node_specs = self._get_previous_node_specs(
719 |                     target_index=index)
720 |                 assert route_node_specs.channels % groups == 0
721 |                 channels = route_node_specs.channels // groups
722 | 
723 |                 outputs = [layer_name + '_dummy%d' % i for i in range(groups)]
724 |                 outputs[group_id] = layer_name
725 |                 route_node = helper.make_node(
726 |                     'Split',
727 |                     axis=1,
728 |                     split=[channels] * groups,
729 |                     inputs=[route_node_specs.name],
730 |                     outputs=outputs,
731 |                     name=layer_name,
732 |                 )
733 |                 self._nodes.append(route_node)
734 |             else:
735 |                 if route_node_indexes[0] < 0:
736 |                     # route should skip self, thus -1
737 |                     self.route_spec = route_node_indexes[0] - 1
738 |                 elif route_node_indexes[0] > 0:
739 |                     # +1 for input node (same reason as below)
740 |                     self.route_spec = route_node_indexes[0] + 1
741 |                 # This dummy route node would be removed in the end.
742 |                 layer_name = layer_name + '_dummy'
743 |                 channels = 1
744 |         else:
745 |             assert 'groups' not in layer_dict.keys(), \
746 |                 'groups not implemented for multiple-input route layer!'
747 |             inputs = list()
748 |             channels = 0
749 |             for index in route_node_indexes:
750 |                 if index > 0:
751 |                     # Increment by one because we count the input as
752 |                     # a node (DarkNet does not)
753 |                     index += 1
754 |                 route_node_specs = self._get_previous_node_specs(
755 |                     target_index=index)
756 |                 inputs.append(route_node_specs.name)
757 |                 channels += route_node_specs.channels
758 |             assert inputs
759 |             assert channels > 0
760 | 
761 |             route_node = helper.make_node(
762 |                 'Concat',
763 |                 axis=1,
764 |                 inputs=inputs,
765 |                 outputs=[layer_name],
766 |                 name=layer_name,
767 |             )
768 |             self._nodes.append(route_node)
769 |         return layer_name, channels
770 | 
771 |     def _make_upsample_node(self, layer_name, layer_dict):
772 |         """Create an ONNX Upsample node with the properties from
773 |         the DarkNet-based graph.
774 | 
775 |         Keyword arguments:
776 |         layer_name -- the layer's name (also the corresponding key in layer_configs)
777 |         layer_dict -- a layer parameter dictionary (one element of layer_configs)
778 |         """
779 |         upsample_factor = float(layer_dict['stride'])
780 |         # Create the scales array with node parameters
781 |         scales = np.array([1.0, 1.0, upsample_factor, upsample_factor]).astype(np.float32)
782 |         previous_node_specs = self._get_previous_node_specs()
783 |         inputs = [previous_node_specs.name]
784 | 
785 |         channels = previous_node_specs.channels
786 |         assert channels > 0
787 |         upsample_params = UpsampleParams(layer_name, scales)
788 |         scales_name = upsample_params.generate_param_name()
789 |         # For ONNX opset >= 9, the Upsample node takes the scales array
790 |         # as an input.
791 |         inputs.append(scales_name)
792 | 
793 |         upsample_node = helper.make_node(
794 |             'Upsample',
795 |             mode='nearest',
796 |             inputs=inputs,
797 |             outputs=[layer_name],
798 |             name=layer_name,
799 |         )
800 |         self._nodes.append(upsample_node)
801 |         self.param_dict[layer_name] = upsample_params
802 |         return layer_name, channels
803 | 
804 |     def _make_maxpool_node(self, layer_name, layer_dict):
805 |         """Create an ONNX Maxpool node with the properties from
806 |         the DarkNet-based graph.
807 | 
808 |         Keyword arguments:
809 |         layer_name -- the layer's name (also the corresponding key in layer_configs)
810 |         layer_dict -- a layer parameter dictionary (one element of layer_configs)
811 |         """
812 |         stride = layer_dict['stride']
813 |         kernel_size = layer_dict['size']
814 |         previous_node_specs = self._get_previous_node_specs()
815 |         inputs = [previous_node_specs.name]
816 |         channels = previous_node_specs.channels
817 |         kernel_shape = [kernel_size, kernel_size]
818 |         strides = [stride, stride]
819 |         assert channels > 0
820 |         maxpool_node = helper.make_node(
821 |             'MaxPool',
822 |             inputs=inputs,
823 |             outputs=[layer_name],
824 |             kernel_shape=kernel_shape,
825 |             strides=strides,
826 |             auto_pad='SAME_UPPER',
827 |             name=layer_name,
828 |         )
829 |         self._nodes.append(maxpool_node)
830 |         return layer_name, channels
831 | 
832 |     def _make_yolo_node(self, layer_name, layer_dict):
833 |         """Create an ONNX Yolo node.
834 | 
835 |         These are dummy nodes which would be removed in the end.
836 |         """
837 |         channels = 1
838 |         return layer_name + '_dummy', channels
839 | 
840 | 
841 | def generate_md5_checksum(local_path):
842 |     """Returns the MD5 checksum of a local file.
843 | 
844 |     Keyword argument:
845 |     local_path -- path of the file whose checksum shall be generated
846 |     """
847 |     with open(local_path, 'rb') as local_file:
848 |         data = local_file.read()
849 |         return hashlib.md5(data).hexdigest()
850 | 
851 | 
852 | def main():
853 |     """Run the DarkNet-to-ONNX conversion for YOLO (v3 or v4)."""
854 |     if sys.version_info[0] < 3:
855 |         raise SystemExit('ERROR: This modified version of yolov3_to_onnx.py '
856 |                          'script is only compatible with python3...')
857 | 
858 |     parser = argparse.ArgumentParser()
859 |     parser.add_argument(
860 |         '-c', '--category_num', type=int, default=80,
861 |         help='number of object categories [80]')
862 |     parser.add_argument(
863 |         '-m', '--model', type=str, required=True,
864 |         help=('[yolov3|yolov3-tiny|yolov3-spp|yolov4|yolov4-tiny]-'
865 |               '[{dimension}], where dimension could be a single '
866 |               'number (e.g. 288, 416, 608) or WxH (e.g. 416x256)'))
867 |     args = parser.parse_args()
868 |     if args.category_num <= 0:
869 |         raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
870 | 
871 |     cfg_file_path = '%s.cfg' % args.model
872 |     if not os.path.isfile(cfg_file_path):
873 |         raise SystemExit('ERROR: file (%s) not found!' % cfg_file_path)
874 |     weights_file_path = '%s.weights' % args.model
875 |     if not os.path.isfile(weights_file_path):
876 |         raise SystemExit('ERROR: file (%s) not found!' % weights_file_path)
877 |     output_file_path = '%s.onnx' % args.model
878 | 
879 |     if not verify_classes(args.model, args.category_num):
880 |         raise SystemExit('ERROR: bad category_num (%d)' % args.category_num)
881 | 
882 |     # Derive yolo input width/height from model name.
883 |     # For example, "yolov4-416x256" -> width=416, height=256
884 |     w, h = get_input_wh(args.model)
885 | 
886 |     # These are the only layers DarkNetParser will extract parameters
887 |     # from.  The three layers of type 'yolo' are not parsed in detail
888 |     # because they are included in the post-processing later.
889 |     supported_layers = ['net', 'convolutional', 'maxpool',
890 |                         'shortcut', 'route', 'upsample', 'yolo']
891 | 
892 |     # Create a DarkNetParser object, and the use it to generate an
893 |     # OrderedDict with all layer's configs from the cfg file.
894 |     print('Parsing DarkNet cfg file...')
895 |     parser = DarkNetParser(supported_layers)
896 |     layer_configs = parser.parse_cfg_file(cfg_file_path)
897 | 
898 |     # We do not need the parser anymore after we got layer_configs.
899 |     del parser
900 | 
901 |     # In above layer_config, there are three outputs that we need to
902 |     # know the output shape of (in CHW format).
903 |     output_tensor_dims = OrderedDict()
904 |     c = (args.category_num + 5) * 3
905 |     if 'yolov3' in args.model:
906 |         if 'tiny' in args.model:
907 |             output_tensor_dims['016_convolutional'] = [c, h // 32, w // 32]
908 |             output_tensor_dims['023_convolutional'] = [c, h // 16, w // 16]
909 |         elif 'spp' in args.model:
910 |             output_tensor_dims['089_convolutional'] = [c, h // 32, w // 32]
911 |             output_tensor_dims['101_convolutional'] = [c, h // 16, w // 16]
912 |             output_tensor_dims['113_convolutional'] = [c, h //  8, w //  8]
913 |         else:
914 |             output_tensor_dims['082_convolutional'] = [c, h // 32, w // 32]
915 |             output_tensor_dims['094_convolutional'] = [c, h // 16, w // 16]
916 |             output_tensor_dims['106_convolutional'] = [c, h //  8, w //  8]
917 |     elif 'yolov4' in args.model:
918 |         if 'tiny' in args.model:
919 |             output_tensor_dims['030_convolutional'] = [c, h // 32, w // 32]
920 |             output_tensor_dims['037_convolutional'] = [c, h // 16, w // 16]
921 |         else:
922 |             output_tensor_dims['139_convolutional'] = [c, h //  8, w //  8]
923 |             output_tensor_dims['150_convolutional'] = [c, h // 16, w // 16]
924 |             output_tensor_dims['161_convolutional'] = [c, h // 32, w // 32]
925 |     else:
926 |         raise SystemExit('ERROR: unknown model (%s)!' % args.model)
927 | 
928 |     # Create a GraphBuilderONNX object with the specified output tensor
929 |     # dimensions.
930 |     print('Building ONNX graph...')
931 |     builder = GraphBuilderONNX(args.model, output_tensor_dims)
932 | 
933 |     # Now generate an ONNX graph with weights from the previously parsed
934 |     # layer configurations and the weights file.
935 |     yolo_model_def = builder.build_onnx_graph(
936 |         layer_configs=layer_configs,
937 |         weights_file_path=weights_file_path,
938 |         verbose=True)
939 | 
940 |     # Once we have the model definition, we do not need the builder anymore.
941 |     del builder
942 | 
943 |     # Perform a sanity check on the ONNX model definition.
944 |     print('Checking ONNX model...')
945 |     onnx.checker.check_model(yolo_model_def)
946 | 
947 |     # Serialize the generated ONNX graph to this file.
948 |     print('Saving ONNX file...')
949 |     onnx.save(yolo_model_def, output_file_path)
950 | 
951 |     print('Done.')
952 | 
953 | 
954 | if __name__ == '__main__':
955 |     main()
956 | 


--------------------------------------------------------------------------------