├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── dependencies
│   ├── install_protobuf-3.8.0.sh
│   └── install_pycuda.sh
├── docs
│   ├── results.gif
│   ├── yolov4_custom_1.png
│   ├── yolov4_custom_2.png
│   └── yolov4_custom_3.png
├── launch
│   ├── video_source.launch
│   ├── yolov3_trt.launch
│   ├── yolov4_trt.launch
│   └── yolov4_trt_batch.launch
├── msg
│   ├── BoundingBox2D.msg
│   ├── Detector2D.msg
│   ├── Detector2DArray.msg
│   └── ObjectHypothesis.msg
├── package.xml
├── plugins
│   ├── Makefile
│   ├── README.md
│   ├── libyolo_layer.so
│   ├── yolo_layer.cu
│   ├── yolo_layer.h
│   └── yolo_layer.o
├── setup.py
├── src
│   ├── image_converter.cpp
│   ├── image_converter.h
│   ├── node_video_source.cpp
│   ├── ros_compat.cpp
│   └── ros_compat.h
├── trt_yolo_v3.py
├── trt_yolo_v4.py
├── trt_yolo_v4_batch.py
├── utils
│   ├── __init__.py
│   ├── display.py
│   ├── visualization.py
│   ├── yolo_classes.py
│   ├── yolo_with_plugins.py
│   └── yolo_with_plugins_batch.py
└── yolo
    ├── __pycache__
    │   └── plugins.cpython-36.pyc
    ├── convert_yolo_trt.sh
    ├── download_yolo.sh
    ├── onnx_to_tensorrt.py
    ├── plugins.py
    ├── requirements.txt
    └── yolo_to_onnx.py
/.gitignore:
--------------------------------------------------------------------------------
1 | yolo/yolov4-416.trt
2 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.0.2)
2 | project(yolov4_trt_ros)
3 |
4 | find_package(OpenCV 3.0 REQUIRED)
5 | find_package(jetson-utils REQUIRED)
6 | find_package(jetson-inference REQUIRED)
7 | find_package(CUDA REQUIRED)
8 |
9 | catkin_python_setup()
10 |
11 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/package.xml ${CMAKE_CURRENT_SOURCE_DIR}/package.xml COPYONLY)
12 |
13 | find_package(catkin REQUIRED COMPONENTS
14 | image_transport
15 | rospy
16 | sensor_msgs
17 | vision_msgs
18 | cv_bridge
19 | message_generation
20 | )
21 |
22 | add_message_files(
23 | FILES
24 | Detector2D.msg
25 | Detector2DArray.msg
26 | BoundingBox2D.msg
27 | ObjectHypothesis.msg
28 | )
29 |
30 | generate_messages(DEPENDENCIES std_msgs vision_msgs)
31 |
32 | catkin_package(
33 | CATKIN_DEPENDS nodelet rospy image_transport sensor_msgs vision_msgs cv_bridge message_runtime
34 | )
35 |
36 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") # enable c++11 (TensorRT requirement)
37 | include_directories(${catkin_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS})
38 | add_definitions(-DROS1)
39 |
40 | set(common_src src/image_converter.cpp src/ros_compat.cpp)
41 |
42 | add_executable(video_source src/node_video_source.cpp ${common_src})
43 | target_link_libraries(video_source ${catkin_LIBRARIES} jetson-inference)
44 |
45 | catkin_install_python(PROGRAMS trt_yolo_v4_batch.py
46 | DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
47 | )
48 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Indra Kurniawan
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # YOLOv4 with TensorRT engine
2 |
3 | This package contains the yolov4_trt_node, which performs inference using NVIDIA's TensorRT engine.
4 |
5 | The package works for both YOLOv3 and YOLOv4; adjust the commands below to match the YOLO model you are using.
6 |
7 |
8 | ![Results](docs/results.gif)
9 |
10 | ---
11 | ## Setting up the environment
12 |
13 | ### Install dependencies
14 |
15 | ### Current Environment:
16 |
17 | - Jetson Xavier AGX
18 | - ROS Melodic
19 | - Ubuntu 18.04
20 | - Jetpack 4.4
21 | - TensorRT 7+
22 |
23 | #### Dependencies:
24 |
25 | - OpenCV 3.x
26 | - numpy 1.15.1
27 | - Protobuf 3.8.0
28 | - Pycuda 2019.1.2
29 | - onnx 1.4.1 (depends on Protobuf)
30 |
31 | ### Install all dependencies with the commands below
32 |
33 | ```
34 | # Install pycuda (takes a while)
35 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/dependencies
36 | $ ./install_pycuda.sh
37 |
38 | # Install Protobuf (takes a while)
39 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/dependencies
40 | $ ./install_protobuf-3.8.0.sh
41 |
42 | # Install onnx (depends on Protobuf above)
43 | $ sudo pip3 install onnx==1.4.1
44 | ```
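
If all three installs succeed, the modules should import cleanly. Below is an optional sanity check (a small standalone Python script; the file name is arbitrary):

```
# check_deps.py -- optional: confirm the versions installed by the scripts above
import pycuda                        # built from pycuda-2019.1.2
import google.protobuf as protobuf   # built from protobuf-3.8.0
import onnx                          # installed with pip3

print("pycuda  :", pycuda.VERSION)
print("protobuf:", protobuf.__version__)
print("onnx    :", onnx.__version__)
```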
45 |
46 | * Please also install [jetson-inference](https://github.com/dusty-nv/ros_deep_learning#jetson-inference)
47 | * Note: This package uses nodes similar to those in the ros_deep_learning package. Place a CATKIN_IGNORE file in that package to avoid a catkin_make error caused by duplicate node names.
48 | * If these scripts do not work for you, refer to this repository by [jefflgaol](https://github.com/jefflgaol/Install-Packages-Jetson-ARM-Family) for help installing the above packages (and more) on Jetson ARM devices.
49 | ---
50 | ## Setting up the package
51 |
52 | ### 1. Clone project into catkin_ws and build it
53 |
54 | ```
55 | $ cd ~/catkin_ws && catkin_make
56 | $ source devel/setup.bash
57 | ```
58 |
59 | ### 2. Make libyolo_layer.so
60 |
61 | ```
62 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/plugins
63 | $ make
64 | ```
65 |
66 | This will generate a libyolo_layer.so file
67 |
68 | ### 3. Place your yolo.weights and yolo.cfg files in the yolo folder
69 |
70 | ```
71 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/yolo
72 | ```
73 | **Please name the weights and cfg files exactly as follows:**
74 | - yolov4.weights
75 | - yolov4.cfg
76 |
77 | Run the conversion script to convert the model into a TensorRT engine file:
78 |
79 | ```
80 | $ ./convert_yolo_trt.sh
81 | ```
82 |
83 | - Input the appropriate arguments
84 | - This conversion might take a while
85 | - The optimised TensorRT engine will be saved as yolov3-416.trt / yolov4-416.trt
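
Before wiring the engine into ROS, you can optionally confirm that the generated `.trt` file deserializes. The snippet below is a minimal sketch using the TensorRT 7 Python API; it assumes you run it from the package root, that the engine was saved as `yolo/yolov4-416.trt`, and that the plugin library from step 2 exists at `plugins/libyolo_layer.so`:

```
# check_engine.py -- optional sketch: verify the serialized engine loads (TensorRT 7 assumed)
import ctypes
import tensorrt as trt

# the custom YOLO layer plugin must be loaded before deserializing the engine
ctypes.cdll.LoadLibrary("plugins/libyolo_layer.so")

TRT_LOGGER = trt.Logger(trt.Logger.INFO)
trt.init_libnvinfer_plugins(TRT_LOGGER, "")

with open("yolo/yolov4-416.trt", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

print("bindings:", [engine.get_binding_name(i) for i in range(engine.num_bindings)])
```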
86 |
87 | ### 4. Change the class labels
88 |
89 | ```
90 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/utils
91 | $ vim yolo_classes.py
92 | ```
93 |
94 | - Change the class labels to suit your model
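
This repository follows the layout of jkjung-avt's tensorrt_demos, so `yolo_classes.py` holds the list of class names used to label detections. Below is a hedged sketch of a custom edit; the variable and helper names are illustrative rather than the file's exact contents, and the label order must match the order used when training your model:

```
# utils/yolo_classes.py -- illustrative sketch only; adapt to the actual file contents
CUSTOM_CLASSES_LIST = [
    "person",     # class id 0
    "forklift",   # class id 1
]

def get_cls_dict(category_num):
    """Map numeric class IDs to human-readable names (illustrative helper)."""
    return {i: name for i, name in enumerate(CUSTOM_CLASSES_LIST[:category_num])}
```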
95 |
96 | ### 5. Change the video_input and topic_name
97 |
98 | ```
99 | $ cd ${HOME}/catkin_ws/src/yolov4_trt_ros/launch
100 | ```
101 |
102 | - `yolov3_trt.launch` : change the topic_name
103 |
104 | - `yolov4_trt.launch` : change the topic_name
105 |
106 | - `video_source.launch` : change the input format (refer to this [link](https://github.com/dusty-nv/jetson-inference/blob/master/docs/aux-streaming.md))
107 |
108 | * video_source.launch requires jetson-inference to be installed
109 | * Default input is CSI camera
110 |
111 | ---
112 | ## Using the package
113 |
114 | ### Running the package
115 |
116 | Note: Run the launch files separately in different terminals
117 |
118 | ### 1. Run the video_source
119 |
120 | ```
121 | # For csi input
122 | $ roslaunch yolov4_trt_ros video_source.launch input:=csi://0
123 |
124 | # For video input
125 | $ roslaunch yolov4_trt_ros video_source.launch input:=/path_to_video/video.mp4
126 |
127 | # For USB camera
128 | $ roslaunch yolov4_trt_ros video_source.launch input:=v4l2://0
129 | ```
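
To confirm that frames are actually being published, you can run a quick subscriber. `node_video_source.cpp` advertises the image topic as `raw` on the node's private namespace, so with the default node name the full topic is typically `/video_source/raw` (check `rostopic list` if your launch file remaps it):

```
# frame_probe.py -- minimal sketch: confirm frames arrive from the video_source node
import rospy
from sensor_msgs.msg import Image

def callback(img):
    rospy.loginfo("got %dx%d frame, encoding=%s", img.width, img.height, img.encoding)

if __name__ == "__main__":
    rospy.init_node("frame_probe")
    rospy.Subscriber("/video_source/raw", Image, callback, queue_size=1)
    rospy.spin()
```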
130 |
131 | ### 2. Run the yolo detector
132 |
133 | ```
134 | # For YOLOv3 (single input)
135 | $ roslaunch yolov4_trt_ros yolov3_trt.launch
136 |
137 | # For YOLOv4 (single input)
138 | $ roslaunch yolov4_trt_ros yolov4_trt.launch
139 |
140 | # For YOLOv4 (multiple input)
141 | $ roslaunch yolov4_trt_ros yolov4_trt_batch.launch
142 | ```
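
The detector publishes its results as `Detector2DArray` messages (defined in this package's `msg/` folder). Below is a minimal sketch of a subscriber that prints each detection; the topic name is a placeholder, so check the launch file you are running for the actual detections topic:

```
# detection_listener.py -- sketch: consume the detector output
import rospy
from yolov4_trt_ros.msg import Detector2DArray

def callback(msg):
    for det in msg.detections:
        box = det.bbox
        rospy.loginfo("class %d  score %.2f  center (%.0f, %.0f)  size %.0fx%.0f",
                      det.results.id, det.results.score,
                      box.center.x, box.center.y, box.size_x, box.size_y)

if __name__ == "__main__":
    rospy.init_node("detection_listener")
    # "/detections" is a placeholder topic name -- check the yolov4_trt launch file
    rospy.Subscriber("/detections", Detector2DArray, callback, queue_size=1)
    rospy.spin()
```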
143 |
144 | ### 3. For maximum performance
145 |
146 | ```
147 | $ cd /usr/bin/
148 | $ sudo ./nvpmodel -m 0      # select MAXN power mode (enables all CPU cores)
149 | $ sudo ./jetson_clocks      # maximise CPU/GPU clock frequencies
150 | ```
151 |
152 | * These commands are taken from this [forum post](https://forums.developer.nvidia.com/t/nvpmodel-and-jetson-clocks/58659/2)
153 | * Please ensure the jetson device is cooled appropriately to prevent overheating
154 |
155 | ### Parameters
156 |
157 | - str model = "yolov3" or "yolov4"
158 | - str model_path = "/abs_path_to_model/"
159 | - int input_shape = 288/416/608
160 | - int category_num = 80
161 | - double conf_th = 0.5
162 | - bool show_img = True
163 |
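These are ordinary ROS parameters, so they can also be read from your own code. The sketch below shows how they map onto rospy lookups; the `/yolov4_trt/...` parameter paths are an assumption based on the list above, so verify the node name and parameter names against the launch file you use:

```
# param_check.py -- illustrative only: read the detector parameters listed above
import rospy

rospy.init_node("param_check")
# the node name "yolov4_trt" is assumed -- confirm with `rosparam list`
model       = rospy.get_param("/yolov4_trt/model", "yolov4")
input_shape = rospy.get_param("/yolov4_trt/input_shape", 416)
conf_th     = rospy.get_param("/yolov4_trt/conf_th", 0.5)
show_img    = rospy.get_param("/yolov4_trt/show_img", True)
print(model, input_shape, conf_th, show_img)
```
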
164 | - Default Input FPS from CSI camera = 30.0
165 | * To change this, go to jetson-inference/utils/camera/gstCamera.cpp
166 |
167 | ```
168 | // In line 359, change this line
169 | mOptions.frameRate = 15
170 |
171 | // to the desired frame rate
172 | mOptions.frameRate = desired_frame_rate
173 | ```
174 | ---
175 | ## Results obtained
176 |
177 | ### Inference Results
178 | #### Single Camera Input
179 |
180 | | Model | Hardware | FPS | Inference Time (s) |
181 | |:---------|:--:|:---------:|:----------------:|
182 | | Yolov4-416| Xavier AGX | 40.0 | 0.025 |
183 | | Yolov4-416| Jetson Tx2 | 16.0 | 0.0625 |
184 |
185 |
186 | * Inference tests for YOLOv3/YOLOv4 and YOLOv3-tiny/YOLOv4-tiny on other Jetson devices, as well as multiple-camera-input tests, will be added in the future
187 |
188 | ### 1. Screenshots
189 |
190 | ### Video_in: .mp4 video (640x360)
191 | ### Tests Done on Xavier AGX
192 | ### Avg FPS : ~38 FPS
193 |
194 | ![Custom YOLOv4 result 1](docs/yolov4_custom_1.png)
195 |
196 | ![Custom YOLOv4 result 2](docs/yolov4_custom_2.png)
197 |
198 | ![Custom YOLOv4 result 3](docs/yolov4_custom_3.png)
199 |
200 | ---
201 | ## Licenses and References
202 |
203 | ### 1. TensorRT samples from [jkjung-avt](https://github.com/jkjung-avt/)
204 |
205 | Many thanks for his project with TensorRT samples. I referenced his source code and adapted it to ROS for robotics applications.
206 |
207 | I also used the pycuda and protobuf installation scripts from his project.
208 |
209 | That code is under the [MIT License](https://github.com/jkjung-avt/tensorrt_demos/blob/master/LICENSE)
210 |
211 | ### 2. Jetson-inference from [dusty-nv](https://github.com/dusty-nv/ros_deep_learning#jetson-inference)
212 |
213 | Many thanks for his work on Jetson Inference with ROS. I used the video_source node from his project for capturing video input.
214 |
215 | That code is under the [NVIDIA License](https://github.com/dusty-nv/jetson-inference/blob/master/LICENSE.md)
--------------------------------------------------------------------------------
/dependencies/install_protobuf-3.8.0.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | folder=${HOME}/src
6 | mkdir -p $folder
7 |
8 | echo "** Install requirements"
9 | sudo apt-get install -y autoconf libtool
10 |
11 | echo "** Download protobuf-3.8.0 sources"
12 | cd $folder
13 | if [ ! -f protobuf-python-3.8.0.zip ]; then
14 | wget https://github.com/protocolbuffers/protobuf/releases/download/v3.8.0/protobuf-python-3.8.0.zip
15 | fi
16 | if [ ! -f protoc-3.8.0-linux-aarch_64.zip ]; then
17 | wget https://github.com/protocolbuffers/protobuf/releases/download/v3.8.0/protoc-3.8.0-linux-aarch_64.zip
18 | fi
19 |
20 | echo "** Install protoc"
21 | unzip protobuf-python-3.8.0.zip
22 | unzip protoc-3.8.0-linux-aarch_64.zip -d protoc-3.8.0
23 | sudo cp protoc-3.8.0/bin/protoc /usr/local/bin/protoc
24 |
25 | echo "** Build and install protobuf-3.8.0 libraries"
26 | export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp
27 | cd protobuf-3.8.0/
28 | ./autogen.sh
29 | ./configure --prefix=/usr/local
30 | make -j$(nproc)
31 | make check
32 | sudo make install
33 | sudo ldconfig
34 |
35 | echo "** Update python3 protobuf module"
36 | # remove previous installation of python3 protobuf module
37 | sudo pip3 uninstall -y protobuf
38 | sudo pip3 install Cython
39 | cd python/
40 | python3 setup.py build --cpp_implementation
41 | python3 setup.py test --cpp_implementation
42 | sudo python3 setup.py install --cpp_implementation
43 |
44 | echo "** Build protobuf-3.8.0 successfully"
45 |
--------------------------------------------------------------------------------
/dependencies/install_pycuda.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Reference for installing 'pycuda': https://wiki.tiker.net/PyCuda/Installation/Linux/Ubuntu
4 |
5 | set -e
6 |
7 | if ! which nvcc > /dev/null; then
8 |   echo "ERROR: nvcc not found"
9 |   exit 1
10 | fi
11 |
12 | arch=$(uname -m)
13 | folder=${HOME}/src
14 | mkdir -p $folder
15 |
16 | echo "** Install requirements"
17 | sudo apt-get install -y build-essential python3-dev
18 | sudo apt-get install -y libboost-python-dev libboost-thread-dev
19 | sudo pip3 install setuptools
20 |
21 | boost_pylib=$(basename /usr/lib/${arch}-linux-gnu/libboost_python*-py3?.so)
22 | boost_pylibname=${boost_pylib%.so}
23 | boost_pyname=${boost_pylibname/lib/}
24 |
25 | echo "** Download pycuda-2019.1.2 sources"
26 | pushd $folder
27 | if [ ! -f pycuda-2019.1.2.tar.gz ]; then
28 | wget https://files.pythonhosted.org/packages/5e/3f/5658c38579b41866ba21ee1b5020b8225cec86fe717e4b1c5c972de0a33c/pycuda-2019.1.2.tar.gz
29 | fi
30 |
31 | echo "** Build and install pycuda-2019.1.2"
32 | CPU_CORES=$(nproc)
33 | echo "** cpu cores available: " $CPU_CORES
34 | tar xzvf pycuda-2019.1.2.tar.gz
35 | cd pycuda-2019.1.2
36 | python3 ./configure.py --python-exe=/usr/bin/python3 --cuda-root=/usr/local/cuda --cudadrv-lib-dir=/usr/lib/${arch}-linux-gnu --boost-inc-dir=/usr/include --boost-lib-dir=/usr/lib/${arch}-linux-gnu --boost-python-libname=${boost_pyname} --boost-thread-libname=boost_thread --no-use-shipped-boost
37 | make -j$CPU_CORES
38 | python3 setup.py build
39 | sudo python3 setup.py install
40 |
41 | popd
42 |
43 | python3 -c "import pycuda; print('pycuda version:', pycuda.VERSION)"
44 |
--------------------------------------------------------------------------------
/docs/results.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/docs/results.gif
--------------------------------------------------------------------------------
/docs/yolov4_custom_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/docs/yolov4_custom_1.png
--------------------------------------------------------------------------------
/docs/yolov4_custom_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/docs/yolov4_custom_2.png
--------------------------------------------------------------------------------
/docs/yolov4_custom_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/docs/yolov4_custom_3.png
--------------------------------------------------------------------------------
/launch/video_source.launch:
--------------------------------------------------------------------------------
(XML content of this launch file was stripped in this export and is not recoverable here)
--------------------------------------------------------------------------------
/launch/yolov3_trt.launch:
--------------------------------------------------------------------------------
(XML content of this launch file was stripped in this export and is not recoverable here)
--------------------------------------------------------------------------------
/launch/yolov4_trt.launch:
--------------------------------------------------------------------------------
(XML content of this launch file was stripped in this export and is not recoverable here)
--------------------------------------------------------------------------------
/launch/yolov4_trt_batch.launch:
--------------------------------------------------------------------------------
(XML content of this launch file was stripped in this export and is not recoverable here)
--------------------------------------------------------------------------------
/msg/BoundingBox2D.msg:
--------------------------------------------------------------------------------
1 | # A 2D bounding box that can be rotated about its center.
2 | # All dimensions are in pixels, but represented using floating-point
3 | # values to allow sub-pixel precision. If an exact pixel crop is required
4 | # for a rotated bounding box, it can be calculated using Bresenham's line
5 | # algorithm.
6 |
7 | # The 2D position (in pixels) and orientation of the bounding box center.
8 | geometry_msgs/Pose2D center
9 |
10 | # The size (in pixels) of the bounding box surrounding the object relative
11 | # to the pose of its center.
12 | float64 size_x
13 | float64 size_y
14 |
--------------------------------------------------------------------------------
/msg/Detector2D.msg:
--------------------------------------------------------------------------------
1 | # Defines a 2D detection result.
2 | #
3 | # This is similar to a 2D classification, but includes position information,
4 | # allowing a classification result for a specific crop or image point to
5 | # be located in the larger image.
6 |
7 | Header header
8 |
9 | # Class probabilities
10 | ObjectHypothesis results
11 |
12 | # 2D bounding box surrounding the object.
13 | BoundingBox2D bbox
14 |
--------------------------------------------------------------------------------
/msg/Detector2DArray.msg:
--------------------------------------------------------------------------------
1 | # A list of 2D detections, for a multi-object 2D detector.
2 |
3 | Header header
4 |
5 | # A list of the detected proposals. A multi-proposal detector might generate
6 | # this list with many candidate detections generated from a single input.
7 | Detector2D[] detections
8 |
--------------------------------------------------------------------------------
/msg/ObjectHypothesis.msg:
--------------------------------------------------------------------------------
1 | # An object hypothesis that contains no position information.
2 |
3 | # The unique numeric ID of object detected. To get additional information about
4 | # this ID, such as its human-readable name, listeners should perform a lookup
5 | # in a metadata database. See vision_msgs/VisionInfo.msg for more detail.
6 | int64 id
7 |
8 | # The probability or confidence value of the detected object. By convention,
9 | # this value should lie in the range [0-1].
10 | float64 score
11 |
--------------------------------------------------------------------------------
/package.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <package format="2">
3 |   <name>yolov4_trt_ros</name>
4 |   <version>0.0.0</version>
5 |   <description>The yolov4_trt_ros package</description>
6 |
7 |   <maintainer>indra</maintainer>
8 |
9 |   <license>BSD</license>
10 |   <buildtool_depend>catkin</buildtool_depend>
11 |
12 |   <build_depend>cv_bridge</build_depend>
13 |   <build_depend>image_transport</build_depend>
14 |   <build_depend>rospy</build_depend>
15 |   <build_depend>sensor_msgs</build_depend>
16 |   <build_depend>vision_msgs</build_depend>
17 |   <build_depend>nodelet</build_depend>
18 |   <build_depend>message_generation</build_depend>
19 |
20 |   <build_export_depend>cv_bridge</build_export_depend>
21 |   <build_export_depend>image_transport</build_export_depend>
22 |   <build_export_depend>rospy</build_export_depend>
23 |   <build_export_depend>sensor_msgs</build_export_depend>
24 |   <build_export_depend>vision_msgs</build_export_depend>
25 |   <build_export_depend>nodelet</build_export_depend>
26 |
27 |   <exec_depend>cv_bridge</exec_depend>
28 |   <exec_depend>image_transport</exec_depend>
29 |   <exec_depend>rospy</exec_depend>
30 |   <exec_depend>sensor_msgs</exec_depend>
31 |   <exec_depend>vision_msgs</exec_depend>
32 |   <exec_depend>nodelet</exec_depend>
33 |   <exec_depend>message_runtime</exec_depend>
34 |
35 | </package>
--------------------------------------------------------------------------------
/plugins/Makefile:
--------------------------------------------------------------------------------
1 | CC=g++
2 | LD=ld
3 | CXXFLAGS=-Wall -std=c++11 -g -O
4 |
5 | NVCC=nvcc
6 |
7 | cpu_arch=$(shell uname -m)
8 | ifeq ($(cpu_arch), aarch64) # Jetson
9 | chip_id=$(shell cat /sys/module/tegra_fuse/parameters/tegra_chip_id)
10 | ifeq ($(chip_id), 33) # Nano and TX1
11 | compute=53
12 | else ifeq ($(chip_id), 24) # TX2
13 | compute=62
14 | else ifeq ($(chip_id), 25) # Xavier NX
15 | compute=72
16 | else
17 | $(error Cannot determine cuda compute automatically, so please modify Makefile manually)
18 | endif
19 | else ifeq ($(cpu_arch), x86_64) # x86_64 PC
20 | $(warning "compute=75" is for GeForce RTX-2080 Ti. Please make sure CUDA compute is set correctly for your system in the Makefile.)
21 | compute=75
22 | else
23 | $(error Unknown CPU architecture: neither "aarch64" nor "x86_64")
24 | endif
25 |
26 | NVCCFLAGS=-m64 -gencode arch=compute_$(compute),code=sm_$(compute) \
27 | -gencode arch=compute_$(compute),code=compute_$(compute)
28 |
29 | # These are the directories where I installed TensorRT on my x86_64 PC.
30 | TENSORRT_INCS=-I"/usr/local/TensorRT-7.1.3.4/include"
31 | TENSORRT_LIBS=-L"/usr/local/TensorRT-7.1.3.4/lib"
32 |
33 | # INCS and LIBS
34 | INCS=-I"/usr/local/cuda/include" $(TENSORRT_INCS) -I"/usr/local/include" -I"plugin"
35 | LIBS=-L"/usr/local/cuda/lib64" $(TENSORRT_LIBS) -L"/usr/local/lib" -Wl,--start-group -lnvinfer -lnvparsers -lnvinfer_plugin -lcudnn -lcublas -lcudart_static -lnvToolsExt -lcudart -lrt -ldl -lpthread -Wl,--end-group
36 |
37 | .PHONY: all clean
38 |
39 | all: libyolo_layer.so
40 |
41 | clean:
42 | rm -f *.so *.o
43 |
44 | libyolo_layer.so: yolo_layer.o
45 | $(CC) -shared -o $@ $< $(LIBS)
46 |
47 | yolo_layer.o: yolo_layer.cu yolo_layer.h
48 | $(NVCC) -ccbin $(CC) $(INCS) $(NVCCFLAGS) -Xcompiler -fPIC -c -o $@ $<
49 |
--------------------------------------------------------------------------------
/plugins/README.md:
--------------------------------------------------------------------------------
1 | The "yolo_layer.h" and "yolo_layer.cu" were taken and modified from [wang-xinyu/tensorrtx/yolov4](https://github.com/wang-xinyu/tensorrtx/tree/master/yolov4). The original code is under [MIT License](https://github.com/wang-xinyu/tensorrtx/blob/master/LICENSE).
2 |
--------------------------------------------------------------------------------
/plugins/libyolo_layer.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/plugins/libyolo_layer.so
--------------------------------------------------------------------------------
/plugins/yolo_layer.cu:
--------------------------------------------------------------------------------
1 | /*
2 | * yolo_layer.cu
3 | *
4 | * This code was originally written by wang-xinyu under MIT license.
5 | * I took it from:
6 | *
7 | * https://github.com/wang-xinyu/tensorrtx/tree/master/yolov4
8 | *
9 | * and made necessary modifications.
10 | *
11 | * - JK Jung
12 | */
13 |
14 | #include "yolo_layer.h"
15 |
16 | using namespace Yolo;
17 |
18 | namespace
19 | {
20 | // Write values into buffer
21 | template <typename T>
22 | void write(char*& buffer, const T& val)
23 | {
24 | *reinterpret_cast<T*>(buffer) = val;
25 | buffer += sizeof(T);
26 | }
27 |
28 | // Read values from buffer
29 | template <typename T>
30 | void read(const char*& buffer, T& val)
31 | {
32 | val = *reinterpret_cast<const T*>(buffer);
33 | buffer += sizeof(T);
34 | }
35 | } // namespace
36 |
37 | namespace nvinfer1
38 | {
39 | YoloLayerPlugin::YoloLayerPlugin(int yolo_width, int yolo_height, int num_anchors, float* anchors, int num_classes, int input_width, int input_height, float scale_x_y)
40 | {
41 | mYoloWidth = yolo_width;
42 | mYoloHeight = yolo_height;
43 | mNumAnchors = num_anchors;
44 | memcpy(mAnchorsHost, anchors, num_anchors * 2 * sizeof(float));
45 | mNumClasses = num_classes;
46 | mInputWidth = input_width;
47 | mInputHeight = input_height;
48 | mScaleXY = scale_x_y;
49 |
50 | CHECK(cudaMalloc(&mAnchors, MAX_ANCHORS * 2 * sizeof(float)));
51 | CHECK(cudaMemcpy(mAnchors, mAnchorsHost, mNumAnchors * 2 * sizeof(float), cudaMemcpyHostToDevice));
52 | }
53 |
54 | YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length)
55 | {
56 | const char *d = reinterpret_cast<const char *>(data), *a = d;
57 | read(d, mThreadCount);
58 | read(d, mYoloWidth);
59 | read(d, mYoloHeight);
60 | read(d, mNumAnchors);
61 | memcpy(mAnchorsHost, d, MAX_ANCHORS * 2 * sizeof(float));
62 | d += MAX_ANCHORS * 2 * sizeof(float);
63 | read(d, mNumClasses);
64 | read(d, mInputWidth);
65 | read(d, mInputHeight);
66 | read(d, mScaleXY);
67 |
68 | CHECK(cudaMalloc(&mAnchors, MAX_ANCHORS * 2 * sizeof(float)));
69 | CHECK(cudaMemcpy(mAnchors, mAnchorsHost, mNumAnchors * 2 * sizeof(float), cudaMemcpyHostToDevice));
70 |
71 | assert(d == a + length);
72 | }
73 |
74 | void YoloLayerPlugin::serialize(void* buffer) const
75 | {
76 | char* d = static_cast<char*>(buffer), *a = d;
77 | write(d, mThreadCount);
78 | write(d, mYoloWidth);
79 | write(d, mYoloHeight);
80 | write(d, mNumAnchors);
81 | memcpy(d, mAnchorsHost, MAX_ANCHORS * 2 * sizeof(float));
82 | d += MAX_ANCHORS * 2 * sizeof(float);
83 | write(d, mNumClasses);
84 | write(d, mInputWidth);
85 | write(d, mInputHeight);
86 | write(d, mScaleXY);
87 |
88 | assert(d == a + getSerializationSize());
89 | }
90 |
91 | size_t YoloLayerPlugin::getSerializationSize() const
92 | {
93 | return sizeof(mThreadCount) + \
94 | sizeof(mYoloWidth) + sizeof(mYoloHeight) + \
95 | sizeof(mNumAnchors) + MAX_ANCHORS * 2 * sizeof(float) + \
96 | sizeof(mNumClasses) + \
97 | sizeof(mInputWidth) + sizeof(mInputHeight) + \
98 | sizeof(mScaleXY);
99 | }
100 |
101 | int YoloLayerPlugin::initialize()
102 | {
103 | return 0;
104 | }
105 |
106 | void YoloLayerPlugin::terminate()
107 | {
108 | CHECK(cudaFree(mAnchors));
109 | }
110 |
111 | Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
112 | {
113 | //output the result to channel
114 | int totalsize = mYoloWidth * mYoloHeight * mNumAnchors * sizeof(Detection) / sizeof(float);
115 | return Dims3(totalsize, 1, 1);
116 | }
117 |
118 | void YoloLayerPlugin::setPluginNamespace(const char* pluginNamespace)
119 | {
120 | mPluginNamespace = pluginNamespace;
121 | }
122 |
123 | const char* YoloLayerPlugin::getPluginNamespace() const
124 | {
125 | return mPluginNamespace;
126 | }
127 |
128 | // Return the DataType of the plugin output at the requested index
129 | DataType YoloLayerPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const
130 | {
131 | return DataType::kFLOAT;
132 | }
133 |
134 | // Return true if output tensor is broadcast across a batch.
135 | bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const
136 | {
137 | return false;
138 | }
139 |
140 | // Return true if plugin can use input that is broadcast across batch without replication.
141 | bool YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const
142 | {
143 | return false;
144 | }
145 |
146 | void YoloLayerPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
147 | {
148 | }
149 |
150 | // Attach the plugin object to an execution context and grant the plugin the access to some context resource.
151 | void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)
152 | {
153 | }
154 |
155 | // Detach the plugin object from its execution context.
156 | void YoloLayerPlugin::detachFromContext()
157 | {
158 | }
159 |
160 | const char* YoloLayerPlugin::getPluginType() const
161 | {
162 | return "YoloLayer_TRT";
163 | }
164 |
165 | const char* YoloLayerPlugin::getPluginVersion() const
166 | {
167 | return "1";
168 | }
169 |
170 | void YoloLayerPlugin::destroy()
171 | {
172 | delete this;
173 | }
174 |
175 | // Clone the plugin
176 | IPluginV2IOExt* YoloLayerPlugin::clone() const
177 | {
178 | YoloLayerPlugin *p = new YoloLayerPlugin(mYoloWidth, mYoloHeight, mNumAnchors, (float*) mAnchorsHost, mNumClasses, mInputWidth, mInputHeight, mScaleXY);
179 | p->setPluginNamespace(mPluginNamespace);
180 | return p;
181 | }
182 |
183 | inline __device__ float sigmoidGPU(float x) { return 1.0f / (1.0f + __expf(-x)); }
184 |
185 | inline __device__ float scale_sigmoidGPU(float x, float scale)
186 | {
187 | return scale * sigmoidGPU(x) - (scale - 1.0f) * 0.5f;
188 | }
189 |
190 | // CalDetection(): This kernel processes 1 yolo layer calculation. It
191 | // distributes calculations so that 1 GPU thread would be responsible
192 | // for each grid/anchor combination.
193 | // NOTE: The output (x, y, w, h) are between 0.0 and 1.0
194 | // (relative to original image width and height).
195 | __global__ void CalDetection(const float *input, float *output, int yolo_width, int yolo_height, int num_anchors,
196 | const float *anchors, int num_classes, int input_w, int input_h, float scale_x_y)
197 | {
198 | int idx = threadIdx.x + blockDim.x * blockIdx.x;
199 | Detection* det = ((Detection*) output) + idx;
200 | int total_grids = yolo_width * yolo_height;
201 | if (idx >= total_grids * num_anchors) return;
202 |
203 | int anchor_idx = idx / total_grids;
204 | idx = idx - total_grids * anchor_idx;
205 | int info_len = 5 + num_classes;
206 | const float* cur_input = input + anchor_idx * (info_len * total_grids);
207 |
208 | int class_id;
209 | float max_cls_logit = -CUDART_INF_F; // minus infinity
210 | for (int i = 5; i < info_len; ++i) {
211 | float l = cur_input[idx + i * total_grids];
212 | if (l > max_cls_logit) {
213 | max_cls_logit = l;
214 | class_id = i - 5;
215 | }
216 | }
217 | float max_cls_prob = sigmoidGPU(max_cls_logit);
218 | float box_prob = sigmoidGPU(cur_input[idx + 4 * total_grids]);
219 | //if (max_cls_prob < IGNORE_THRESH || box_prob < IGNORE_THRESH)
220 | // return;
221 |
222 | int row = idx / yolo_width;
223 | int col = idx % yolo_width;
224 |
225 | det->bbox[0] = (col + scale_sigmoidGPU(cur_input[idx + 0 * total_grids], scale_x_y)) / yolo_width; // [0, 1]
226 | det->bbox[1] = (row + scale_sigmoidGPU(cur_input[idx + 1 * total_grids], scale_x_y)) / yolo_height; // [0, 1]
227 | det->bbox[2] = __expf(cur_input[idx + 2 * total_grids]) * anchors[2 * anchor_idx] / input_w; // [0, 1]
228 | det->bbox[3] = __expf(cur_input[idx + 3 * total_grids]) * anchors[2 * anchor_idx + 1] / input_h; // [0, 1]
229 |
230 | det->bbox[0] -= det->bbox[2] / 2; // shift from center to top-left
231 | det->bbox[1] -= det->bbox[3] / 2;
232 |
233 | det->det_confidence = box_prob;
234 | det->class_id = class_id;
235 | det->class_confidence = max_cls_prob;
236 | }
237 |
238 | void YoloLayerPlugin::forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int batchSize)
239 | {
240 | int num_elements = batchSize * mNumAnchors * mYoloWidth * mYoloHeight;
241 |
242 | //CHECK(cudaMemset(output, 0, num_elements * sizeof(Detection)));
243 |
244 | CalDetection<<<(num_elements + mThreadCount - 1) / mThreadCount, mThreadCount>>>
245 | (inputs[0], output, mYoloWidth, mYoloHeight, mNumAnchors, (const float*) mAnchors, mNumClasses, mInputWidth, mInputHeight, mScaleXY);
246 | }
247 |
248 | int YoloLayerPlugin::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream)
249 | {
250 | forwardGpu((const float* const*)inputs, (float*)outputs[0], stream, batchSize);
251 | return 0;
252 | }
253 |
254 | YoloPluginCreator::YoloPluginCreator()
255 | {
256 | mPluginAttributes.clear();
257 |
258 | mFC.nbFields = mPluginAttributes.size();
259 | mFC.fields = mPluginAttributes.data();
260 | }
261 |
262 | const char* YoloPluginCreator::getPluginName() const
263 | {
264 | return "YoloLayer_TRT";
265 | }
266 |
267 | const char* YoloPluginCreator::getPluginVersion() const
268 | {
269 | return "1";
270 | }
271 |
272 | const PluginFieldCollection* YoloPluginCreator::getFieldNames()
273 | {
274 | return &mFC;
275 | }
276 |
277 | IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
278 | {
279 | assert(!strcmp(name, getPluginName()));
280 | const PluginField* fields = fc->fields;
281 | int yolo_width, yolo_height, num_anchors = 0;
282 | float anchors[MAX_ANCHORS * 2];
283 | int num_classes;
284 | int input_width, input_height;
285 | float scale_x_y = 1.0;
286 |
287 | for (int i = 0; i < fc->nbFields; ++i)
288 | {
289 | const char* attrName = fields[i].name;
290 | if (!strcmp(attrName, "yoloWidth"))
291 | {
292 | assert(fields[i].type == PluginFieldType::kINT32);
293 | yolo_width = *(static_cast<const int*>(fields[i].data));
294 | }
295 | else if (!strcmp(attrName, "yoloHeight"))
296 | {
297 | assert(fields[i].type == PluginFieldType::kINT32);
298 | yolo_height = *(static_cast<const int*>(fields[i].data));
299 | }
300 | else if (!strcmp(attrName, "numAnchors"))
301 | {
302 | assert(fields[i].type == PluginFieldType::kINT32);
303 | num_anchors = *(static_cast<const int*>(fields[i].data));
304 | }
305 | else if (!strcmp(attrName, "numClasses"))
306 | {
307 | assert(fields[i].type == PluginFieldType::kINT32);
308 | num_classes = *(static_cast<const int*>(fields[i].data));
309 | }
310 | else if (!strcmp(attrName, "inputWidth"))
311 | {
312 | assert(fields[i].type == PluginFieldType::kINT32);
313 | input_width = *(static_cast<const int*>(fields[i].data));
314 | }
315 | else if (!strcmp(attrName, "inputHeight"))
316 | {
317 | assert(fields[i].type == PluginFieldType::kINT32);
318 | input_height = *(static_cast<const int*>(fields[i].data));
319 | }
320 | else if (!strcmp(attrName, "anchors")){
321 | assert(num_anchors > 0 && num_anchors <= MAX_ANCHORS);
322 | assert(fields[i].type == PluginFieldType::kFLOAT32);
323 | memcpy(anchors, static_cast<const float*>(fields[i].data), num_anchors * 2 * sizeof(float));
324 | }
325 | else if (!strcmp(attrName, "scaleXY"))
326 | {
327 | assert(fields[i].type == PluginFieldType::kFLOAT32);
328 | scale_x_y = *(static_cast<const float*>(fields[i].data));
329 | }
330 | }
331 | assert(yolo_width > 0 && yolo_height > 0);
332 | assert(anchors[0] > 0.0f && anchors[1] > 0.0f);
333 | assert(num_classes > 0);
334 | assert(input_width > 0 && input_height > 0);
335 | assert(scale_x_y >= 1.0);
336 |
337 | YoloLayerPlugin* obj = new YoloLayerPlugin(yolo_width, yolo_height, num_anchors, anchors, num_classes, input_width, input_height, scale_x_y);
338 | obj->setPluginNamespace(mNamespace.c_str());
339 | return obj;
340 | }
341 |
342 | IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
343 | {
344 | YoloLayerPlugin* obj = new YoloLayerPlugin(serialData, serialLength);
345 | obj->setPluginNamespace(mNamespace.c_str());
346 | return obj;
347 | }
348 |
349 | PluginFieldCollection YoloPluginCreator::mFC{};
350 | std::vector<PluginField> YoloPluginCreator::mPluginAttributes;
351 | REGISTER_TENSORRT_PLUGIN(YoloPluginCreator);
352 | } // namespace nvinfer1
353 |
--------------------------------------------------------------------------------
/plugins/yolo_layer.h:
--------------------------------------------------------------------------------
1 | #ifndef _YOLO_LAYER_H
2 | #define _YOLO_LAYER_H
3 |
4 | #include <cassert>
5 | #include <cstring>
6 | #include <iostream>
7 | #include <vector>
8 | #include "math_constants.h"
9 | #include "NvInfer.h"
10 |
11 | #define MAX_ANCHORS 6
12 |
13 | #define CHECK(status) \
14 | do { \
15 | auto ret = status; \
16 | if (ret != 0) { \
17 | std::cerr << "Cuda failure in file '" << __FILE__ \
18 | << "' line " << __LINE__ \
19 | << ": " << ret << std::endl; \
20 | abort(); \
21 | } \
22 | } while (0)
23 |
24 | namespace Yolo
25 | {
26 | static constexpr float IGNORE_THRESH = 0.01f;
27 |
28 | struct alignas(float) Detection {
29 | float bbox[4]; // x, y, w, h
30 | float det_confidence;
31 | float class_id;
32 | float class_confidence;
33 | };
34 | }
35 |
36 | namespace nvinfer1
37 | {
38 | class YoloLayerPlugin: public IPluginV2IOExt
39 | {
40 | public:
41 | YoloLayerPlugin(int yolo_width, int yolo_height, int num_anchors, float* anchors, int num_classes, int input_width, int input_height, float scale_x_y);
42 | YoloLayerPlugin(const void* data, size_t length);
43 |
44 | ~YoloLayerPlugin() override = default;
45 |
46 | int getNbOutputs() const override
47 | {
48 | return 1;
49 | }
50 |
51 | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override;
52 |
53 | int initialize() override;
54 |
55 | void terminate() override;
56 |
57 | virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0;}
58 |
59 | virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override;
60 |
61 | virtual size_t getSerializationSize() const override;
62 |
63 | virtual void serialize(void* buffer) const override;
64 |
65 | bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override {
66 | return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
67 | }
68 |
69 | const char* getPluginType() const override;
70 |
71 | const char* getPluginVersion() const override;
72 |
73 | void destroy() override;
74 |
75 | IPluginV2IOExt* clone() const override;
76 |
77 | void setPluginNamespace(const char* pluginNamespace) override;
78 |
79 | const char* getPluginNamespace() const override;
80 |
81 | DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override;
82 |
83 | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override;
84 |
85 | bool canBroadcastInputAcrossBatch(int inputIndex) const override;
86 |
87 | void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override;
88 |
89 | void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override TRTNOEXCEPT;
90 |
91 | void detachFromContext() override;
92 |
93 | private:
94 | void forwardGpu(const float* const* inputs, float* output, cudaStream_t stream, int batchSize = 1);
95 |
96 | int mThreadCount = 64;
97 | int mYoloWidth, mYoloHeight, mNumAnchors;
98 | float mAnchorsHost[MAX_ANCHORS * 2];
99 | float *mAnchors; // allocated on GPU
100 | int mNumClasses;
101 | int mInputWidth, mInputHeight;
102 | float mScaleXY;
103 |
104 | const char* mPluginNamespace;
105 | };
106 |
107 | class YoloPluginCreator : public IPluginCreator
108 | {
109 | public:
110 | YoloPluginCreator();
111 |
112 | ~YoloPluginCreator() override = default;
113 |
114 | const char* getPluginName() const override;
115 |
116 | const char* getPluginVersion() const override;
117 |
118 | const PluginFieldCollection* getFieldNames() override;
119 |
120 | IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override;
121 |
122 | IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override;
123 |
124 | void setPluginNamespace(const char* libNamespace) override
125 | {
126 | mNamespace = libNamespace;
127 | }
128 |
129 | const char* getPluginNamespace() const override
130 | {
131 | return mNamespace.c_str();
132 | }
133 |
134 | private:
135 | static PluginFieldCollection mFC;
136 | static std::vector<PluginField> mPluginAttributes;
137 | std::string mNamespace;
138 | };
139 | };
140 |
141 | #endif
142 |
--------------------------------------------------------------------------------
/plugins/yolo_layer.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/plugins/yolo_layer.o
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | ## ! DO NOT MANUALLY INVOKE THIS setup.py, USE CATKIN INSTEAD
2 |
3 | from distutils.core import setup
4 | from catkin_pkg.python_setup import generate_distutils_setup
5 |
6 | # fetch values from package.xml
7 | setup_args = generate_distutils_setup(
8 | packages=["yolov4_trt"], package_dir={"": "src"}
9 | )
10 |
11 | setup(**setup_args)
12 |
--------------------------------------------------------------------------------
/src/image_converter.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 | *
4 | * Permission is hereby granted, free of charge, to any person obtaining a
5 | * copy of this software and associated documentation files (the "Software"),
6 | * to deal in the Software without restriction, including without limitation
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 | * and/or sell copies of the Software, and to permit persons to whom the
9 | * Software is furnished to do so, subject to the following conditions:
10 | *
11 | * The above copyright notice and this permission notice shall be included in
12 | * all copies or substantial portions of the Software.
13 | *
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 | * DEALINGS IN THE SOFTWARE.
21 | */
22 |
23 | #include "image_converter.h"
24 |
25 | #include <jetson-utils/cudaColorspace.h>
26 | #include <jetson-utils/cudaMappedMemory.h>
27 |
28 |
29 |
30 | static imageFormat imageFormatFromEncoding( const std::string& encoding )
31 | {
32 | if( encoding == sensor_msgs::image_encodings::BGR8 )
33 | return IMAGE_BGR8;
34 | else if( encoding == sensor_msgs::image_encodings::BGRA8 )
35 | return IMAGE_BGRA8;
36 | else if( encoding == sensor_msgs::image_encodings::RGB8 )
37 | return IMAGE_RGB8;
38 | else if( encoding == sensor_msgs::image_encodings::RGBA8 )
39 | return IMAGE_RGBA8;
40 | else if( encoding == sensor_msgs::image_encodings::MONO8 )
41 | return IMAGE_GRAY8;
42 | else if( encoding == sensor_msgs::image_encodings::YUV422 )
43 | return IMAGE_UYVY;
44 | else if( encoding == sensor_msgs::image_encodings::BAYER_RGGB8 )
45 | return IMAGE_BAYER_RGGB;
46 | else if( encoding == sensor_msgs::image_encodings::BAYER_BGGR8 )
47 | return IMAGE_BAYER_BGGR;
48 | else if( encoding == sensor_msgs::image_encodings::BAYER_GBRG8 )
49 | return IMAGE_BAYER_GBRG;
50 | else if( encoding == sensor_msgs::image_encodings::BAYER_GRBG8 )
51 | return IMAGE_BAYER_GRBG;
52 |
53 | return IMAGE_UNKNOWN;
54 | }
55 |
56 | static std::string imageFormatToEncoding( imageFormat fmt )
57 | {
58 | switch(fmt)
59 | {
60 | case IMAGE_BGR8: return sensor_msgs::image_encodings::BGR8;
61 | case IMAGE_BGRA8: return sensor_msgs::image_encodings::BGRA8;
62 | case IMAGE_RGB8: return sensor_msgs::image_encodings::RGB8;
63 | case IMAGE_RGBA8: return sensor_msgs::image_encodings::RGBA8;
64 | case IMAGE_GRAY8: return sensor_msgs::image_encodings::MONO8;
65 | case IMAGE_UYVY: return sensor_msgs::image_encodings::YUV422;
66 | case IMAGE_BAYER_RGGB: return sensor_msgs::image_encodings::BAYER_RGGB8;
67 | case IMAGE_BAYER_BGGR: return sensor_msgs::image_encodings::BAYER_BGGR8;
68 | case IMAGE_BAYER_GBRG: return sensor_msgs::image_encodings::BAYER_GBRG8;
69 | case IMAGE_BAYER_GRBG: return sensor_msgs::image_encodings::BAYER_GRBG8;
70 | }
71 |
72 | return "invalid";
73 | }
74 |
75 |
76 | // constructor
77 | imageConverter::imageConverter()
78 | {
79 | mWidth = 0;
80 | mHeight = 0;
81 | mSizeInput = 0;
82 | mSizeOutput = 0;
83 |
84 | mInputCPU = NULL;
85 | mInputGPU = NULL;
86 |
87 | mOutputCPU = NULL;
88 | mOutputGPU = NULL;
89 | }
90 |
91 |
92 | // destructor
93 | imageConverter::~imageConverter()
94 | {
95 | Free();
96 | }
97 |
98 |
99 | // Free
100 | void imageConverter::Free()
101 | {
102 | if( mInputCPU != NULL )
103 | {
104 | CUDA(cudaFreeHost(mInputCPU));
105 |
106 | mInputCPU = NULL;
107 | mInputGPU = NULL;
108 | }
109 |
110 | if( mOutputCPU != NULL )
111 | {
112 | CUDA(cudaFreeHost(mOutputCPU));
113 |
114 | mOutputCPU = NULL;
115 | mOutputGPU = NULL;
116 | }
117 | }
118 |
119 | // Convert
120 | bool imageConverter::Convert( const sensor_msgs::ImageConstPtr& input )
121 | {
122 | ROS_DEBUG("converting %ux%u %s image", input->width, input->height, input->encoding.c_str());
123 |
124 | // parse the input format
125 | const imageFormat input_format = imageFormatFromEncoding(input->encoding);
126 |
127 | if( input_format == IMAGE_UNKNOWN )
128 | {
129 | ROS_ERROR("image encoding %s is not a compatible format to use with ros_deep_learning", input->encoding.c_str());
130 | return false;
131 | }
132 |
133 | // assure memory allocation
134 | if( !Resize(input->width, input->height, input_format) )
135 | return false;
136 |
137 | // copy input to shared memory
138 | memcpy(mInputCPU, input->data.data(), imageFormatSize(input_format, input->width, input->height));
139 |
140 | // convert image format
141 | if( CUDA_FAILED(cudaConvertColor(mInputGPU, input_format, mOutputGPU, InternalFormat, input->width, input->height)) )
142 | {
143 | ROS_ERROR("failed to convert %ux%u image (from %s to %s) with CUDA", mWidth, mHeight, imageFormatToStr(input_format), imageFormatToStr(InternalFormat));
144 | return false;
145 | }
146 |
147 | return true;
148 | }
149 |
150 |
151 | // Convert
152 | bool imageConverter::Convert( sensor_msgs::Image& msg, imageFormat format )
153 | {
154 | return Convert(msg, format, ImageGPU());
155 | }
156 |
157 |
158 | // Convert
159 | bool imageConverter::Convert( sensor_msgs::Image& msg, imageFormat format, PixelType* imageGPU )
160 | {
161 | if( !mInputCPU || !imageGPU || mWidth == 0 || mHeight == 0 || mSizeInput == 0 || mSizeOutput == 0 )
162 | return false;
163 |
164 | // perform colorspace conversion into the desired encoding
165 | // in this direction, we reverse use of input/output pointers
166 | if( CUDA_FAILED(cudaConvertColor(imageGPU, InternalFormat, mInputGPU, format, mWidth, mHeight)) )
167 | {
168 | ROS_ERROR("failed to convert %ux%u image (from %s to %s) with CUDA", mWidth, mHeight, imageFormatToStr(InternalFormat), imageFormatToStr(format));
169 | return false;
170 | }
171 |
172 | // calculate size of the msg
173 | const size_t msg_size = imageFormatSize(format, mWidth, mHeight);
174 |
175 | // allocate msg storage
176 | msg.data.resize(msg_size);
177 |
178 | // copy the converted image into the msg
179 | memcpy(msg.data.data(), mInputCPU, msg_size);
180 |
181 | // populate metadata
182 | msg.width = mWidth;
183 | msg.height = mHeight;
184 | msg.step = (mWidth * imageFormatDepth(format)) / 8;
185 |
186 | msg.encoding = imageFormatToEncoding(format);
187 | msg.is_bigendian = false;
188 |
189 | return true;
190 | }
191 |
192 |
193 | // Resize
194 | bool imageConverter::Resize( uint32_t width, uint32_t height, imageFormat inputFormat )
195 | {
196 | const size_t input_size = imageFormatSize(inputFormat, width, height);
197 | const size_t output_size = imageFormatSize(InternalFormat, width, height);
198 |
199 | if( input_size != mSizeInput || output_size != mSizeOutput || mWidth != width || mHeight != height )
200 | {
201 | Free();
202 |
203 | if( !cudaAllocMapped((void**)&mInputCPU, (void**)&mInputGPU, input_size) ||
204 | !cudaAllocMapped((void**)&mOutputCPU, (void**)&mOutputGPU, output_size) )
205 | {
206 | ROS_ERROR("failed to allocate memory for %ux%u image conversion", width, height);
207 | return false;
208 | }
209 |
210 | ROS_INFO("allocated CUDA memory for %ux%u image conversion", width, height);
211 |
212 | mWidth = width;
213 | mHeight = height;
214 | mSizeInput = input_size;
215 | mSizeOutput = output_size;
216 | }
217 |
218 | return true;
219 | }
220 |
221 |
222 |
--------------------------------------------------------------------------------
/src/image_converter.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 | *
4 | * Permission is hereby granted, free of charge, to any person obtaining a
5 | * copy of this software and associated documentation files (the "Software"),
6 | * to deal in the Software without restriction, including without limitation
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 | * and/or sell copies of the Software, and to permit persons to whom the
9 | * Software is furnished to do so, subject to the following conditions:
10 | *
11 | * The above copyright notice and this permission notice shall be included in
12 | * all copies or substantial portions of the Software.
13 | *
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 | * DEALINGS IN THE SOFTWARE.
21 | */
22 |
23 | #ifndef __ROS_DEEP_LEARNING_IMAGE_CONVERTER_H_
24 | #define __ROS_DEEP_LEARNING_IMAGE_CONVERTER_H_
25 |
26 | #include <jetson-utils/cudaUtility.h>
27 | #include <jetson-utils/imageFormat.h>
28 |
29 | #include "ros_compat.h"
30 |
31 |
32 | /**
33 | * GPU image conversion
34 | */
35 | class imageConverter
36 | {
37 | public:
38 | /**
39 | * Output image pixel type
40 | */
41 | typedef uchar3 PixelType;
42 |
43 | /**
44 | * Image format used for internal CUDA processing
45 | */
46 | static const imageFormat InternalFormat = IMAGE_RGB8;
47 |
48 | /**
49 | * Image format used for outputting ROS image messages
50 | */
51 | static const imageFormat ROSOutputFormat = IMAGE_BGR8;
52 |
53 | /**
54 | * Constructor
55 | */
56 | imageConverter();
57 |
58 | /**
59 | * Destructor
60 | */
61 | ~imageConverter();
62 |
63 | /**
64 | * Free the memory
65 | */
66 | void Free();
67 |
68 | /**
69 | * Convert to 32-bit RGBA floating point
70 | */
71 | bool Convert( const sensor_msgs::ImageConstPtr& input );
72 |
73 | /**
74 | * Convert to ROS sensor_msgs::Image message
75 | */
76 | bool Convert( sensor_msgs::Image& msg_out, imageFormat outputFormat );
77 |
78 | /**
79 | * Convert to ROS sensor_msgs::Image message
80 | */
81 | bool Convert( sensor_msgs::Image& msg_out, imageFormat outputFormat, PixelType* imageGPU );
82 |
83 | /**
84 | * Resize the memory (if necessary)
85 | */
86 | bool Resize( uint32_t width, uint32_t height, imageFormat inputFormat );
87 |
88 | /**
89 | * Retrieve the converted image width
90 | */
91 | inline uint32_t GetWidth() const { return mWidth; }
92 |
93 | /**
94 | * Retrieve the converted image height
95 | */
96 | inline uint32_t GetHeight() const { return mHeight; }
97 |
98 | /**
99 | * Retrieve the GPU pointer of the converted image
100 | */
101 | inline PixelType* ImageGPU() const { return mOutputGPU; }
102 |
103 | private:
104 |
105 | uint32_t mWidth;
106 | uint32_t mHeight;
107 | size_t mSizeInput;
108 | size_t mSizeOutput;
109 |
110 | void* mInputCPU;
111 | void* mInputGPU;
112 |
113 | PixelType* mOutputCPU;
114 | PixelType* mOutputGPU;
115 | };
116 |
117 | #endif
118 |
119 |
--------------------------------------------------------------------------------
/src/node_video_source.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | *
4 | * Permission is hereby granted, free of charge, to any person obtaining a
5 | * copy of this software and associated documentation files (the "Software"),
6 | * to deal in the Software without restriction, including without limitation
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 | * and/or sell copies of the Software, and to permit persons to whom the
9 | * Software is furnished to do so, subject to the following conditions:
10 | *
11 | * The above copyright notice and this permission notice shall be included in
12 | * all copies or substantial portions of the Software.
13 | *
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 | * DEALINGS IN THE SOFTWARE.
21 | */
22 |
23 | #include "ros_compat.h"
24 | #include "image_converter.h"
25 |
26 | #include <jetson-utils/videoSource.h>
27 |
28 |
29 |
30 | // globals
31 | videoSource* stream = NULL;
32 | imageConverter* image_cvt = NULL;
33 | Publisher<sensor_msgs::Image> image_pub = NULL;
34 |
35 |
36 | // aquire and publish camera frame
37 | bool aquireFrame()
38 | {
39 | imageConverter::PixelType* nextFrame = NULL;
40 |
41 | // get the latest frame
42 | if( !stream->Capture(&nextFrame, 1000) )
43 | {
44 | ROS_ERROR("failed to capture next frame");
45 | return false;
46 | }
47 |
48 | // assure correct image size
49 | if( !image_cvt->Resize(stream->GetWidth(), stream->GetHeight(), imageConverter::ROSOutputFormat) )
50 | {
51 | ROS_ERROR("failed to resize camera image converter");
52 | return false;
53 | }
54 |
55 | // populate the message
56 | sensor_msgs::Image msg;
57 |
58 | if( !image_cvt->Convert(msg, imageConverter::ROSOutputFormat, nextFrame) )
59 | {
60 | ROS_ERROR("failed to convert video stream frame to sensor_msgs::Image");
61 | return false;
62 | }
63 |
64 | // populate timestamp in header field
65 | msg.header.stamp = ROS_TIME_NOW();
66 |
67 | // publish the message
68 | image_pub->publish(msg);
69 | ROS_DEBUG("published %ux%u video frame", stream->GetWidth(), stream->GetHeight());
70 |
71 | return true;
72 | }
73 |
74 |
75 | // node main loop
76 | int main(int argc, char **argv)
77 | {
78 | /*
79 | * create node instance
80 | */
81 | ROS_CREATE_NODE("video_source");
82 |
83 | /*
84 | * declare parameters
85 | */
86 | videoOptions video_options;
87 |
88 | std::string resource_str;
89 | std::string codec_str;
90 |
91 | int video_width = video_options.width;
92 | int video_height = video_options.height;
93 |
94 | ROS_DECLARE_PARAMETER("resource", resource_str);
95 | ROS_DECLARE_PARAMETER("codec", codec_str);
96 | ROS_DECLARE_PARAMETER("width", video_width);
97 | ROS_DECLARE_PARAMETER("height", video_height);
98 | ROS_DECLARE_PARAMETER("framerate", video_options.frameRate);
99 | ROS_DECLARE_PARAMETER("loop", video_options.loop);
100 |
101 | /*
102 | * retrieve parameters
103 | */
104 | ROS_GET_PARAMETER("resource", resource_str);
105 | ROS_GET_PARAMETER("codec", codec_str);
106 | ROS_GET_PARAMETER("width", video_width);
107 | ROS_GET_PARAMETER("height", video_height);
108 | ROS_GET_PARAMETER("framerate", video_options.frameRate);
109 | ROS_GET_PARAMETER("loop", video_options.loop);
110 |
111 | if( resource_str.size() == 0 )
112 | {
113 | ROS_ERROR("resource param wasn't set - please set the node's resource parameter to the input device/filename/URL");
114 | return 0;
115 | }
116 |
117 | if( codec_str.size() != 0 )
118 | video_options.codec = videoOptions::CodecFromStr(codec_str.c_str());
119 |
120 | video_options.width = video_width;
121 | video_options.height = video_height;
122 |
123 | ROS_INFO("opening video source: %s", resource_str.c_str());
124 |
125 | /*
126 | * open video source
127 | */
128 | stream = videoSource::Create(resource_str.c_str(), video_options);
129 |
130 | if( !stream )
131 | {
132 | ROS_ERROR("failed to open video source");
133 | return 0;
134 | }
135 |
136 |
137 | /*
138 | * create image converter
139 | */
140 | image_cvt = new imageConverter();
141 |
142 | if( !image_cvt )
143 | {
144 | ROS_ERROR("failed to create imageConverter");
145 | return 0;
146 | }
147 |
148 |
149 | /*
150 | * advertise publisher topics
151 | */
152 | ROS_CREATE_PUBLISHER(sensor_msgs::Image, "raw", 2, image_pub);
153 |
154 |
155 | /*
156 | * start the camera streaming
157 | */
158 | if( !stream->Open() )
159 | {
160 | ROS_ERROR("failed to start streaming video source");
161 | return 0;
162 | }
163 |
164 |
165 | /*
166 | * start publishing video frames
167 | */
168 | while( ROS_OK() )
169 | {
170 | if( !aquireFrame() )
171 | {
172 | if( !stream->IsStreaming() )
173 | {
174 | ROS_INFO("stream is closed or reached EOS, exiting node...");
175 | break;
176 | }
177 | }
178 |
179 | if( ROS_OK() )
180 | ROS_SPIN_ONCE();
181 | }
182 |
183 |
184 | /*
185 | * free resources
186 | */
187 | delete stream;
188 | delete image_cvt;
189 |
190 | return 0;
191 | }
192 |
193 |
--------------------------------------------------------------------------------
/src/ros_compat.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | *
4 | * Permission is hereby granted, free of charge, to any person obtaining a
5 | * copy of this software and associated documentation files (the "Software"),
6 | * to deal in the Software without restriction, including without limitation
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 | * and/or sell copies of the Software, and to permit persons to whom the
9 | * Software is furnished to do so, subject to the following conditions:
10 | *
11 | * The above copyright notice and this permission notice shall be included in
12 | * all copies or substantial portions of the Software.
13 | *
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 | * DEALINGS IN THE SOFTWARE.
21 | */
22 |
23 | #include "ros_compat.h"
24 |
25 | #ifdef ROS2
26 |
27 | std::string __node_name_;
28 | rclcpp::Clock::SharedPtr __global_clock_;
29 |
30 | #endif
31 |
32 |
33 |
--------------------------------------------------------------------------------
/src/ros_compat.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | *
4 | * Permission is hereby granted, free of charge, to any person obtaining a
5 | * copy of this software and associated documentation files (the "Software"),
6 | * to deal in the Software without restriction, including without limitation
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 | * and/or sell copies of the Software, and to permit persons to whom the
9 | * Software is furnished to do so, subject to the following conditions:
10 | *
11 | * The above copyright notice and this permission notice shall be included in
12 | * all copies or substantial portions of the Software.
13 | *
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 | * DEALINGS IN THE SOFTWARE.
21 | */
22 |
23 | #ifndef __ROS_DEEP_LEARNING_COMPATABILITY_H_
24 | #define __ROS_DEEP_LEARNING_COMPATABILITY_H_
25 |
26 | #ifdef ROS1
27 |
28 | #include <ros/ros.h>
29 | #include <std_msgs/String.h>
30 | #include <sensor_msgs/Image.h>
31 | #include <vision_msgs/Classification2D.h>
32 | #include <vision_msgs/Detection2DArray.h>
33 | #include <vision_msgs/VisionInfo.h>
34 |
35 | #define ROS_CREATE_NODE(name) \
36 | ros::init(argc, argv, name); \
37 | ros::NodeHandle nh; \
38 | ros::NodeHandle private_nh("~");
39 |
40 | template<class MessageType>
41 | using Publisher = ros::Publisher*;
42 |
43 | #define ROS_CREATE_PUBLISHER(msg, topic, queue, ptr) ros::Publisher __publisher_##ptr = private_nh.advertise<msg>(topic, queue); ptr = &__publisher_##ptr
44 | #define ROS_CREATE_PUBLISHER_STATUS(msg, topic, queue, callback, ptr) ros::Publisher __publisher_##ptr = private_nh.advertise<msg>(topic, queue, [&](const ros::SingleSubscriberPublisher& connect_msg){callback();}); ptr = &__publisher_##ptr
45 |
46 | #define ROS_CREATE_SUBSCRIBER(msg, topic, queue, callback) private_nh.subscribe(topic, queue, callback)
47 | #define ROS_SUBSCRIBER_TOPIC(subscriber) subscriber.getTopic()
48 |
49 | #define ROS_NUM_SUBSCRIBERS(publisher) publisher->getNumSubscribers()
50 | #define ROS_GET_NAMESPACE() private_nh.getNamespace()
51 | #define ROS_GET_PARAMETER(name, val) private_nh.getParam(name, val)
52 | #define ROS_GET_PARAMETER_OR(name, val, alt) private_nh.param(name, val, alt)
53 | #define ROS_SET_PARAMETER(name, val) private_nh.setParam(name, val)
54 |
55 | template<typename T> static void __ros_declare_parameter( ros::NodeHandle& nh, const std::string& name, const T& default_value )
56 | {
57 | if( !nh.hasParam(name) )
58 | nh.setParam(name, default_value);
59 | }
60 |
61 | #define ROS_DECLARE_PARAMETER(name, default_value) __ros_declare_parameter(private_nh, name, default_value)
62 |
63 | #define ROS_TIME_NOW() ros::Time::now()
64 | #define ROS_SPIN() ros::spin()
65 | #define ROS_SPIN_ONCE() ros::spinOnce()
66 | #define ROS_OK() ros::ok()
67 | #define ROS_SHUTDOWN() ros::shutdown()
68 |
69 | #elif ROS2
70 |
71 | #include <rclcpp/rclcpp.hpp>
72 | #include <std_msgs/msg/string.hpp>
73 | #include <sensor_msgs/msg/image.hpp>
74 | #include <vision_msgs/msg/classification2_d.hpp>
75 | #include <vision_msgs/msg/detection2_d_array.hpp>
76 | #include <vision_msgs/msg/vision_info.hpp>
77 |
78 | namespace vision_msgs
79 | {
80 | typedef msg::Classification2D Classification2D;
81 | typedef msg::Detection2D Detection2D;
82 | typedef msg::Detection2DArray Detection2DArray;
83 | typedef msg::ObjectHypothesis ObjectHypothesis;
84 | typedef msg::ObjectHypothesisWithPose ObjectHypothesisWithPose;
85 | typedef msg::VisionInfo VisionInfo;
86 | }
87 |
88 | namespace sensor_msgs
89 | {
90 | typedef msg::Image Image;
91 | typedef msg::Image::SharedPtr ImagePtr;
92 | typedef msg::Image::ConstSharedPtr ImageConstPtr;
93 | }
94 |
95 | namespace ros = rclcpp;
96 |
97 | extern std::string __node_name_;
98 |
99 | #define ROS_INFO(...) RCUTILS_LOG_INFO_NAMED(__node_name_.c_str(), __VA_ARGS__)
100 | #define ROS_DEBUG(...) RCUTILS_LOG_DEBUG_NAMED(__node_name_.c_str(), __VA_ARGS__)
101 | #define ROS_ERROR(...) RCUTILS_LOG_ERROR_NAMED(__node_name_.c_str(), __VA_ARGS__)
102 |
103 | #define ROS_CREATE_NODE(name) \
104 | rclcpp::init(argc, argv); \
105 | auto node = rclcpp::Node::make_shared(name, "/" name); \
106 | __node_name_ = name; \
107 | __global_clock_ = std::make_shared<rclcpp::Clock>(RCL_ROS_TIME);
108 |
109 | template<class MessageType>
110 | using Publisher = std::shared_ptr<rclcpp::Publisher<MessageType>>;
111 |
112 | #define ROS_CREATE_PUBLISHER(msg, topic, queue, ptr) ptr = node->create_publisher<msg>(topic, queue)
113 | #define ROS_CREATE_PUBLISHER_STATUS(msg, topic, queue, callback, ptr) ptr = node->create_publisher<msg>(topic, queue); rclcpp::TimerBase::SharedPtr __timer_publisher_##ptr = node->create_wall_timer(std::chrono::milliseconds(500), \
114 | [&](){ static int __subscribers_##ptr=0; const size_t __subscription_count=ptr->get_subscription_count(); if(__subscribers_##ptr != __subscription_count) { if(__subscription_count > __subscribers_##ptr) callback(); __subscribers_##ptr=__subscription_count; }})
115 |
116 | #define ROS_CREATE_SUBSCRIBER(msg, topic, queue, callback) node->create_subscription<msg>(topic, queue, callback)
117 | #define ROS_SUBSCRIBER_TOPIC(subscriber) subscriber->get_topic_name();
118 |
119 | #define ROS_NUM_SUBSCRIBERS(publisher) publisher->get_subscription_count()
120 | #define ROS_GET_NAMESPACE() node->get_namespace()
121 | #define ROS_GET_PARAMETER(name, val) node->get_parameter(name, val)
122 | #define ROS_GET_PARAMETER_OR(name, val, alt) node->get_parameter_or(name, val, alt) // TODO set undefined params in param server
123 | #define ROS_SET_PARAMETER(name, val) node->set_parameter(rclcpp::Parameter(name, val))
124 |
125 | #define ROS_DECLARE_PARAMETER(name, default_value) node->declare_parameter(name, default_value)
126 |
127 | extern rclcpp::Clock::SharedPtr __global_clock_;
128 |
129 | #define ROS_TIME_NOW() __global_clock_->now()
130 | #define ROS_SPIN() rclcpp::spin(node)
131 | #define ROS_SPIN_ONCE() rclcpp::spin_some(node)
132 | #define ROS_OK() rclcpp::ok()
133 | #define ROS_SHUTDOWN() rclcpp::shutdown()
134 |
135 | #endif
136 | #endif
137 |
138 |
--------------------------------------------------------------------------------
/trt_yolo_v3.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import os
4 | import time
5 |
6 | import cv2
7 | import pycuda.autoinit # For initializing CUDA driver
8 | import pycuda.driver as cuda
9 |
10 | from utils.yolo_classes import get_cls_dict
11 | from utils.display import open_window, set_display, show_fps
12 | from utils.visualization import BBoxVisualization
13 | from utils.yolo_with_plugins import TrtYOLO
14 |
15 | import rospy
16 | import rospkg
17 | from yolov4_trt_ros.msg import Detector2DArray
18 | from yolov4_trt_ros.msg import Detector2D
19 | from vision_msgs.msg import BoundingBox2D
20 | from sensor_msgs.msg import Image
21 | from cv_bridge import CvBridge, CvBridgeError
22 |
23 |
24 | class yolov4(object):
25 | def __init__(self):
26 | """ Constructor """
27 |
28 | self.bridge = CvBridge()
29 | self.init_params()
30 | self.init_yolo()
31 | self.cuda_ctx = cuda.Device(0).make_context()
32 | self.trt_yolo = TrtYOLO(
33 | (self.model_path + self.model), (self.h, self.w), self.category_num)
34 |
35 | def __del__(self):
36 | """ Destructor """
37 |
38 | self.cuda_ctx.pop()
39 | del self.trt_yolo
40 | del self.cuda_ctx
41 |
42 | def clean_up(self):
43 | """ Backup destructor: Release cuda memory """
44 |
45 | if self.trt_yolo is not None:
46 | self.cuda_ctx.pop()
47 | del self.trt_yolo
48 | del self.cuda_ctx
49 |
50 | def init_params(self):
51 | """ Initializes ros parameters """
52 |
53 | rospack = rospkg.RosPack()
54 | package_path = rospack.get_path("yolov4_trt_ros")
55 | self.video_topic = rospy.get_param("/video_topic", "/video_source/raw")
56 | self.model = rospy.get_param("/model", "yolov3")
57 | self.model_path = rospy.get_param(
58 | "/model_path", package_path + "/yolo/")
59 | self.input_shape = rospy.get_param("/input_shape", "416")
60 | self.category_num = rospy.get_param("/category_number", 80)
61 | self.conf_th = rospy.get_param("/confidence_threshold", 0.5)
62 | self.show_img = rospy.get_param("/show_image", True)
63 | self.image_sub = rospy.Subscriber(
64 | self.video_topic, Image, self.img_callback, queue_size=1, buff_size=1920*1080*3)
65 | self.detection_pub = rospy.Publisher(
66 | "detections", Detector2DArray, queue_size=1)
67 | self.overlay_pub = rospy.Publisher(
68 | "/result/overlay", Image, queue_size=1)
69 |
70 | def init_yolo(self):
71 | """ Initialises yolo parameters required for trt engine """
72 |
73 | if self.model.find('-') == -1:
74 | self.model = self.model + "-" + self.input_shape
75 |
76 | yolo_dim = self.model.split('-')[-1]
77 |
78 | if 'x' in yolo_dim:
79 | dim_split = yolo_dim.split('x')
80 | if len(dim_split) != 2:
81 | raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
82 | self.w, self.h = int(dim_split[0]), int(dim_split[1])
83 | else:
84 | self.h = self.w = int(yolo_dim)
85 | if self.h % 32 != 0 or self.w % 32 != 0:
86 | raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
87 |
88 | cls_dict = get_cls_dict(self.category_num)
89 | self.vis = BBoxVisualization(cls_dict)
90 |
91 |
92 | def img_callback(self, ros_img):
93 | """Continuously capture images from camera and do object detection """
94 |
95 | tic = time.time()
96 |
97 | # converts from ros_img to cv_img for processing
98 | try:
99 | cv_img = self.bridge.imgmsg_to_cv2(
100 | ros_img, desired_encoding="bgr8")
101 | rospy.logdebug("ROS Image converted for processing")
102 | except CvBridgeError as e:
103 | rospy.loginfo("Failed to convert image %s", str(e))
104 |
105 | if cv_img is not None:
106 | boxes, confs, clss = self.trt_yolo.detect(cv_img, self.conf_th)
107 |
108 | cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss)
109 | toc = time.time()
110 | fps = 1.0 / (toc - tic)
111 |
112 | self.publisher(boxes, confs, clss)
113 |
114 | if self.show_img:
115 | cv_img = show_fps(cv_img, fps)
116 | cv2.imshow("YOLOv4 DETECTION RESULTS", cv_img)
117 | cv2.waitKey(1)
118 |
119 | # converts back to ros_img type for publishing
120 | try:
121 | overlay_img = self.bridge.cv2_to_imgmsg(
122 | cv_img, encoding="passthrough")
123 | rospy.logdebug("CV Image converted for publishing")
124 | self.overlay_pub.publish(overlay_img)
125 | except CvBridgeError as e:
126 | rospy.loginfo("Failed to convert image %s", str(e))
127 |
128 | def publisher(self, boxes, confs, clss):
129 | """ Publishes to detector_msgs
130 |
131 | Parameters:
132 | boxes (List(List(int))) : Bounding boxes of all objects
133 | confs (List(double)) : Probability scores of all objects
134 | clss (List(int)) : Class ID of all classes
135 | """
136 |         detection2d = Detector2DArray()
137 |         detection2d.header.stamp = rospy.Time.now()
138 |         detection2d.header.frame_id = "camera"  # change accordingly
139 | 
140 |         for i in range(len(boxes)):
141 |             # boxes : xmin, ymin, xmax, ymax
142 |             detection = Detector2D()  # one message per detected object
143 |             detection.header.stamp = rospy.Time.now()
144 |             detection.header.frame_id = "camera"  # change accordingly
145 |             detection.results.id = clss[i]
146 |             detection.results.score = confs[i]
147 | 
148 |             detection.bbox.center.x = boxes[i][0] + (boxes[i][2] - boxes[i][0])/2
149 |             detection.bbox.center.y = boxes[i][1] + (boxes[i][3] - boxes[i][1])/2
150 |             detection.bbox.center.theta = 0.0  # change if required
151 | 
152 |             detection.bbox.size_x = abs(boxes[i][0] - boxes[i][2])
153 |             detection.bbox.size_y = abs(boxes[i][1] - boxes[i][3])
154 | 
155 |             detection2d.detections.append(detection)
156 | 
157 |         self.detection_pub.publish(detection2d)
158 |
159 |
160 | def main():
161 |     rospy.init_node('yolov4_detection', anonymous=True)
162 |     yolo = yolov4()
163 |     try:
164 |         rospy.spin()
165 |     except KeyboardInterrupt:
166 |         yolo.clean_up()  # release CUDA resources on shutdown
167 |         print("Shutting down")
168 |
169 |
170 | if __name__ == '__main__':
171 | main()
172 |
--------------------------------------------------------------------------------
/trt_yolo_v4.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import os
4 | import time
5 |
6 | import cv2
7 | import pycuda.autoinit # For initializing CUDA driver
8 | import pycuda.driver as cuda
9 |
10 | from utils.yolo_classes import get_cls_dict
11 | from utils.display import open_window, set_display, show_fps
12 | from utils.visualization import BBoxVisualization
13 | from utils.yolo_with_plugins import TrtYOLO
14 |
15 | import rospy
16 | import rospkg
17 | from yolov4_trt_ros.msg import Detector2DArray
18 | from yolov4_trt_ros.msg import Detector2D
19 | from vision_msgs.msg import BoundingBox2D
20 | from sensor_msgs.msg import Image
21 | from cv_bridge import CvBridge, CvBridgeError
22 |
23 |
24 | class yolov4(object):
25 | def __init__(self):
26 | """ Constructor """
27 |
28 | self.bridge = CvBridge()
29 | self.init_params()
30 | self.init_yolo()
31 | self.cuda_ctx = cuda.Device(0).make_context()
32 | self.trt_yolo = TrtYOLO(
33 | (self.model_path + self.model), (self.h, self.w), self.category_num)
34 |
35 | def __del__(self):
36 | """ Destructor """
37 |
38 | self.cuda_ctx.pop()
39 | del self.trt_yolo
40 | del self.cuda_ctx
41 |
42 | def clean_up(self):
43 | """ Backup destructor: Release cuda memory """
44 |
45 | if self.trt_yolo is not None:
46 | self.cuda_ctx.pop()
47 | del self.trt_yolo
48 | del self.cuda_ctx
49 |
50 | def init_params(self):
51 | """ Initializes ros parameters """
52 |
53 | rospack = rospkg.RosPack()
54 | package_path = rospack.get_path("yolov4_trt_ros")
55 | self.video_topic = rospy.get_param("/video_topic", "/video_source/raw")
56 | self.model = rospy.get_param("/model", "yolov4Custom")
57 | self.model_path = rospy.get_param(
58 | "/model_path", package_path + "/yolo/")
59 | self.category_num = rospy.get_param("/category_number", 10)
60 | self.input_shape = rospy.get_param("/input_shape", "416")
61 | self.conf_th = rospy.get_param("/confidence_threshold", 0.5)
62 | self.show_img = rospy.get_param("/show_image", True)
63 | self.image_sub = rospy.Subscriber(
64 | self.video_topic, Image, self.img_callback, queue_size=1, buff_size=1920*1080*3)
65 | self.detection_pub = rospy.Publisher(
66 | "detections", Detector2DArray, queue_size=1)
67 | self.overlay_pub = rospy.Publisher(
68 | "/result/overlay", Image, queue_size=1)
69 |
70 | def init_yolo(self):
71 | """ Initialises yolo parameters required for trt engine """
72 |
73 | if self.model.find('-') == -1:
74 | self.model = self.model + "-" + self.input_shape
75 |
76 | yolo_dim = self.model.split('-')[-1]
77 |
78 | if 'x' in yolo_dim:
79 | dim_split = yolo_dim.split('x')
80 | if len(dim_split) != 2:
81 | raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
82 | self.w, self.h = int(dim_split[0]), int(dim_split[1])
83 | else:
84 | self.h = self.w = int(yolo_dim)
85 | if self.h % 32 != 0 or self.w % 32 != 0:
86 | raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
87 |
88 | cls_dict = get_cls_dict(self.category_num)
89 | self.vis = BBoxVisualization(cls_dict)
90 |
91 |
92 | def img_callback(self, ros_img):
93 | """Continuously capture images from camera and do object detection """
94 |
95 | tic = time.time()
96 |
97 | # converts from ros_img to cv_img for processing
98 | try:
99 | cv_img = self.bridge.imgmsg_to_cv2(
100 | ros_img, desired_encoding="bgr8")
101 | rospy.logdebug("ROS Image converted for processing")
102 | except CvBridgeError as e:
103 | rospy.loginfo("Failed to convert image %s", str(e))
104 |
105 | if cv_img is not None:
106 | boxes, confs, clss = self.trt_yolo.detect(cv_img, self.conf_th)
107 |
108 | cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss)
109 | toc = time.time()
110 | fps = 1.0 / (toc - tic)
111 |
112 | self.publisher(boxes, confs, clss)
113 |
114 | if self.show_img:
115 | cv_img = show_fps(cv_img, fps)
116 | cv2.imshow("YOLOv4 DETECTION RESULTS", cv_img)
117 | cv2.waitKey(1)
118 |
119 | # converts back to ros_img type for publishing
120 | try:
121 | overlay_img = self.bridge.cv2_to_imgmsg(
122 | cv_img, encoding="passthrough")
123 | rospy.logdebug("CV Image converted for publishing")
124 | self.overlay_pub.publish(overlay_img)
125 | except CvBridgeError as e:
126 | rospy.loginfo("Failed to convert image %s", str(e))
127 |
128 | def publisher(self, boxes, confs, clss):
129 | """ Publishes to detector_msgs
130 |
131 | Parameters:
132 | boxes (List(List(int))) : Bounding boxes of all objects
133 | confs (List(double)) : Probability scores of all objects
134 | clss (List(int)) : Class ID of all classes
135 | """
136 |         detection2d = Detector2DArray()
137 |         detection2d.header.stamp = rospy.Time.now()
138 |         detection2d.header.frame_id = "camera" # change accordingly
139 | 
140 |         for i in range(len(boxes)):
141 |             # boxes : xmin, ymin, xmax, ymax
142 |             detection = Detector2D()  # one message per detected object
143 |             detection.header.stamp = rospy.Time.now()
144 |             detection.header.frame_id = "camera" # change accordingly
145 |             detection.results.id = clss[i]
146 |             detection.results.score = confs[i]
147 | 
148 |             detection.bbox.center.x = boxes[i][0] + (boxes[i][2] - boxes[i][0])/2
149 |             detection.bbox.center.y = boxes[i][1] + (boxes[i][3] - boxes[i][1])/2
150 |             detection.bbox.center.theta = 0.0 # change if required
151 | 
152 |             detection.bbox.size_x = abs(boxes[i][0] - boxes[i][2])
153 |             detection.bbox.size_y = abs(boxes[i][1] - boxes[i][3])
154 | 
155 |             detection2d.detections.append(detection)
156 | 
157 |         self.detection_pub.publish(detection2d)
158 | 
159 |
160 |
161 | def main():
162 |     rospy.init_node('yolov4_trt_ros', anonymous=True)
163 |     yolo = yolov4()
164 |     try:
165 |         rospy.spin()
166 |     except KeyboardInterrupt:
167 |         yolo.clean_up()  # release CUDA resources on shutdown
168 |         print("Shutting down")
169 |
170 |
171 | if __name__ == '__main__':
172 | main()
173 |
--------------------------------------------------------------------------------
/trt_yolo_v4_batch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import os
4 | import time
5 |
6 | import cv2
7 | import pycuda.autoinit # For initializing CUDA driver
8 | import pycuda.driver as cuda
9 |
10 | from utils.yolo_classes import get_cls_dict
11 | from utils.display import open_window, set_display, show_fps
12 | from utils.visualization import BBoxVisualization
13 | from utils.yolo_with_plugins_batch import TrtYOLO
14 |
15 | import rospy
16 | import rospkg
17 | from yolov4_trt_ros.msg import Detector2DArray
18 | from yolov4_trt_ros.msg import Detector2D
19 | from vision_msgs.msg import BoundingBox2D
20 | from sensor_msgs.msg import Image
21 | from cv_bridge import CvBridge, CvBridgeError
22 |
23 |
24 | class yolov4(object):
25 | def __init__(self):
26 | """ Constructor """
27 |
28 | self.bridge = CvBridge()
29 | self.init_params()
30 | self.init_yolo()
31 | self.cuda_ctx = cuda.Device(0).make_context()
32 | self.trt_yolo = TrtYOLO(
33 | (self.model_path + self.model), (self.h, self.w), self.category_num)
34 |
35 | def __del__(self):
36 | """ Destructor """
37 |
38 | self.cuda_ctx.pop()
39 | del self.trt_yolo
40 | del self.cuda_ctx
41 |
42 | def clean_up(self):
43 | """ Backup destructor: Release cuda memory """
44 |
45 | if self.trt_yolo is not None:
46 | self.cuda_ctx.pop()
47 | del self.trt_yolo
48 | del self.cuda_ctx
49 |
50 | def init_params(self):
51 | """ Initializes ros parameters """
52 |
53 | rospack = rospkg.RosPack()
54 | package_path = rospack.get_path("yolov4_trt_ros")
55 | #self.video_topic1 = rospy.get_param(
56 | # "/video_topic1", "/zed/zed_node/left_raw/image_raw_color")
57 | self.video_topic1 = rospy.get_param(
58 | "/video_topic1", "video_source/raw")
59 | self.video_topic2 = rospy.get_param(
60 | "/video_topic2", "/zed/zed_node/left_raw/image_raw_color")
61 | self.video_topic3 = rospy.get_param(
62 | "/video_topic3", "/zed/zed_node/right_raw/image_raw_color")
63 | self.video_topic4 = rospy.get_param(
64 | "/video_topic4", "/zed/zed_node/right_raw/image_raw_color")
65 | self.model = rospy.get_param("/model", "yolov4Custom")
66 | self.model_path = rospy.get_param(
67 | "/model_path", package_path + "/yolo/")
68 | self.category_num = rospy.get_param("/category_number", 10)
69 | self.input_shape = rospy.get_param("/input_shape", "416")
70 | self.conf_th = rospy.get_param("/confidence_threshold", 0.5)
71 | self.show_img = rospy.get_param("/show_image", True)
72 | self.image_sub1 = rospy.Subscriber(
73 | self.video_topic1, Image, self.img_callback1, queue_size=1, buff_size=2**26)
74 | self.image_sub2 = rospy.Subscriber(
75 | self.video_topic2, Image, self.img_callback2, queue_size=1, buff_size=2**26)
76 | self.image_sub3 = rospy.Subscriber(
77 | self.video_topic3, Image, self.img_callback3, queue_size=1, buff_size=2**26)
78 | self.image_sub4 = rospy.Subscriber(
79 | self.video_topic4, Image, self.img_callback4, queue_size=1, buff_size=2**26)
80 | self.detection_pub = rospy.Publisher(
81 | "detections", Detector2DArray, queue_size=1)
82 | self.overlay_pub = rospy.Publisher(
83 | "/result/overlay", Image, queue_size=1)
84 |
85 | def init_yolo(self):
86 | """ Initialises yolo parameters required for trt engine """
87 |
88 | if self.model.find('-') == -1:
89 | self.model = self.model + "-" + self.input_shape
90 |
91 | yolo_dim = self.model.split('-')[-1]
92 |
93 | if 'x' in yolo_dim:
94 | dim_split = yolo_dim.split('x')
95 | if len(dim_split) != 2:
96 | raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
97 | self.w, self.h = int(dim_split[0]), int(dim_split[1])
98 | else:
99 | self.h = self.w = int(yolo_dim)
100 | if self.h % 32 != 0 or self.w % 32 != 0:
101 | raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
102 |
103 | cls_dict = get_cls_dict(self.category_num)
104 | self.vis = BBoxVisualization(cls_dict)
105 |
106 | def img_callback1(self, ros_img):
107 | """Continuously capture images from camera and do object detection """
108 |
109 | # converts from ros_img to cv_img for processing
110 | try:
111 | cv_img = self.bridge.imgmsg_to_cv2(
112 | ros_img, desired_encoding="bgr8")
113 | rospy.logdebug("ROS Image converted for processing")
114 | except CvBridgeError as e:
115 | rospy.loginfo("Failed to convert image %s", str(e))
116 |
117 | tic = time.time()
118 | boxes, confs, clss = self.trt_yolo.detect1(cv_img, self.conf_th)
119 | cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss)
120 | toc = time.time()
121 | fps = 1.0 / (toc - tic)
122 | print("Cam1: " + str(fps) + "\n")
123 | self.publisher(boxes, confs, clss, "Cam1")
124 |
125 | if self.show_img:
126 | cv_img = show_fps(cv_img, fps)
127 | cv2.imshow("Cam 1", cv_img)
128 | cv2.waitKey(1)
129 |
130 | # converts back to ros_img type for publishing
131 | try:
132 | overlay_img = self.bridge.cv2_to_imgmsg(
133 | cv_img, encoding="passthrough")
134 | rospy.logdebug("CV Image converted for publishing")
135 | self.overlay_pub.publish(overlay_img)
136 | except CvBridgeError as e:
137 | rospy.loginfo("Failed to convert image %s", str(e))
138 |
139 | def img_callback2(self, ros_img):
140 | """Continuously capture images from camera and do object detection """
141 |
142 | # converts from ros_img to cv_img for processing
143 | # time both cv_bridge fns
144 | try:
145 | cv_img = self.bridge.imgmsg_to_cv2(
146 | ros_img, desired_encoding="bgr8")
147 | rospy.logdebug("ROS Image converted for processing")
148 | except CvBridgeError as e:
149 | rospy.loginfo("Failed to convert image %s", str(e))
150 |
151 | tic = time.time()
152 | boxes, confs, clss = self.trt_yolo.detect2(cv_img, self.conf_th)
153 | cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss)
154 | toc = time.time()
155 | fps = 1.0 / (toc - tic)
156 | print("Cam2: " + str(fps) + "\n")
157 | self.publisher(boxes, confs, clss, "Cam2")
158 |
159 | # time 1
160 | """
161 | if self.show_img:
162 | cv_img = show_fps(cv_img, fps)
163 | cv2.imshow("Cam 2", cv_img)
164 | # time 2
165 | cv2.waitKey(1)
166 | """
167 |
168 | # converts back to ros_img type for publishing
169 | try:
170 | overlay_img = self.bridge.cv2_to_imgmsg(
171 | cv_img, encoding="passthrough")
172 | rospy.logdebug("CV Image converted for publishing")
173 | self.overlay_pub.publish(overlay_img)
174 | except CvBridgeError as e:
175 | rospy.loginfo("Failed to convert image %s", str(e))
176 |
177 | def img_callback3(self, ros_img):
178 | """Continuously capture images from camera and do object detection """
179 |
180 | # converts from ros_img to cv_img for processing
181 | try:
182 | cv_img = self.bridge.imgmsg_to_cv2(
183 | ros_img, desired_encoding="bgr8")
184 | rospy.logdebug("ROS Image converted for processing")
185 | except CvBridgeError as e:
186 | rospy.loginfo("Failed to convert image %s", str(e))
187 |
188 | tic = time.time()
189 | boxes, confs, clss = self.trt_yolo.detect3(cv_img, self.conf_th)
190 | cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss)
191 | toc = time.time()
192 | fps = 1.0 / (toc - tic)
193 | print("Cam3: " + str(fps) + "\n")
194 | self.publisher(boxes, confs, clss, "Cam3")
195 | """
196 | if self.show_img:
197 | cv_img = show_fps(cv_img, fps)
198 | cv2.imshow("Cam 3", cv_img)
199 | cv2.waitKey(1)
200 | """
201 | # converts back to ros_img type for publishing
202 | try:
203 | overlay_img = self.bridge.cv2_to_imgmsg(
204 | cv_img, encoding="passthrough")
205 | rospy.logdebug("CV Image converted for publishing")
206 | self.overlay_pub.publish(overlay_img)
207 | except CvBridgeError as e:
208 | rospy.loginfo("Failed to convert image %s", str(e))
209 |
210 | def img_callback4(self, ros_img):
211 | """Continuously capture images from camera and do object detection """
212 |
213 | # converts from ros_img to cv_img for processing
214 | try:
215 | cv_img = self.bridge.imgmsg_to_cv2(
216 | ros_img, desired_encoding="bgr8")
217 | rospy.logdebug("ROS Image converted for processing")
218 | except CvBridgeError as e:
219 | rospy.loginfo("Failed to convert image %s", str(e))
220 |
221 | tic = time.time()
222 | boxes, confs, clss = self.trt_yolo.detect4(cv_img, self.conf_th)
223 | cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss)
224 | toc = time.time()
225 | fps = 1.0 / (toc - tic)
226 | print("Cam4: " + str(fps) + "\n")
227 | self.publisher(boxes, confs, clss, "Cam4")
228 | """
229 | if self.show_img:
230 | cv_img = show_fps(cv_img, fps)
231 | cv2.imshow("Cam 3", cv_img)
232 | cv2.waitKey(1)
233 | """
234 | # converts back to ros_img type for publishing
235 | try:
236 | overlay_img = self.bridge.cv2_to_imgmsg(
237 | cv_img, encoding="passthrough")
238 | rospy.logdebug("CV Image converted for publishing")
239 | self.overlay_pub.publish(overlay_img)
240 | except CvBridgeError as e:
241 | rospy.loginfo("Failed to convert image %s", str(e))
242 |
243 | def publisher(self, boxes, confs, clss, frame):
244 | """ Publishes to detector_msgs
245 |
246 | Parameters:
247 | boxes (List(List(int))) : Bounding boxes of all objects
248 | confs (List(double)) : Probability scores of all objects
249 | clss (List(int)) : Class ID of all classes
250 | """
251 |         detection2d = Detector2DArray()
252 |         detection2d.header.stamp = rospy.Time.now()
253 |         detection2d.header.frame_id = "camera" # change accordingly
254 | 
255 |         for i in range(len(boxes)):
256 |             # boxes : xmin, ymin, xmax, ymax
257 |             detection = Detector2D()  # one message per detected object
258 |             detection.header.stamp = rospy.Time.now()
259 |             detection.header.frame_id = frame # per-camera frame id
260 |             detection.results.id = clss[i]
261 |             detection.results.score = confs[i]
262 | 
263 |             detection.bbox.center.x = boxes[i][0] + \
264 |                 (boxes[i][2] - boxes[i][0])/2
265 |             detection.bbox.center.y = boxes[i][1] + \
266 |                 (boxes[i][3] - boxes[i][1])/2
267 |             detection.bbox.center.theta = 0.0 # change if required
268 | 
269 |             detection.bbox.size_x = abs(boxes[i][0] - boxes[i][2])
270 |             detection.bbox.size_y = abs(boxes[i][1] - boxes[i][3])
271 | 
272 |             detection2d.detections.append(detection)
273 | 
274 |         self.detection_pub.publish(detection2d)
275 | 
276 |
277 |
278 | def main():
279 |     rospy.init_node('yolov4_trt_ros', anonymous=True)
280 |     yolo = yolov4()
281 |     try:
282 |         rospy.spin()
283 |     except KeyboardInterrupt:
284 |         # release CUDA resources on shutdown
285 |         yolo.clean_up()
286 |         print("Shutting down")
287 |
288 |
289 | if __name__ == '__main__':
290 | main()
291 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/utils/__init__.py
--------------------------------------------------------------------------------
/utils/display.py:
--------------------------------------------------------------------------------
1 |
2 | """display.py
3 | """
4 |
5 |
6 | import cv2
7 |
8 |
9 | def open_window(window_name, title, width=None, height=None):
10 | """Open the display window."""
11 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
12 | cv2.setWindowTitle(window_name, title)
13 | if width and height:
14 | cv2.resizeWindow(window_name, width, height)
15 |
16 |
17 | def show_help_text(img, help_text):
18 | """Draw help text on image."""
19 | cv2.putText(img, help_text, (11, 20), cv2.FONT_HERSHEY_PLAIN, 1.0,
20 | (32, 32, 32), 4, cv2.LINE_AA)
21 | cv2.putText(img, help_text, (10, 20), cv2.FONT_HERSHEY_PLAIN, 1.0,
22 | (240, 240, 240), 1, cv2.LINE_AA)
23 | return img
24 |
25 |
26 | def show_fps(img, fps):
27 | """Draw fps number at top-left corner of the image."""
28 | font = cv2.FONT_HERSHEY_PLAIN
29 | line = cv2.LINE_AA
30 | fps_text = 'FPS: {:.2f}'.format(fps)
31 | cv2.putText(img, fps_text, (11, 20), font, 1.0, (32, 32, 32), 4, line)
32 | cv2.putText(img, fps_text, (10, 20), font, 1.0, (240, 240, 240), 1, line)
33 | return img
34 |
35 |
36 | def set_display(window_name, full_scrn):
37 |     """Set display window to either full screen or normal."""
38 | if full_scrn:
39 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
40 | cv2.WINDOW_FULLSCREEN)
41 | else:
42 | cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN,
43 | cv2.WINDOW_NORMAL)
44 |
--------------------------------------------------------------------------------
/utils/visualization.py:
--------------------------------------------------------------------------------
1 | """visualization.py
2 |
3 | The BBoxVisualization class implements drawing of nice looking
4 | bounding boxes based on object detection results.
5 | """
6 |
7 |
8 | import numpy as np
9 | import cv2
10 |
11 |
12 | # Constants
13 | ALPHA = 0.5
14 | FONT = cv2.FONT_HERSHEY_PLAIN
15 | TEXT_SCALE = 1.0
16 | TEXT_THICKNESS = 1
17 | BLACK = (0, 0, 0)
18 | WHITE = (255, 255, 255)
19 |
20 |
21 | def gen_colors(num_colors):
22 | """Generate different colors.
23 |
24 | # Arguments
25 | num_colors: total number of colors/classes.
26 |
27 | # Output
28 | bgrs: a list of (B, G, R) tuples which correspond to each of
29 | the colors/classes.
30 | """
31 | import random
32 | import colorsys
33 |
34 | hsvs = [[float(x) / num_colors, 1., 0.7] for x in range(num_colors)]
35 | random.seed(1234)
36 | random.shuffle(hsvs)
37 | rgbs = list(map(lambda x: list(colorsys.hsv_to_rgb(*x)), hsvs))
38 | bgrs = [(int(rgb[2] * 255), int(rgb[1] * 255), int(rgb[0] * 255))
39 | for rgb in rgbs]
40 | return bgrs
41 |
42 |
43 | def draw_boxed_text(img, text, topleft, color):
44 |     """Draw translucent boxed text in white, overlaid on top of a
45 | colored patch surrounded by a black border. FONT, TEXT_SCALE,
46 | TEXT_THICKNESS and ALPHA values are constants (fixed) as defined
47 | on top.
48 |
49 | # Arguments
50 | img: the input image as a numpy array.
51 | text: the text to be drawn.
52 | topleft: XY coordinate of the topleft corner of the boxed text.
53 | color: color of the patch, i.e. background of the text.
54 |
55 | # Output
56 | img: note the original image is modified inplace.
57 | """
58 | assert img.dtype == np.uint8
59 | img_h, img_w, _ = img.shape
60 | if topleft[0] >= img_w or topleft[1] >= img_h:
61 | return img
62 | margin = 3
63 | size = cv2.getTextSize(text, FONT, TEXT_SCALE, TEXT_THICKNESS)
64 | w = size[0][0] + margin * 2
65 | h = size[0][1] + margin * 2
66 | # the patch is used to draw boxed text
67 | patch = np.zeros((h, w, 3), dtype=np.uint8)
68 | patch[...] = color
69 | cv2.putText(patch, text, (margin+1, h-margin-2), FONT, TEXT_SCALE,
70 | WHITE, thickness=TEXT_THICKNESS, lineType=cv2.LINE_8)
71 | cv2.rectangle(patch, (0, 0), (w-1, h-1), BLACK, thickness=1)
72 | w = min(w, img_w - topleft[0]) # clip overlay at image boundary
73 | h = min(h, img_h - topleft[1])
74 | # Overlay the boxed text onto region of interest (roi) in img
75 | roi = img[topleft[1]:topleft[1]+h, topleft[0]:topleft[0]+w, :]
76 | cv2.addWeighted(patch[0:h, 0:w, :], ALPHA, roi, 1 - ALPHA, 0, roi)
77 | return img
78 |
79 |
80 | class BBoxVisualization():
81 |     """BBoxVisualization class implements nice drawing of bounding boxes.
82 |
83 | # Arguments
84 | cls_dict: a dictionary used to translate class id to its name.
85 | """
86 |
87 | def __init__(self, cls_dict):
88 | self.cls_dict = cls_dict
89 | self.colors = gen_colors(len(cls_dict))
90 |
91 | def draw_bboxes(self, img, boxes, confs, clss):
92 | """Draw detected bounding boxes on the original image."""
93 | for bb, cf, cl in zip(boxes, confs, clss):
94 | cl = int(cl)
95 | x_min, y_min, x_max, y_max = bb[0], bb[1], bb[2], bb[3]
96 | color = self.colors[cl]
97 | cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, 2)
98 | txt_loc = (max(x_min+2, 0), max(y_min+2, 0))
99 | cls_name = self.cls_dict.get(cl, 'CLS{}'.format(cl))
100 | txt = '{} {:.2f}'.format(cls_name, cf)
101 | img = draw_boxed_text(img, txt, txt_loc, color)
102 | return img
103 |
--------------------------------------------------------------------------------
/utils/yolo_classes.py:
--------------------------------------------------------------------------------
1 | CLASSES_LIST = [
2 | 'bike',
3 | 'bus',
4 | 'car',
5 | 'motor',
6 | 'person',
7 | 'rider',
8 | 'traffic light',
9 | 'traffic sign',
10 | 'train',
11 | 'truck',
12 | ]
13 |
14 | def get_cls_dict(category_num):
15 | """Get the class ID to name translation dictionary."""
16 | if category_num == len(CLASSES_LIST):
17 | return {i: n for i, n in enumerate(CLASSES_LIST)}
18 | else:
19 | return {i: 'CLS%d' % i for i in range(category_num)}
20 |
--------------------------------------------------------------------------------
/utils/yolo_with_plugins.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import ctypes
4 |
5 | import rospkg
6 | import numpy as np
7 | import cv2
8 | import tensorrt as trt
9 | import pycuda.driver as cuda
10 | import pycuda.autoinit
11 |
12 | rospack = rospkg.RosPack()
13 | plugin_path = rospack.get_path("yolov4_trt_ros") + "/plugins/libyolo_layer.so"
14 |
15 | try:
16 | ctypes.cdll.LoadLibrary(plugin_path)
17 | except OSError as e:
18 | raise SystemExit('ERROR: failed to load ./plugins/libyolo_layer.so. '
19 | 'Did you forget to do a "make" in the "./plugins/" '
20 | 'subdirectory?')
21 |
22 |
23 | def _preprocess_yolo(img, input_shape):
24 | """Preprocess an image before TRT YOLO inferencing.
25 |
26 | # Args
27 | img: int8 numpy array of shape (img_h, img_w, 3)
28 | input_shape: a tuple of (H, W)
29 |
30 | # Returns
31 | preprocessed img: float32 numpy array of shape (3, H, W)
32 | """
33 | img = cv2.resize(img, (input_shape[1], input_shape[0]))
34 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
35 | img = img.transpose((2, 0, 1)).astype(np.float32)
36 | img /= 255.0
37 | return img
38 |
39 |
40 | def _nms_boxes(detections, nms_threshold):
41 | """Apply the Non-Maximum Suppression (NMS) algorithm on the bounding
42 | boxes with their confidence scores and return an array with the
43 | indexes of the bounding boxes we want to keep.
44 |
45 | # Args
46 | detections: Nx7 numpy arrays of
47 | [[x, y, w, h, box_confidence, class_id, class_prob],
48 | ......]
49 | """
50 | x_coord = detections[:, 0]
51 | y_coord = detections[:, 1]
52 | width = detections[:, 2]
53 | height = detections[:, 3]
54 | box_confidences = detections[:, 4]
55 |
56 | areas = width * height
57 | ordered = box_confidences.argsort()[::-1]
58 |
59 | keep = list()
60 | while ordered.size > 0:
61 | # Index of the current element:
62 | i = ordered[0]
63 | keep.append(i)
64 | xx1 = np.maximum(x_coord[i], x_coord[ordered[1:]])
65 | yy1 = np.maximum(y_coord[i], y_coord[ordered[1:]])
66 | xx2 = np.minimum(x_coord[i] + width[i],
67 | x_coord[ordered[1:]] + width[ordered[1:]])
68 | yy2 = np.minimum(y_coord[i] + height[i],
69 | y_coord[ordered[1:]] + height[ordered[1:]])
70 |
71 | width1 = np.maximum(0.0, xx2 - xx1 + 1)
72 | height1 = np.maximum(0.0, yy2 - yy1 + 1)
73 | intersection = width1 * height1
74 | union = (areas[i] + areas[ordered[1:]] - intersection)
75 | iou = intersection / union
76 | indexes = np.where(iou <= nms_threshold)[0]
77 | ordered = ordered[indexes + 1]
78 |
79 | keep = np.array(keep)
80 | return keep
81 |
82 |
83 | def _postprocess_yolo(trt_outputs, img_w, img_h, conf_th, nms_threshold=0.5):
84 | """Postprocess TensorRT outputs.
85 |
86 | # Args
87 | trt_outputs: a list of 2 or 3 tensors, where each tensor
88 | contains a multiple of 7 float32 numbers in
89 | the order of [x, y, w, h, box_confidence, class_id, class_prob]
90 | conf_th: confidence threshold
91 |
92 | # Returns
93 | boxes, scores, classes (after NMS)
94 | """
95 | # concatenate outputs of all yolo layers
96 | detections = np.concatenate(
97 | [o.reshape(-1, 7) for o in trt_outputs], axis=0)
98 |
99 | # drop detections with score lower than conf_th
100 | box_scores = detections[:, 4] * detections[:, 6]
101 | pos = np.where(box_scores >= conf_th)
102 | detections = detections[pos]
103 |
104 | # scale x, y, w, h from [0, 1] to pixel values
105 | detections[:, 0] *= img_w
106 | detections[:, 1] *= img_h
107 | detections[:, 2] *= img_w
108 | detections[:, 3] *= img_h
109 |
110 | # NMS
111 | nms_detections = np.zeros((0, 7), dtype=detections.dtype)
112 | for class_id in set(detections[:, 5]):
113 | idxs = np.where(detections[:, 5] == class_id)
114 | cls_detections = detections[idxs]
115 | keep = _nms_boxes(cls_detections, nms_threshold)
116 | nms_detections = np.concatenate(
117 | [nms_detections, cls_detections[keep]], axis=0)
118 | if len(nms_detections) == 0:
119 | boxes = np.zeros((0, 4), dtype=np.int)
120 | scores = np.zeros((0, 1), dtype=np.float32)
121 | classes = np.zeros((0, 1), dtype=np.float32)
122 | else:
123 | xx = nms_detections[:, 0].reshape(-1, 1)
124 | yy = nms_detections[:, 1].reshape(-1, 1)
125 | ww = nms_detections[:, 2].reshape(-1, 1)
126 | hh = nms_detections[:, 3].reshape(-1, 1)
127 | boxes = np.concatenate([xx, yy, xx+ww, yy+hh], axis=1) + 0.5
128 | boxes = boxes.astype(np.int)
129 | scores = nms_detections[:, 4] * nms_detections[:, 6]
130 | classes = nms_detections[:, 5]
131 | return boxes, scores, classes
132 |
133 |
134 | class HostDeviceMem(object):
135 | """Simple helper data class that's a little nicer to use than a 2-tuple."""
136 |
137 | def __init__(self, host_mem, device_mem):
138 | self.host = host_mem
139 | self.device = device_mem
140 |
141 | def __str__(self):
142 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
143 |
144 | def __repr__(self):
145 | return self.__str__()
146 |
147 |
148 | def allocate_buffers(engine, grid_sizes):
149 | """Allocates all host/device in/out buffers required for an engine.
150 | Checked!
151 | """
152 |
153 | inputs = []
154 | outputs = []
155 | bindings = []
156 | output_idx = 0
157 | stream = cuda.Stream()
158 | for binding in engine:
159 | size = trt.volume(engine.get_binding_shape(binding)) * \
160 | engine.max_batch_size
161 | dtype = trt.nptype(engine.get_binding_dtype(binding))
162 | # Allocate host and device buffers
163 | host_mem = cuda.pagelocked_empty(size, dtype)
164 | device_mem = cuda.mem_alloc(host_mem.nbytes)
165 |
166 | # Append the device buffer to device bindings.
167 | bindings.append(int(device_mem))
168 |
169 | # Append to the appropriate list.
170 | if engine.binding_is_input(binding):
171 | inputs.append(HostDeviceMem(host_mem, device_mem))
172 |
173 | else:
174 | # each grid has 3 anchors, each anchor generates a detection
175 | # output of 7 float32 values
176 | assert size == grid_sizes[output_idx] * \
177 | 3 * 7 * engine.max_batch_size
178 | outputs.append(HostDeviceMem(host_mem, device_mem))
179 | output_idx += 1
180 |
181 | return inputs, outputs, bindings, stream
182 |
183 |
184 | def get_yolo_grid_sizes(model_name, h, w):
185 | """Get grid sizes (w*h) for all yolo layers in the model."""
186 | if 'yolov3' in model_name:
187 | if 'tiny' in model_name:
188 | return [(h // 32) * (w // 32), (h // 16) * (w // 16)]
189 | else:
190 | return [(h // 32) * (w // 32), (h // 16) * (w // 16), (h // 8) * (w // 8)]
191 | elif 'yolov4' in model_name:
192 | if 'tiny' in model_name:
193 | return [(h // 32) * (w // 32), (h // 16) * (w // 16)]
194 | else:
195 | return [(h // 8) * (w // 8), (h // 16) * (w // 16), (w // 32) * (h // 32)]
196 | else:
197 |         raise ValueError('ERROR: unknown model (%s)!' % model_name)
198 |
199 |
200 | class TrtYOLO(object):
201 | """TrtYOLO class encapsulates things needed to run TRT YOLO."""
202 |
203 | def _load_engine(self):
204 | TRTbin = '%s.trt' % self.model
205 | with open(TRTbin, 'rb') as f, trt.Runtime(self.trt_logger) as runtime:
206 | return runtime.deserialize_cuda_engine(f.read())
207 |
208 | def __init__(self, model, input_shape, category_num=80, cuda_ctx=None):
209 | """Initialize TensorRT plugins, engine and context."""
210 | self.model = model
211 | self.input_shape = input_shape
212 | self.category_num = category_num
213 | self.cuda_ctx = cuda.Device(0).make_context()
214 | self.trt_logger = trt.Logger(trt.Logger.INFO)
215 | self.engine = self._load_engine()
216 | self.context = self.engine.create_execution_context()
217 | grid_sizes = get_yolo_grid_sizes(
218 | self.model, self.input_shape[0], self.input_shape[1])
219 | self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers(
220 | self.engine, grid_sizes)
221 |
222 | def __del__(self):
223 | """Free CUDA memories."""
224 | del self.outputs
225 | del self.inputs
226 | del self.stream
227 |
228 | def allocate_buffers(self, engine, grid_sizes):
229 | """Allocates all host/device in/out buffers required for an engine.
230 | Checked!
231 | """
232 |
233 | inputs = []
234 | outputs = []
235 | bindings = []
236 | output_idx = 0
237 | stream = cuda.Stream()
238 |
239 | for binding in engine:
240 | size = trt.volume(engine.get_binding_shape(
241 | binding)) * engine.max_batch_size
242 | dtype = trt.nptype(engine.get_binding_dtype(binding))
243 | # Allocate host and device buffers
244 | host_mem = cuda.pagelocked_empty(size, dtype)
245 | device_mem = cuda.mem_alloc(host_mem.nbytes)
246 |
247 | # Append the device buffer to device bindings.
248 | bindings.append(int(device_mem))
249 |
250 | # Append to the appropriate list.
251 | if engine.binding_is_input(binding):
252 | inputs.append(HostDeviceMem(host_mem, device_mem))
253 |
254 | else:
255 | # each grid has 3 anchors, each anchor generates a detection
256 | # output of 7 float32 values
257 | assert size == grid_sizes[output_idx] * \
258 | 3 * 7 * engine.max_batch_size
259 | outputs.append(HostDeviceMem(host_mem, device_mem))
260 | output_idx += 1
261 |
262 | return inputs, outputs, bindings, stream
263 |
264 | def detect(self, img, conf_th=0.3):
265 | """ Detect objects in the input image """
266 |
267 | img_resized = _preprocess_yolo(img, self.input_shape)
268 | trt_outputs = []
269 |
270 | # Set host input to the image. The do_inference() function
271 | # will copy the input to the GPU before executing.
272 | self.inputs[0].host = np.ascontiguousarray(img_resized)
273 |
274 | if self.cuda_ctx:
275 | self.cuda_ctx.push()
276 |
277 | self.bindings = [int(i) for i in self.bindings]
278 |
279 | context = self.context
280 | bindings = self.bindings
281 | inputs = self.inputs
282 | outputs = self.outputs
283 | stream = self.stream
284 |
285 | if trt.__version__[0] < '7':
286 | # Transfer input data to the GPU.
287 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
288 | # Run inference.
289 | context.execute_async(batch_size=1,
290 | bindings=bindings,
291 | stream_handle=stream.handle)
292 | # Transfer predictions back from the GPU.
293 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
294 | # Synchronize the stream
295 | stream.synchronize()
296 | # Return only the host outputs.
297 | else:
298 | # Transfer input data to the GPU.
299 | [cuda.memcpy_htod_async(inp.device, inp.host, stream)
300 | for inp in inputs]
301 | # Run inference.
302 | context.execute_async_v2(
303 | bindings=self.bindings, stream_handle=stream.handle)
304 | # Transfer predictions back from the GPU.
305 | [cuda.memcpy_dtoh_async(out.host, out.device, stream)
306 | for out in outputs]
307 | # Synchronize the stream
308 | stream.synchronize()
309 |
310 | trt_outputs = [out.host for out in outputs]
311 |
312 | del context
313 |
314 | if self.cuda_ctx:
315 | self.cuda_ctx.pop()
316 |
317 | boxes, scores, classes = _postprocess_yolo(
318 | trt_outputs, img.shape[1], img.shape[0], conf_th)
319 |
320 | # clip x1, y1, x2, y2 within original image
321 | boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1)
322 | boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1)
323 | return boxes, scores, classes
324 |
--------------------------------------------------------------------------------
/utils/yolo_with_plugins_batch.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import ctypes
4 |
5 | import rospkg
6 | import numpy as np
7 | import cv2
8 | import tensorrt as trt
9 | import pycuda.driver as cuda
10 | import pycuda.autoinit
11 | from multiprocessing import Process
12 |
13 | rospack = rospkg.RosPack()
14 | plugin_path = rospack.get_path("yolov4_trt_ros") + "/plugins/libyolo_layer.so"
15 |
16 | try:
17 | ctypes.cdll.LoadLibrary(plugin_path)
18 | except OSError as e:
19 | raise SystemExit('ERROR: failed to load ./plugins/libyolo_layer.so. '
20 | 'Did you forget to do a "make" in the "./plugins/" '
21 | 'subdirectory?')
22 |
23 |
24 | def _preprocess_yolo(img, input_shape):
25 | """Preprocess an image before TRT YOLO inferencing.
26 |
27 | # Args
28 | img: int8 numpy array of shape (img_h, img_w, 3)
29 | input_shape: a tuple of (H, W)
30 |
31 | # Returns
32 | preprocessed img: float32 numpy array of shape (3, H, W)
33 | """
34 | img = cv2.resize(img, (input_shape[1], input_shape[0]))
35 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
36 | img = img.transpose((2, 0, 1)).astype(np.float32)
37 | img /= 255.0
38 | return img
39 |
40 |
41 | def _nms_boxes(detections, nms_threshold):
42 | """Apply the Non-Maximum Suppression (NMS) algorithm on the bounding
43 | boxes with their confidence scores and return an array with the
44 | indexes of the bounding boxes we want to keep.
45 |
46 | # Args
47 | detections: Nx7 numpy arrays of
48 | [[x, y, w, h, box_confidence, class_id, class_prob],
49 | ......]
50 | """
51 | x_coord = detections[:, 0]
52 | y_coord = detections[:, 1]
53 | width = detections[:, 2]
54 | height = detections[:, 3]
55 | box_confidences = detections[:, 4]
56 |
57 | areas = width * height
58 | ordered = box_confidences.argsort()[::-1]
59 |
60 | keep = list()
61 | while ordered.size > 0:
62 | # Index of the current element:
63 | i = ordered[0]
64 | keep.append(i)
65 | xx1 = np.maximum(x_coord[i], x_coord[ordered[1:]])
66 | yy1 = np.maximum(y_coord[i], y_coord[ordered[1:]])
67 | xx2 = np.minimum(x_coord[i] + width[i],
68 | x_coord[ordered[1:]] + width[ordered[1:]])
69 | yy2 = np.minimum(y_coord[i] + height[i],
70 | y_coord[ordered[1:]] + height[ordered[1:]])
71 |
72 | width1 = np.maximum(0.0, xx2 - xx1 + 1)
73 | height1 = np.maximum(0.0, yy2 - yy1 + 1)
74 | intersection = width1 * height1
75 | union = (areas[i] + areas[ordered[1:]] - intersection)
76 | iou = intersection / union
77 | indexes = np.where(iou <= nms_threshold)[0]
78 | ordered = ordered[indexes + 1]
79 |
80 | keep = np.array(keep)
81 | return keep
82 |
83 |
84 | def _postprocess_yolo(trt_outputs, img_w, img_h, conf_th, nms_threshold=0.5):
85 | """Postprocess TensorRT outputs.
86 |
87 | # Args
88 | trt_outputs: a list of 2 or 3 tensors, where each tensor
89 | contains a multiple of 7 float32 numbers in
90 | the order of [x, y, w, h, box_confidence, class_id, class_prob]
91 | conf_th: confidence threshold
92 |
93 | # Returns
94 | boxes, scores, classes (after NMS)
95 | """
96 | # concatenate outputs of all yolo layers
97 | detections = np.concatenate(
98 | [o.reshape(-1, 7) for o in trt_outputs], axis=0)
99 |
100 | # drop detections with score lower than conf_th
101 | box_scores = detections[:, 4] * detections[:, 6]
102 | pos = np.where(box_scores >= conf_th)
103 | detections = detections[pos]
104 |
105 | # scale x, y, w, h from [0, 1] to pixel values
106 | detections[:, 0] *= img_w
107 | detections[:, 1] *= img_h
108 | detections[:, 2] *= img_w
109 | detections[:, 3] *= img_h
110 |
111 | # NMS
112 | nms_detections = np.zeros((0, 7), dtype=detections.dtype)
113 | for class_id in set(detections[:, 5]):
114 | idxs = np.where(detections[:, 5] == class_id)
115 | cls_detections = detections[idxs]
116 | keep = _nms_boxes(cls_detections, nms_threshold)
117 | nms_detections = np.concatenate(
118 | [nms_detections, cls_detections[keep]], axis=0)
119 | if len(nms_detections) == 0:
120 | boxes = np.zeros((0, 4), dtype=np.int)
121 | scores = np.zeros((0, 1), dtype=np.float32)
122 | classes = np.zeros((0, 1), dtype=np.float32)
123 | else:
124 | xx = nms_detections[:, 0].reshape(-1, 1)
125 | yy = nms_detections[:, 1].reshape(-1, 1)
126 | ww = nms_detections[:, 2].reshape(-1, 1)
127 | hh = nms_detections[:, 3].reshape(-1, 1)
128 | boxes = np.concatenate([xx, yy, xx+ww, yy+hh], axis=1) + 0.5
129 | boxes = boxes.astype(np.int)
130 | scores = nms_detections[:, 4] * nms_detections[:, 6]
131 | classes = nms_detections[:, 5]
132 | return boxes, scores, classes
133 |
134 |
135 | class HostDeviceMem(object):
136 | """Simple helper data class that's a little nicer to use than a 2-tuple."""
137 |
138 | def __init__(self, host_mem, device_mem):
139 | self.host = host_mem
140 | self.device = device_mem
141 |
142 | def __str__(self):
143 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
144 |
145 | def __repr__(self):
146 | return self.__str__()
147 |
148 |
149 | def allocate_buffers(engine, grid_sizes):
150 | """Allocates all host/device in/out buffers required for an engine.
151 | Checked!
152 | """
153 |
154 | inputs = []
155 | outputs = []
156 | bindings = []
157 | output_idx = 0
158 | stream = cuda.Stream()
159 | for binding in engine:
160 | size = trt.volume(engine.get_binding_shape(binding)) * \
161 | engine.max_batch_size
162 | dtype = trt.nptype(engine.get_binding_dtype(binding))
163 | # Allocate host and device buffers
164 | host_mem = cuda.pagelocked_empty(size, dtype)
165 | device_mem = cuda.mem_alloc(host_mem.nbytes)
166 |
167 | # Append the device buffer to device bindings.
168 | bindings.append(int(device_mem))
169 |
170 | # Append to the appropriate list.
171 | if engine.binding_is_input(binding):
172 | inputs.append(HostDeviceMem(host_mem, device_mem))
173 |
174 | else:
175 | # each grid has 3 anchors, each anchor generates a detection
176 | # output of 7 float32 values
177 | assert size == grid_sizes[output_idx] * \
178 | 3 * 7 * engine.max_batch_size
179 | outputs.append(HostDeviceMem(host_mem, device_mem))
180 | output_idx += 1
181 |
182 | return inputs, outputs, bindings, stream
183 |
184 |
185 | def get_yolo_grid_sizes(model_name, h, w):
186 | """Get grid sizes (w*h) for all yolo layers in the model."""
187 | if 'yolov3' in model_name:
188 | if 'tiny' in model_name:
189 | return [(h // 32) * (w // 32), (h // 16) * (w // 16)]
190 | else:
191 | return [(h // 32) * (w // 32), (h // 16) * (w // 16), (h // 8) * (w // 8)]
192 | elif 'yolov4' in model_name:
193 | if 'tiny' in model_name:
194 | return [(h // 32) * (w // 32), (h // 16) * (w // 16)]
195 | else:
196 | return [(h // 8) * (w // 8), (h // 16) * (w // 16), (w // 32) * (h // 32)]
197 | else:
198 |         raise ValueError('ERROR: unknown model (%s)!' % model_name)
199 |
200 |
201 | class TrtYOLO(object):
202 | """TrtYOLO class encapsulates things needed to run TRT YOLO."""
203 |
204 | def _load_engine(self):
205 | TRTbin = '%s.trt' % self.model
206 | with open(TRTbin, 'rb') as f, trt.Runtime(self.trt_logger) as runtime:
207 | return runtime.deserialize_cuda_engine(f.read())
208 |
209 | def __init__(self, model, input_shape, category_num=80, cuda_ctx=None):
210 | """Initialize TensorRT plugins, engine and context."""
211 | self.model = model
212 | self.input_shape = input_shape
213 | self.category_num = category_num
214 | self.cuda_ctx = cuda.Device(0).make_context()
215 | self.trt_logger = trt.Logger(trt.Logger.INFO)
216 | self.engine = self._load_engine()
217 | self.context = self.engine.create_execution_context()
218 | grid_sizes = get_yolo_grid_sizes(
219 | self.model, self.input_shape[0], self.input_shape[1])
220 | self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers(
221 | self.engine, grid_sizes)
222 | self.overall_boxes = []
223 | self.overall_scores = []
224 | self.overall_classes = []
225 |
226 | def __del__(self):
227 | """Free CUDA memories."""
228 | del self.outputs
229 | del self.inputs
230 | del self.stream
231 |
232 | def allocate_buffers(self, engine, grid_sizes):
233 | """Allocates all host/device in/out buffers required for an engine.
234 | Checked!
235 | """
236 |
237 | inputs = []
238 | outputs = []
239 | bindings = []
240 | output_idx = 0
241 | stream = cuda.Stream()
242 |
243 | for binding in engine:
244 | size = trt.volume(engine.get_binding_shape(
245 | binding)) * engine.max_batch_size
246 | dtype = trt.nptype(engine.get_binding_dtype(binding))
247 | # Allocate host and device buffers
248 | host_mem = cuda.pagelocked_empty(size, dtype)
249 | device_mem = cuda.mem_alloc(host_mem.nbytes)
250 |
251 | # Append the device buffer to device bindings.
252 | bindings.append(int(device_mem))
253 |
254 | # Append to the appropriate list.
255 | if engine.binding_is_input(binding):
256 | inputs.append(HostDeviceMem(host_mem, device_mem))
257 |
258 | else:
259 | # each grid has 3 anchors, each anchor generates a detection
260 | # output of 7 float32 values
261 | assert size == grid_sizes[output_idx] * \
262 | 3 * 7 * engine.max_batch_size
263 | outputs.append(HostDeviceMem(host_mem, device_mem))
264 | output_idx += 1
265 |
266 | return inputs, outputs, bindings, stream
267 |
268 | def detect1(self, img, conf_th=0.3):
269 | """ Detect objects in the input image """
270 |
271 | img_resized = _preprocess_yolo(img, self.input_shape)
272 | trt_outputs = []
273 |
274 |         # Set host input to the image; the host buffer is copied to the
275 |         # GPU below before inference is executed.
276 |         self.inputs[0].host = np.ascontiguousarray(img_resized)
277 | 
278 |         # Push this instance's CUDA context before doing any GPU work.
279 |         self.cuda_ctx.push()
280 |
281 | self.bindings = [int(i) for i in self.bindings]
282 |
283 | context = self.context
284 | bindings = self.bindings
285 | inputs = self.inputs
286 | outputs = self.outputs
287 | stream = self.stream
288 |
289 | # Transfer input data to the GPU.
290 | [cuda.memcpy_htod_async(inp.device, inp.host, stream)
291 | for inp in inputs]
292 | # Run inference.
293 | context.execute_async_v2(
294 | bindings=self.bindings, stream_handle=stream.handle)
295 | # Transfer predictions back from the GPU.
296 | [cuda.memcpy_dtoh_async(out.host, out.device, stream)
297 | for out in outputs]
298 | # Synchronize the stream
299 | stream.synchronize()
300 |
301 | trt_outputs = [out.host for out in outputs]
302 |
303 | del context
304 |
305 |         # Pop the CUDA context once inference is done.
306 |         self.cuda_ctx.pop()
307 |
308 | boxes, scores, classes = _postprocess_yolo(
309 | trt_outputs, img.shape[1], img.shape[0], conf_th)
310 |
311 | # clip x1, y1, x2, y2 within original image
312 | boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1)
313 | boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1)
314 | return boxes, scores, classes
315 |
316 | def detect2(self, img, conf_th=0.3):
317 | """ Detect objects in the input image """
318 |
319 | img_resized = _preprocess_yolo(img, self.input_shape)
320 | trt_outputs = []
321 |
322 |         # Set host input to the image; the host buffer is copied to the
323 |         # GPU below before inference is executed.
324 |         self.inputs[0].host = np.ascontiguousarray(img_resized)
325 | 
326 |         # Push this instance's CUDA context before doing any GPU work.
327 |         self.cuda_ctx.push()
328 |
329 | self.bindings = [int(i) for i in self.bindings]
330 |
331 | context = self.context
332 | bindings = self.bindings
333 | inputs = self.inputs
334 | outputs = self.outputs
335 | stream = self.stream
336 |
337 | # Transfer input data to the GPU.
338 | [cuda.memcpy_htod_async(inp.device, inp.host, stream)
339 | for inp in inputs]
340 | # Run inference.
341 | context.execute_async_v2(
342 | bindings=self.bindings, stream_handle=stream.handle)
343 | # Transfer predictions back from the GPU.
344 | [cuda.memcpy_dtoh_async(out.host, out.device, stream)
345 | for out in outputs]
346 | # Synchronize the stream
347 | stream.synchronize()
348 |
349 | trt_outputs = [out.host for out in outputs]
350 |
351 | del context
352 |
353 |         # Pop the CUDA context once inference is done.
354 |         self.cuda_ctx.pop()
355 |
356 | boxes, scores, classes = _postprocess_yolo(
357 | trt_outputs, img.shape[1], img.shape[0], conf_th)
358 |
359 | # clip x1, y1, x2, y2 within original image
360 | boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1)
361 | boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1)
362 | return boxes, scores, classes
363 |
364 | def detect3(self, img, conf_th=0.3):
365 | """ Detect objects in the input image """
366 |
367 | img_resized = _preprocess_yolo(img, self.input_shape)
368 | trt_outputs = []
369 |
370 |         # Set host input to the image; the host buffer is copied to the
371 |         # GPU below before inference is executed.
372 |         self.inputs[0].host = np.ascontiguousarray(img_resized)
373 | 
374 |         # Push this instance's CUDA context before doing any GPU work.
375 |         self.cuda_ctx.push()
376 |
377 | self.bindings = [int(i) for i in self.bindings]
378 |
379 | context = self.context
380 | bindings = self.bindings
381 | inputs = self.inputs
382 | outputs = self.outputs
383 | stream = self.stream
384 |
385 | # Transfer input data to the GPU.
386 | [cuda.memcpy_htod_async(inp.device, inp.host, stream)
387 | for inp in inputs]
388 | # Run inference.
389 | context.execute_async_v2(
390 | bindings=self.bindings, stream_handle=stream.handle)
391 | # Transfer predictions back from the GPU.
392 | [cuda.memcpy_dtoh_async(out.host, out.device, stream)
393 | for out in outputs]
394 | # Synchronize the stream
395 | stream.synchronize()
396 |
397 | trt_outputs = [out.host for out in outputs]
398 |
399 | del context
400 |
401 |         # Pop the CUDA context once inference is done.
402 |         self.cuda_ctx.pop()
403 |
404 | boxes, scores, classes = _postprocess_yolo(
405 | trt_outputs, img.shape[1], img.shape[0], conf_th)
406 |
407 | # clip x1, y1, x2, y2 within original image
408 | boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1)
409 | boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1)
410 | return boxes, scores, classes
411 |
412 | def detect4(self, img, conf_th=0.3):
413 | """ Detect objects in the input image """
414 |
415 | img_resized = _preprocess_yolo(img, self.input_shape)
416 | trt_outputs = []
417 |
418 |         # Set host input to the image; the host buffer is copied to the
419 |         # GPU below before inference is executed.
420 |         self.inputs[0].host = np.ascontiguousarray(img_resized)
421 | 
422 |         # Push this instance's CUDA context before doing any GPU work.
423 |         self.cuda_ctx.push()
424 |
425 | self.bindings = [int(i) for i in self.bindings]
426 |
427 | context = self.context
428 | bindings = self.bindings
429 | inputs = self.inputs
430 | outputs = self.outputs
431 | stream = self.stream
432 |
433 | # Transfer input data to the GPU.
434 | [cuda.memcpy_htod_async(inp.device, inp.host, stream)
435 | for inp in inputs]
436 | # Run inference.
437 | context.execute_async_v2(
438 | bindings=self.bindings, stream_handle=stream.handle)
439 | # Transfer predictions back from the GPU.
440 | [cuda.memcpy_dtoh_async(out.host, out.device, stream)
441 | for out in outputs]
442 | # Synchronize the stream
443 | stream.synchronize()
444 |
445 | trt_outputs = [out.host for out in outputs]
446 |
447 | del context
448 |
449 |         # Pop the CUDA context once inference is done.
450 |         self.cuda_ctx.pop()
451 |
452 | boxes, scores, classes = _postprocess_yolo(
453 | trt_outputs, img.shape[1], img.shape[0], conf_th)
454 |
455 | # clip x1, y1, x2, y2 within original image
456 | boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img.shape[1]-1)
457 | boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img.shape[0]-1)
458 | return boxes, scores, classes
459 |
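460 | # --------------------------------------------------------------------------
461 | # Usage sketch (illustrative only; the names and paths below are assumptions,
462 | # not part of the original module). Assuming a serialized 'yolov4-416.trt'
463 | # engine in the working directory and a BGR image `img` (numpy array, e.g.
464 | # from cv_bridge):
465 | #
466 | #   trt_yolo = TrtYOLO('yolov4-416', (416, 416), category_num=80)
467 | #   boxes, scores, classes = trt_yolo.detect1(img, conf_th=0.3)
468 | #
469 | # detect1() through detect4() are identical and may be used interchangeably,
470 | # e.g. one per subscribed image stream.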
--------------------------------------------------------------------------------
/yolo/__pycache__/plugins.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/indra4837/yolov4_trt_ros/4181eada9ae7d53fe61b6864c4d507ae9f6f7146/yolo/__pycache__/plugins.cpython-36.pyc
--------------------------------------------------------------------------------
/yolo/convert_yolo_trt.sh:
--------------------------------------------------------------------------------
1 | echo "YOLOv3 or YOLOv4. Input 3 or 4"
2 | read model_type
3 |
4 | echo "Do you want to download the model? [Y/N]"
5 | read download
6 |
7 | if [[ $download == 'Y' ]]
8 | then
9 | if [[ $model_type == 3 ]]
10 | then
11 | wget https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg -q --show-progress --no-clobber
12 | wget https://pjreddie.com/media/files/yolov3.weights -q --show-progress --no-clobber
13 | else
14 | wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg -q --show-progress --no-clobber
15 | wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights -q --show-progress --no-clobber
16 | fi
17 | fi
18 |
19 | echo "What is the input shape? Input 288 or 416 or 608"
20 | read input_shape
21 |
22 | if [[ $model_type == 3 ]]
23 | then
24 | if [[ $input_shape == 288 ]]
25 | then
26 | echo "Creating yolov3-288.cfg and yolov3-288.weights"
27 | cat yolov3.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=288/' | sed -e '8s/height=608/height=288/' > yolov3-288.cfg
28 | ln -sf yolov3.weights yolov3-288.weights
29 | fi
30 | if [[ $input_shape == 416 ]]
31 | then
32 | echo "Creating yolov3-416.cfg and yolov3-416.weights"
33 | cat yolov3.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=416/' | sed -e '8s/height=608/height=416/' > yolov3-416.cfg
34 | ln -sf yolov3.weights yolov3-416.weights
35 | fi
36 | if [[ $input_shape == 608 ]]
37 | then
38 | echo "Creating yolov3-608.cfg and yolov3-608.weights"
39 | cat yolov3.cfg | sed -e '2s/batch=64/batch=1/' > yolov3-608.cfg
40 | ln -sf yolov3.weights yolov3-608.weights
41 | fi
42 | else
43 | if [[ $input_shape == 288 ]]
44 | then
45 | echo "Creating yolov4-288.cfg and yolov4-288.weights"
46 | cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=288/' | sed -e '8s/height=608/height=288/' > yolov4-288.cfg
47 | ln -sf yolov4.weights yolov4-288.weights
48 | fi
49 | if [[ $input_shape == 416 ]]
50 | then
51 | echo "Creating yolov4-416.cfg and yolov4-416.weights"
52 | cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=416/' | sed -e '8s/height=608/height=416/' > yolov4-416.cfg
53 | ln -sf yolov4.weights yolov4-416.weights
54 | fi
55 | if [[ $input_shape == 608 ]]
56 | then
57 | echo "Creating yolov4-608.cfg and yolov4-608.weights"
58 | cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' > yolov4-608.cfg
59 | ln -sf yolov4.weights yolov4-608.weights
60 | fi
61 | fi
62 |
63 | echo "How many categories are there?"
64 | read category_num
65 | model_name="yolov${model_type}-${input_shape}"
66 |
67 | # convert from yolo to onnx
68 | python3 yolo_to_onnx.py -m $model_name -c $category_num
69 |
70 | echo "Done converting to .onnx"
71 | echo "..."
72 | echo "Now converting to .trt"
73 |
74 | # convert from onnx to trt
75 | python3 onnx_to_tensorrt.py -m $model_name -c $category_num --verbose
76 |
77 | echo "Conversion from yolo to trt done!"
78 |
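79 | # Usage (interactive): run this script from the yolo/ directory, e.g.
80 | #   bash convert_yolo_trt.sh
81 | # and answer the prompts (model type, whether to download, input shape and
82 | # number of categories). The resulting <model>-<shape>.trt engine is written
83 | # to the current directory.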
--------------------------------------------------------------------------------
/yolo/download_yolo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | # yolov3-tiny
6 | #wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-tiny.cfg -q --show-progress --no-clobber
7 | #wget https://pjreddie.com/media/files/yolov3-tiny.weights -q --show-progress --no-clobber
8 |
9 | # yolov3
10 | #wget https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg -q --show-progress --no-clobber
11 | #wget https://pjreddie.com/media/files/yolov3.weights -q --show-progress --no-clobber
12 |
13 | # yolov3-spp
14 | #wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3-spp.cfg -q --show-progress --no-clobber
15 | #wget https://pjreddie.com/media/files/yolov3-spp.weights -q --show-progress --no-clobber
16 |
17 | # yolov4-tiny
18 | #wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny.cfg -q --show-progress --no-clobber
19 | #wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights -q --show-progress --no-clobber
20 |
21 | # yolov4
22 | wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg -q --show-progress --no-clobber
23 | wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights -q --show-progress --no-clobber
24 |
25 | #echo
26 | #echo "Creating yolov3-tiny-288.cfg and yolov3-tiny-288.weights"
27 | #cat yolov3-tiny.cfg | sed -e '8s/width=416/width=288/' | sed -e '9s/height=416/height=288/' > yolov3-tiny-288.cfg
28 | #echo >> yolov3-tiny-288.cfg
29 | #ln -sf yolov3-tiny.weights yolov3-tiny-288.weights
30 | #echo "Creating yolov3-tiny-416.cfg and yolov3-tiny-416.weights"
31 | #cp yolov3-tiny.cfg yolov3-tiny-416.cfg
32 | #echo >> yolov3-tiny-416.cfg
33 | #ln -sf yolov3-tiny.weights yolov3-tiny-416.weights
34 |
35 | #echo "Creating yolov3-288.cfg and yolov3-288.weights"
36 | #cat yolov3.cfg | sed -e '8s/width=608/width=288/' | sed -e '9s/height=608/height=288/' > yolov3-288.cfg
37 | #ln -sf yolov3.weights yolov3-288.weights
38 | #echo "Creating yolov3-416.cfg and yolov3-416.weights"
39 | #cat yolov3.cfg | sed -e '8s/width=608/width=416/' | sed -e '9s/height=608/height=416/' > yolov3-416.cfg
40 | #ln -sf yolov3.weights yolov3-416.weights
41 | #echo "Creating yolov3-608.cfg and yolov3-608.weights"
42 | #cp yolov3.cfg yolov3-608.cfg
43 | #ln -sf yolov3.weights yolov3-608.weights
44 |
45 | #echo "Creating yolov3-spp-288.cfg and yolov3-spp-288.weights"
46 | #cat yolov3-spp.cfg | sed -e '8s/width=608/width=288/' | sed -e '9s/height=608/height=288/' > yolov3-spp-288.cfg
47 | #ln -sf yolov3-spp.weights yolov3-spp-288.weights
48 | #echo "Creating yolov3-spp-416.cfg and yolov3-spp-416.weights"
49 | #cat yolov3-spp.cfg | sed -e '8s/width=608/width=416/' | sed -e '9s/height=608/height=416/' > yolov3-spp-416.cfg
50 | #ln -sf yolov3-spp.weights yolov3-spp-416.weights
51 | #echo "Creating yolov3-spp-608.cfg and yolov3-spp-608.weights"
52 | #cp yolov3-spp.cfg yolov3-spp-608.cfg
53 | #ln -sf yolov3-spp.weights yolov3-spp-608.weights
54 |
55 | #echo "Creating yolov4-tiny-288.cfg and yolov4-tiny-288.weights"
56 | #cat yolov4-tiny.cfg | sed -e '6s/batch=64/batch=1/' | sed -e '8s/width=416/width=288/' | sed -e '9s/height=416/height=288/' > yolov4-tiny-288.cfg
57 | #echo >> yolov4-tiny-288.cfg
58 | #ln -sf yolov4-tiny.weights yolov4-tiny-288.weights
59 | #echo "Creating yolov4-tiny-416.cfg and yolov4-tiny-416.weights"
60 | #cat yolov4-tiny.cfg | sed -e '6s/batch=64/batch=1/' > yolov4-tiny-416.cfg
61 | #echo >> yolov4-tiny-416.cfg
62 | #ln -sf yolov4-tiny.weights yolov4-tiny-416.weights
63 |
64 | #echo "Creating yolov4-288.cfg and yolov4-288.weights"
65 | #cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=288/' | sed -e '8s/height=608/height=288/' > yolov4-288.cfg
66 | #ln -sf yolov4.weights yolov4-288.weights
67 | echo "Creating yolov4-416.cfg and yolov4-416.weights"
68 | cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' | sed -e '7s/width=608/width=416/' | sed -e '8s/height=608/height=416/' > yolov4-416.cfg
69 | ln -sf yolov4.weights yolov4-416.weights
70 | #echo "Creating yolov4-608.cfg and yolov4-608.weights"
71 | #cat yolov4.cfg | sed -e '2s/batch=64/batch=1/' > yolov4-608.cfg
72 | #ln -sf yolov4.weights yolov4-608.weights
73 |
74 | #echo
75 | #echo "Done."
76 |
77 |
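78 | # Note: only yolov4-416 is downloaded and prepared by default; uncomment the
79 | # corresponding blocks above to prepare other models or input resolutions.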
--------------------------------------------------------------------------------
/yolo/onnx_to_tensorrt.py:
--------------------------------------------------------------------------------
1 | # onnx_to_tensorrt.py
2 | #
3 | # Copyright 1993-2019 NVIDIA Corporation. All rights reserved.
4 | #
5 | # NOTICE TO LICENSEE:
6 | #
7 | # This source code and/or documentation ("Licensed Deliverables") are
8 | # subject to NVIDIA intellectual property rights under U.S. and
9 | # international Copyright laws.
10 | #
11 | # These Licensed Deliverables contained herein is PROPRIETARY and
12 | # CONFIDENTIAL to NVIDIA and is being provided under the terms and
13 | # conditions of a form of NVIDIA software license agreement by and
14 | # between NVIDIA and Licensee ("License Agreement") or electronically
15 | # accepted by Licensee. Notwithstanding any terms or conditions to
16 | # the contrary in the License Agreement, reproduction or disclosure
17 | # of the Licensed Deliverables to any third party without the express
18 | # written consent of NVIDIA is prohibited.
19 | #
20 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
21 | # LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
22 | # SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
23 | # PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
24 | # NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
25 | # DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
26 | # NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
27 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
28 | # LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
29 | # SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
30 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
31 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
32 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
33 | # OF THESE LICENSED DELIVERABLES.
34 | #
35 | # U.S. Government End Users. These Licensed Deliverables are a
36 | # "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
37 | # 1995), consisting of "commercial computer software" and "commercial
38 | # computer software documentation" as such terms are used in 48
39 | # C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
40 | # only as a commercial end item. Consistent with 48 C.F.R.12.212 and
41 | # 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
42 | # U.S. Government End Users acquire the Licensed Deliverables with
43 | # only those rights set forth herein.
44 | #
45 | # Any use of the Licensed Deliverables in individual and commercial
46 | # software must include, in the user documentation and internal
47 | # comments to the code, the above Disclaimer and U.S. Government End
48 | # Users Notice.
49 | #
50 |
51 |
52 | from __future__ import print_function
53 |
54 | import os
55 | import argparse
56 |
57 | import tensorrt as trt
58 |
59 | from plugins import add_yolo_plugins
60 |
61 |
62 | EXPLICIT_BATCH = []
63 | if trt.__version__[0] >= '7':
64 | EXPLICIT_BATCH.append(
65 | 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
66 |
67 |
68 | def build_engine(onnx_file_path, category_num=80, verbose=False):
69 | """Build a TensorRT engine from an ONNX file."""
70 | TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
71 | with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
72 | builder.max_workspace_size = 1 << 28
73 | builder.max_batch_size = 1
74 | builder.fp16_mode = True
75 | #builder.strict_type_constraints = True
76 |
77 | # Parse model file
78 | print('Loading ONNX file from path {}...'.format(onnx_file_path))
79 | with open(onnx_file_path, 'rb') as model:
80 | if not parser.parse(model.read()):
81 | print('ERROR: Failed to parse the ONNX file.')
82 | for error in range(parser.num_errors):
83 | print(parser.get_error(error))
84 | return None
85 | if trt.__version__[0] >= '7':
86 | # The actual yolo*.onnx is generated with batch size 64.
87 | # Reshape input to batch size 1
88 | shape = list(network.get_input(0).shape)
89 | shape[0] = 1
90 | network.get_input(0).shape = shape
91 |
92 | print('Adding yolo_layer plugins...')
93 | model_name = onnx_file_path[:-5]
94 | network = add_yolo_plugins(
95 | network, model_name, category_num, TRT_LOGGER)
96 |
97 |         print('Building an engine. This may take a while...')
98 | print('(Use "--verbose" to enable verbose logging.)')
99 | engine = builder.build_cuda_engine(network)
100 | print('Completed creating engine.')
101 | return engine
102 |
103 |
104 | def main():
105 | """Create a TensorRT engine for ONNX-based YOLO."""
106 | parser = argparse.ArgumentParser()
107 | parser.add_argument(
108 | '-v', '--verbose', action='store_true',
109 | help='enable verbose output (for debugging)')
110 | parser.add_argument(
111 | '-c', '--category_num', type=int, default=80,
112 | help='number of object categories [80]')
113 | parser.add_argument(
114 | '-m', '--model', type=str, required=True,
115 | help=('[yolov3|yolov3-tiny|yolov3-spp|yolov4|yolov4-tiny]-'
116 | '[{dimension}], where dimension could be a single '
117 | 'number (e.g. 288, 416, 608) or WxH (e.g. 416x256)'))
118 | args = parser.parse_args()
119 |
120 | onnx_file_path = '%s.onnx' % args.model
121 | if not os.path.isfile(onnx_file_path):
122 | raise SystemExit('ERROR: file (%s) not found! You might want to run yolo_to_onnx.py first to generate it.' % onnx_file_path)
123 | engine_file_path = '%s.trt' % args.model
124 |     engine = build_engine(onnx_file_path, args.category_num, args.verbose)
125 |     if engine is None:
126 |         raise SystemExit('ERROR: failed to build the TensorRT engine!')
127 |     with open(engine_file_path, 'wb') as f:
128 |         f.write(engine.serialize())
129 |     print('Serialized the TensorRT engine to file: %s' % engine_file_path)
130 | 
131 | 
132 | if __name__ == '__main__':
133 |     main()
134 | 
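135 | # Example invocation (mirroring convert_yolo_trt.sh; assumes yolov4-416.onnx
136 | # has already been generated by yolo_to_onnx.py):
137 | #   python3 onnx_to_tensorrt.py -m yolov4-416 -c 80 --verbose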
--------------------------------------------------------------------------------
/yolo/plugins.py:
--------------------------------------------------------------------------------
1 | """plugins.py
2 |
3 | I referenced the code from https://github.com/dongfangduoshou123/YoloV3-TensorRT/blob/master/seralizeEngineFromPythonAPI.py
4 | """
5 |
6 |
7 | import ctypes
8 |
9 | import numpy as np
10 | import tensorrt as trt
11 |
12 | try:
13 | ctypes.cdll.LoadLibrary('../plugins/libyolo_layer.so')
14 | except OSError as e:
15 | raise SystemExit('ERROR: failed to load ../plugins/libyolo_layer.so. '
16 | 'Did you forget to do a "make" in the "../plugins/" '
17 | 'subdirectory?')
18 |
19 |
20 | def get_input_wh(model_name):
21 | """Get input_width and input_height of the model."""
22 | yolo_dim = model_name.split('-')[-1]
23 | if 'x' in yolo_dim:
24 | dim_split = yolo_dim.split('x')
25 | if len(dim_split) != 2:
26 | raise ValueError('ERROR: bad yolo_dim (%s)!' % yolo_dim)
27 | w, h = int(dim_split[0]), int(dim_split[1])
28 | else:
29 | h = w = int(yolo_dim)
30 | if h % 32 != 0 or w % 32 != 0:
31 | raise ValueError('ERROR: bad yolo_dim (%s)!' % yolo_dim)
32 | return w, h
33 |
34 |
35 | def get_yolo_whs(model_name, w, h):
36 | """Get yolo_width and yolo_height for all yolo layers in the model."""
37 | if 'yolov3' in model_name:
38 | if 'tiny' in model_name:
39 | return [[w // 32, h // 32], [w // 16, h // 16]]
40 | else:
41 | return [[w // 32, h // 32], [w // 16, h // 16], [w // 8, h // 8]]
42 | elif 'yolov4' in model_name:
43 | if 'tiny' in model_name:
44 | return [[w // 32, h // 32], [w // 16, h // 16]]
45 | else:
46 | return [[w // 8, h // 8], [w // 16, h // 16], [w // 32, h // 32]]
47 | else:
48 |         raise ValueError('ERROR: unknown model (%s)!' % model_name)
49 |
50 |
51 | def verify_classes(model_name, num_classes):
52 | """Verify 'classes=??' in cfg matches user-specified num_classes."""
53 | cfg_file_path = model_name + '.cfg'
54 | with open(cfg_file_path, 'r') as f:
55 | cfg_lines = f.readlines()
56 | classes_lines = [l.strip() for l in cfg_lines if l.startswith('classes')]
57 | classes = [int(l.split('=')[-1]) for l in classes_lines]
58 | return all([c == num_classes for c in classes])
59 |
60 |
61 | def get_anchors(model_name):
62 | """Get anchors of all yolo layers from the cfg file."""
63 | cfg_file_path = model_name + '.cfg'
64 | with open(cfg_file_path, 'r') as f:
65 | cfg_lines = f.readlines()
66 | yolo_lines = [l.strip() for l in cfg_lines if l.startswith('[yolo]')]
67 | mask_lines = [l.strip() for l in cfg_lines if l.startswith('mask')]
68 | anch_lines = [l.strip() for l in cfg_lines if l.startswith('anchors')]
69 | assert len(mask_lines) == len(yolo_lines)
70 | assert len(anch_lines) == len(yolo_lines)
71 | anchor_list = eval('[%s]' % anch_lines[0].split('=')[-1])
72 | mask_strs = [l.split('=')[-1] for l in mask_lines]
73 | masks = [eval('[%s]' % s) for s in mask_strs]
74 | anchors = []
75 | for mask in masks:
76 | curr_anchors = []
77 | for m in mask:
78 | curr_anchors.append(anchor_list[m * 2])
79 | curr_anchors.append(anchor_list[m * 2 + 1])
80 | anchors.append(curr_anchors)
81 | return anchors
82 |
83 |
84 | def get_scales(model_name):
85 | """Get scale_x_y's of all yolo layers from the cfg file."""
86 | cfg_file_path = model_name + '.cfg'
87 | with open(cfg_file_path, 'r') as f:
88 | cfg_lines = f.readlines()
89 | yolo_lines = [l.strip() for l in cfg_lines if l.startswith('[yolo]')]
90 | scale_lines = [l.strip() for l in cfg_lines if l.startswith('scale_x_y')]
91 | if len(scale_lines) == 0:
92 | return [1.0] * len(yolo_lines)
93 | else:
94 | assert len(scale_lines) == len(yolo_lines)
95 | return [float(l.split('=')[-1]) for l in scale_lines]
96 |
97 |
98 | def get_plugin_creator(plugin_name, logger):
99 | """Get the TensorRT plugin creator."""
100 | trt.init_libnvinfer_plugins(logger, '')
101 | plugin_creator_list = trt.get_plugin_registry().plugin_creator_list
102 | for c in plugin_creator_list:
103 | if c.name == plugin_name:
104 | return c
105 | return None
106 |
107 |
108 | def add_yolo_plugins(network, model_name, num_classes, logger):
109 | """Add yolo plugins into a TensorRT network."""
110 | input_width, input_height = get_input_wh(model_name)
111 | yolo_whs = get_yolo_whs(model_name, input_width, input_height)
112 | if not verify_classes(model_name, num_classes):
113 | raise ValueError('bad num_classes (%d)' % num_classes)
114 | anchors = get_anchors(model_name)
115 | if len(anchors) != len(yolo_whs):
116 | raise ValueError('bad number of yolo layers: %d vs. %d' %
117 | (len(anchors), len(yolo_whs)))
118 | if network.num_outputs != len(anchors):
119 | raise ValueError('bad number of network outputs: %d vs. %d' %
120 | (network.num_outputs, len(anchors)))
121 | scales = get_scales(model_name)
122 | if any([s < 1.0 for s in scales]):
123 | raise ValueError('bad scale_x_y: %s' % str(scales))
124 |
125 | plugin_creator = get_plugin_creator('YoloLayer_TRT', logger)
126 | if not plugin_creator:
127 | raise RuntimeError('cannot get YoloLayer_TRT plugin creator')
128 | old_tensors = [network.get_output(i) for i in range(network.num_outputs)]
129 | new_tensors = [None] * network.num_outputs
130 | for i, old_tensor in enumerate(old_tensors):
131 | new_tensors[i] = network.add_plugin_v2(
132 | [old_tensor],
133 | plugin_creator.create_plugin('YoloLayer_TRT', trt.PluginFieldCollection([
134 | trt.PluginField("yoloWidth", np.array(yolo_whs[i][0], dtype=np.int32), trt.PluginFieldType.INT32),
135 | trt.PluginField("yoloHeight", np.array(yolo_whs[i][1], dtype=np.int32), trt.PluginFieldType.INT32),
136 | trt.PluginField("inputWidth", np.array(input_width, dtype=np.int32), trt.PluginFieldType.INT32),
137 | trt.PluginField("inputHeight", np.array(input_height, dtype=np.int32), trt.PluginFieldType.INT32),
138 | trt.PluginField("numClasses", np.array(num_classes, dtype=np.int32), trt.PluginFieldType.INT32),
139 | trt.PluginField("numAnchors", np.array(len(anchors[i]) // 2, dtype=np.int32), trt.PluginFieldType.INT32),
140 | trt.PluginField("anchors", np.ascontiguousarray(anchors[i], dtype=np.float32), trt.PluginFieldType.FLOAT32),
141 | trt.PluginField("scaleXY", np.array(scales[i], dtype=np.float32), trt.PluginFieldType.FLOAT32),
142 | ]))
143 | ).get_output(0)
144 |
145 | for new_tensor in new_tensors:
146 | network.mark_output(new_tensor)
147 | for old_tensor in old_tensors:
148 | network.unmark_output(old_tensor)
149 |
150 | return network
151 |
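152 | # Example (as used by onnx_to_tensorrt.py): after parsing the ONNX model into a
153 | # TensorRT network, append the YoloLayer_TRT plugins before building the engine:
154 | #
155 | #   network = add_yolo_plugins(network, 'yolov4-416', 80, TRT_LOGGER)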
--------------------------------------------------------------------------------
/yolo/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.15.1
2 | onnx==1.4.1
3 | pycuda>=2017.1.1
4 |
--------------------------------------------------------------------------------
/yolo/yolo_to_onnx.py:
--------------------------------------------------------------------------------
1 | # yolo_to_onnx.py
2 | #
3 | # Copyright 1993-2019 NVIDIA Corporation. All rights reserved.
4 | #
5 | # NOTICE TO LICENSEE:
6 | #
7 | # This source code and/or documentation ("Licensed Deliverables") are
8 | # subject to NVIDIA intellectual property rights under U.S. and
9 | # international Copyright laws.
10 | #
11 | # These Licensed Deliverables contained herein is PROPRIETARY and
12 | # CONFIDENTIAL to NVIDIA and is being provided under the terms and
13 | # conditions of a form of NVIDIA software license agreement by and
14 | # between NVIDIA and Licensee ("License Agreement") or electronically
15 | # accepted by Licensee. Notwithstanding any terms or conditions to
16 | # the contrary in the License Agreement, reproduction or disclosure
17 | # of the Licensed Deliverables to any third party without the express
18 | # written consent of NVIDIA is prohibited.
19 | #
20 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
21 | # LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
22 | # SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
23 | # PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
24 | # NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
25 | # DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
26 | # NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
27 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
28 | # LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
29 | # SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
30 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
31 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
32 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
33 | # OF THESE LICENSED DELIVERABLES.
34 | #
35 | # U.S. Government End Users. These Licensed Deliverables are a
36 | # "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
37 | # 1995), consisting of "commercial computer software" and "commercial
38 | # computer software documentation" as such terms are used in 48
39 | # C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
40 | # only as a commercial end item. Consistent with 48 C.F.R.12.212 and
41 | # 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
42 | # U.S. Government End Users acquire the Licensed Deliverables with
43 | # only those rights set forth herein.
44 | #
45 | # Any use of the Licensed Deliverables in individual and commercial
46 | # software must include, in the user documentation and internal
47 | # comments to the code, the above Disclaimer and U.S. Government End
48 | # Users Notice.
49 | #
50 |
51 |
52 | from __future__ import print_function
53 |
54 | import os
55 | import sys
56 | import hashlib
57 | import argparse
58 | from collections import OrderedDict
59 |
60 | import numpy as np
61 | import onnx
62 | from onnx import helper, TensorProto
63 |
64 | from plugins import verify_classes, get_input_wh
65 |
66 |
67 | class DarkNetParser(object):
68 | """Definition of a parser for DarkNet-based YOLO model."""
69 |
70 | def __init__(self, supported_layers):
71 | """Initializes a DarkNetParser object.
72 |
73 | Keyword argument:
74 |         supported_layers -- a string list of supported layers in DarkNet naming convention;
75 |         layer parameters are only added to layer_configs if the parsed layer type is supported.
76 | """
77 |
78 |         # An OrderedDict mapping layer names to dictionaries with all layer
79 |         # parameters:
80 | self.layer_configs = OrderedDict()
81 | self.supported_layers = supported_layers
82 | self.layer_counter = 0
83 |
84 | def parse_cfg_file(self, cfg_file_path):
85 | """Takes the yolov?.cfg file and parses it layer by layer,
86 | appending each layer's parameters as a dictionary to layer_configs.
87 |
88 | Keyword argument:
89 |         cfg_file_path -- path to the .cfg file to be parsed
90 | """
91 | with open(cfg_file_path, 'r') as cfg_file:
92 | remainder = cfg_file.read()
93 | while remainder is not None:
94 | layer_dict, layer_name, remainder = self._next_layer(remainder)
95 | if layer_dict is not None:
96 | self.layer_configs[layer_name] = layer_dict
97 | return self.layer_configs
98 |
99 | def _next_layer(self, remainder):
100 | """Takes in a string and segments it by looking for DarkNet delimiters.
101 | Returns the layer parameters and the remaining string after the last delimiter.
102 | Example for the first Conv layer in yolo.cfg ...
103 |
104 | [convolutional]
105 | batch_normalize=1
106 | filters=32
107 | size=3
108 | stride=1
109 | pad=1
110 | activation=leaky
111 |
112 | ... becomes the following layer_dict return value:
113 | {'activation': 'leaky', 'stride': 1, 'pad': 1, 'filters': 32,
114 | 'batch_normalize': 1, 'type': 'convolutional', 'size': 3}.
115 |
116 | '001_convolutional' is returned as layer_name, and all lines that follow in yolo.cfg
117 | are returned as the next remainder.
118 |
119 | Keyword argument:
120 | remainder -- a string with all raw text after the previously parsed layer
121 | """
122 | remainder = remainder.split('[', 1)
123 | if len(remainder) == 2:
124 | remainder = remainder[1]
125 | else:
126 | return None, None, None
127 | remainder = remainder.split(']', 1)
128 | if len(remainder) == 2:
129 | layer_type, remainder = remainder
130 | else:
131 | return None, None, None
132 | if remainder.replace(' ', '')[0] == '#':
133 | remainder = remainder.split('\n', 1)[1]
134 |
135 | out = remainder.split('\n\n', 1)
136 | if len(out) == 2:
137 | layer_param_block, remainder = out[0], out[1]
138 | else:
139 | layer_param_block, remainder = out[0], ''
140 | if layer_type == 'yolo':
141 | layer_param_lines = []
142 | else:
143 | layer_param_lines = layer_param_block.split('\n')[1:]
144 | layer_name = str(self.layer_counter).zfill(3) + '_' + layer_type
145 | layer_dict = dict(type=layer_type)
146 | if layer_type in self.supported_layers:
147 | for param_line in layer_param_lines:
148 | if param_line[0] == '#':
149 | continue
150 | param_type, param_value = self._parse_params(param_line)
151 | layer_dict[param_type] = param_value
152 | self.layer_counter += 1
153 | return layer_dict, layer_name, remainder
154 |
155 | def _parse_params(self, param_line):
156 | """Identifies the parameters contained in one of the cfg file and returns
157 | them in the required format for each parameter type, e.g. as a list, an int or a float.
158 |
159 | Keyword argument:
160 | param_line -- one parsed line within a layer block
161 | """
162 | param_line = param_line.replace(' ', '')
163 | param_type, param_value_raw = param_line.split('=')
164 | param_value = None
165 | if param_type == 'layers':
166 | layer_indexes = list()
167 | for index in param_value_raw.split(','):
168 | layer_indexes.append(int(index))
169 | param_value = layer_indexes
170 | elif isinstance(param_value_raw, str) and not param_value_raw.isalpha():
171 | condition_param_value_positive = param_value_raw.isdigit()
172 | condition_param_value_negative = param_value_raw[0] == '-' and \
173 | param_value_raw[1:].isdigit()
174 | if condition_param_value_positive or condition_param_value_negative:
175 | param_value = int(param_value_raw)
176 | else:
177 | param_value = float(param_value_raw)
178 | else:
179 | param_value = str(param_value_raw)
180 | return param_type, param_value
181 |
182 |
183 | class MajorNodeSpecs(object):
184 | """Helper class used to store the names of ONNX output names,
185 | corresponding to the output of a DarkNet layer and its output channels.
186 | Some DarkNet layers are not created and there is no corresponding ONNX node,
187 | but we still need to track them in order to set up skip connections.
188 | """
189 |
190 | def __init__(self, name, channels):
191 | """ Initialize a MajorNodeSpecs object.
192 |
193 | Keyword arguments:
194 | name -- name of the ONNX node
195 | channels -- number of output channels of this node
196 | """
197 | self.name = name
198 | self.channels = channels
199 | self.created_onnx_node = False
200 | if name is not None and isinstance(channels, int) and channels > 0:
201 | self.created_onnx_node = True
202 |
203 |
204 | class ConvParams(object):
205 | """Helper class to store the hyper parameters of a Conv layer,
206 | including its prefix name in the ONNX graph and the expected dimensions
207 | of weights for convolution, bias, and batch normalization.
208 |
209 | Additionally acts as a wrapper for generating safe names for all
210 | weights, checking on feasible combinations.
211 | """
212 |
213 | def __init__(self, node_name, batch_normalize, conv_weight_dims):
214 | """Constructor based on the base node name (e.g. 101_convolutional), the batch
215 | normalization setting, and the convolutional weights shape.
216 |
217 | Keyword arguments:
218 | node_name -- base name of this YOLO convolutional layer
219 | batch_normalize -- bool value if batch normalization is used
220 | conv_weight_dims -- the dimensions of this layer's convolutional weights
221 | """
222 | self.node_name = node_name
223 | self.batch_normalize = batch_normalize
224 | assert len(conv_weight_dims) == 4
225 | self.conv_weight_dims = conv_weight_dims
226 |
227 | def generate_param_name(self, param_category, suffix):
228 | """Generates a name based on two string inputs,
229 | and checks if the combination is valid."""
230 | assert suffix
231 | assert param_category in ['bn', 'conv']
232 | assert(suffix in ['scale', 'mean', 'var', 'weights', 'bias'])
233 | if param_category == 'bn':
234 | assert self.batch_normalize
235 | assert suffix in ['scale', 'bias', 'mean', 'var']
236 | elif param_category == 'conv':
237 | assert suffix in ['weights', 'bias']
238 | if suffix == 'bias':
239 | assert not self.batch_normalize
240 | param_name = self.node_name + '_' + param_category + '_' + suffix
241 | return param_name
242 |
243 | class UpsampleParams(object):
244 |     """Helper class to store the scale parameter for an Upsample node."""
245 |
246 | def __init__(self, node_name, value):
247 | """Constructor based on the base node name (e.g. 86_Upsample),
248 | and the value of the scale input tensor.
249 |
250 | Keyword arguments:
251 | node_name -- base name of this YOLO Upsample layer
252 | value -- the value of the scale input to the Upsample layer as a numpy array
253 | """
254 | self.node_name = node_name
255 | self.value = value
256 |
257 | def generate_param_name(self):
258 | """Generates the scale parameter name for the Upsample node."""
259 | param_name = self.node_name + '_' + 'scale'
260 | return param_name
261 |
262 | class WeightLoader(object):
263 | """Helper class used for loading the serialized weights of a binary file stream
264 | and returning the initializers and the input tensors required for populating
265 | the ONNX graph with weights.
266 | """
267 |
268 | def __init__(self, weights_file_path):
269 | """Initialized with a path to the YOLO .weights file.
270 |
271 | Keyword argument:
272 | weights_file_path -- path to the weights file.
273 | """
274 | self.weights_file = self._open_weights_file(weights_file_path)
275 |
276 | def load_upsample_scales(self, upsample_params):
277 | """Returns the initializers with the value of the scale input
278 | tensor given by upsample_params.
279 |
280 | Keyword argument:
281 | upsample_params -- a UpsampleParams object
282 | """
283 | initializer = list()
284 | inputs = list()
285 | name = upsample_params.generate_param_name()
286 | shape = upsample_params.value.shape
287 | data = upsample_params.value
288 | scale_init = helper.make_tensor(
289 | name, TensorProto.FLOAT, shape, data)
290 | scale_input = helper.make_tensor_value_info(
291 | name, TensorProto.FLOAT, shape)
292 | initializer.append(scale_init)
293 | inputs.append(scale_input)
294 | return initializer, inputs
295 |
296 |
297 | def load_conv_weights(self, conv_params):
298 | """Returns the initializers with weights from the weights file and
299 | the input tensors of a convolutional layer for all corresponding ONNX nodes.
300 |
301 | Keyword argument:
302 | conv_params -- a ConvParams object
303 | """
304 | initializer = list()
305 | inputs = list()
306 | if conv_params.batch_normalize:
307 | bias_init, bias_input = self._create_param_tensors(
308 | conv_params, 'bn', 'bias')
309 | bn_scale_init, bn_scale_input = self._create_param_tensors(
310 | conv_params, 'bn', 'scale')
311 | bn_mean_init, bn_mean_input = self._create_param_tensors(
312 | conv_params, 'bn', 'mean')
313 | bn_var_init, bn_var_input = self._create_param_tensors(
314 | conv_params, 'bn', 'var')
315 | initializer.extend(
316 | [bn_scale_init, bias_init, bn_mean_init, bn_var_init])
317 | inputs.extend([bn_scale_input, bias_input,
318 | bn_mean_input, bn_var_input])
319 | else:
320 | bias_init, bias_input = self._create_param_tensors(
321 | conv_params, 'conv', 'bias')
322 | initializer.append(bias_init)
323 | inputs.append(bias_input)
324 | conv_init, conv_input = self._create_param_tensors(
325 | conv_params, 'conv', 'weights')
326 | initializer.append(conv_init)
327 | inputs.append(conv_input)
328 | return initializer, inputs
329 |
330 | def _open_weights_file(self, weights_file_path):
331 | """Opens a YOLO DarkNet file stream and skips the header.
332 |
333 | Keyword argument:
334 | weights_file_path -- path to the weights file.
335 | """
336 | weights_file = open(weights_file_path, 'rb')
337 | length_header = 5
338 | np.ndarray(shape=(length_header, ), dtype='int32',
339 | buffer=weights_file.read(length_header * 4))
340 | return weights_file
341 |
342 | def _create_param_tensors(self, conv_params, param_category, suffix):
343 | """Creates the initializers with weights from the weights file together with
344 | the input tensors.
345 |
346 | Keyword arguments:
347 | conv_params -- a ConvParams object
348 | param_category -- the category of parameters to be created ('bn' or 'conv')
349 | suffix -- a string determining the sub-type of above param_category (e.g.,
350 | 'weights' or 'bias')
351 | """
352 | param_name, param_data, param_data_shape = self._load_one_param_type(
353 | conv_params, param_category, suffix)
354 |
355 | initializer_tensor = helper.make_tensor(
356 | param_name, TensorProto.FLOAT, param_data_shape, param_data)
357 | input_tensor = helper.make_tensor_value_info(
358 | param_name, TensorProto.FLOAT, param_data_shape)
359 | return initializer_tensor, input_tensor
360 |
361 | def _load_one_param_type(self, conv_params, param_category, suffix):
362 | """Deserializes the weights from a file stream in the DarkNet order.
363 |
364 | Keyword arguments:
365 | conv_params -- a ConvParams object
366 | param_category -- the category of parameters to be created ('bn' or 'conv')
367 | suffix -- a string determining the sub-type of above param_category (e.g.,
368 | 'weights' or 'bias')
369 | """
370 | param_name = conv_params.generate_param_name(param_category, suffix)
371 | channels_out, channels_in, filter_h, filter_w = conv_params.conv_weight_dims
372 | if param_category == 'bn':
373 | param_shape = [channels_out]
374 | elif param_category == 'conv':
375 | if suffix == 'weights':
376 | param_shape = [channels_out, channels_in, filter_h, filter_w]
377 | #print(param_shape)
378 | elif suffix == 'bias':
379 | param_shape = [channels_out]
380 |         param_size = np.prod(np.array(param_shape))
381 | param_data = np.ndarray(
382 | shape=param_shape,
383 | dtype='float32',
384 | buffer=self.weights_file.read(param_size * 4))
385 | param_data = param_data.flatten().astype(float)
386 | return param_name, param_data, param_shape
387 |
388 |
389 | class GraphBuilderONNX(object):
390 | """Class for creating an ONNX graph from a previously generated list of layer dictionaries."""
391 |
392 | def __init__(self, model_name, output_tensors):
393 | """Initialize with all DarkNet default parameters used creating
394 | YOLO, and specify the output tensors as an OrderedDict for their
395 | output dimensions with their names as keys.
396 |
397 | Keyword argument:
398 |         output_tensors -- an OrderedDict mapping output tensor names to their
399 |         output dimensions
400 | """
401 | self.model_name = model_name
402 | self.output_tensors = output_tensors
403 | self._nodes = list()
404 | self.graph_def = None
405 | self.input_tensor = None
406 | self.epsilon_bn = 1e-5
407 | self.momentum_bn = 0.99
408 | self.alpha_lrelu = 0.1
409 | self.param_dict = OrderedDict()
410 | self.major_node_specs = list()
411 | self.batch_size = 1
412 | self.route_spec = 0 # keeping track of the current active 'route'
413 |
414 | def build_onnx_graph(
415 | self,
416 | layer_configs,
417 | weights_file_path,
418 | verbose=True):
419 | """Iterate over all layer configs (parsed from the DarkNet
420 | representation of YOLO), create an ONNX graph, populate it with
421 | weights from the weights file and return the graph definition.
422 |
423 | Keyword arguments:
424 | layer_configs -- an OrderedDict object with all parsed layers' configurations
425 | weights_file_path -- location of the weights file
426 | verbose -- toggles if the graph is printed after creation (default: True)
427 | """
428 | for layer_name in layer_configs.keys():
429 | layer_dict = layer_configs[layer_name]
430 | major_node_specs = self._make_onnx_node(layer_name, layer_dict)
431 | if major_node_specs.name is not None:
432 | self.major_node_specs.append(major_node_specs)
433 | # remove dummy 'route' and 'yolo' nodes
434 | self.major_node_specs = [node for node in self.major_node_specs
435 | if 'dummy' not in node.name]
436 | outputs = list()
437 | for tensor_name in self.output_tensors.keys():
438 | output_dims = [self.batch_size, ] + \
439 | self.output_tensors[tensor_name]
440 | output_tensor = helper.make_tensor_value_info(
441 | tensor_name, TensorProto.FLOAT, output_dims)
442 | outputs.append(output_tensor)
443 | inputs = [self.input_tensor]
444 | weight_loader = WeightLoader(weights_file_path)
445 | initializer = list()
446 | # If a layer has parameters, add them to the initializer and input lists.
447 | for layer_name in self.param_dict.keys():
448 | _, layer_type = layer_name.split('_', 1)
449 | params = self.param_dict[layer_name]
450 | if layer_type == 'convolutional':
451 | #print('%s ' % layer_name, end='')
452 | initializer_layer, inputs_layer = weight_loader.load_conv_weights(
453 | params)
454 | initializer.extend(initializer_layer)
455 | inputs.extend(inputs_layer)
456 | elif layer_type == 'upsample':
457 | initializer_layer, inputs_layer = weight_loader.load_upsample_scales(
458 | params)
459 | initializer.extend(initializer_layer)
460 | inputs.extend(inputs_layer)
461 | del weight_loader
462 | self.graph_def = helper.make_graph(
463 | nodes=self._nodes,
464 | name=self.model_name,
465 | inputs=inputs,
466 | outputs=outputs,
467 | initializer=initializer
468 | )
469 | if verbose:
470 | print(helper.printable_graph(self.graph_def))
471 | model_def = helper.make_model(self.graph_def,
472 | producer_name='NVIDIA TensorRT sample')
473 | return model_def
474 |
475 | def _make_onnx_node(self, layer_name, layer_dict):
476 | """Take in a layer parameter dictionary, choose the correct function for
477 | creating an ONNX node and store the information important to graph creation
478 |         as a MajorNodeSpecs object.
479 |
480 | Keyword arguments:
481 | layer_name -- the layer's name (also the corresponding key in layer_configs)
482 | layer_dict -- a layer parameter dictionary (one element of layer_configs)
483 | """
484 | layer_type = layer_dict['type']
485 | if self.input_tensor is None:
486 | if layer_type == 'net':
487 | major_node_output_name, major_node_output_channels = self._make_input_tensor(
488 | layer_name, layer_dict)
489 | major_node_specs = MajorNodeSpecs(major_node_output_name,
490 | major_node_output_channels)
491 | else:
492 | raise ValueError('The first node has to be of type "net".')
493 | else:
494 | node_creators = dict()
495 | node_creators['convolutional'] = self._make_conv_node
496 | node_creators['maxpool'] = self._make_maxpool_node
497 | node_creators['shortcut'] = self._make_shortcut_node
498 | node_creators['route'] = self._make_route_node
499 | node_creators['upsample'] = self._make_upsample_node
500 | node_creators['yolo'] = self._make_yolo_node
501 |
502 | if layer_type in node_creators.keys():
503 | major_node_output_name, major_node_output_channels = \
504 | node_creators[layer_type](layer_name, layer_dict)
505 | major_node_specs = MajorNodeSpecs(major_node_output_name,
506 | major_node_output_channels)
507 | else:
508 | print(
509 | 'Layer of type %s not supported, skipping ONNX node generation.' %
510 | layer_type)
511 | major_node_specs = MajorNodeSpecs(layer_name,
512 | None)
513 | return major_node_specs
514 |
515 | def _make_input_tensor(self, layer_name, layer_dict):
516 | """Create an ONNX input tensor from a 'net' layer and store the batch size.
517 |
518 | Keyword arguments:
519 | layer_name -- the layer's name (also the corresponding key in layer_configs)
520 | layer_dict -- a layer parameter dictionary (one element of layer_configs)
521 | """
522 | #batch_size = layer_dict['batch']
523 | channels = layer_dict['channels']
524 | height = layer_dict['height']
525 | width = layer_dict['width']
526 | #self.batch_size = batch_size
527 | input_tensor = helper.make_tensor_value_info(
528 | str(layer_name), TensorProto.FLOAT, [
529 | self.batch_size, channels, height, width])
530 | self.input_tensor = input_tensor
531 | return layer_name, channels
532 |
533 | def _get_previous_node_specs(self, target_index=0):
534 | """Get a previously ONNX node.
535 |
536 | Target index can be passed for jumping to a specific index.
537 |
538 | Keyword arguments:
539 | target_index -- optional for jumping to a specific index,
540 | default: 0 for the previous element, while
541 | taking 'route' spec into account
542 | """
543 | if target_index == 0:
544 | if self.route_spec != 0:
545 | previous_node = self.major_node_specs[self.route_spec]
546 | assert 'dummy' not in previous_node.name
547 | self.route_spec = 0
548 | else:
549 | previous_node = self.major_node_specs[-1]
550 | else:
551 | previous_node = self.major_node_specs[target_index]
552 | assert previous_node.created_onnx_node
553 | return previous_node
554 |
555 | def _make_conv_node(self, layer_name, layer_dict):
556 | """Create an ONNX Conv node with optional batch normalization and
557 | activation nodes.
558 |
559 | Keyword arguments:
560 | layer_name -- the layer's name (also the corresponding key in layer_configs)
561 | layer_dict -- a layer parameter dictionary (one element of layer_configs)
562 | """
563 | previous_node_specs = self._get_previous_node_specs()
564 | inputs = [previous_node_specs.name]
565 | previous_channels = previous_node_specs.channels
566 | kernel_size = layer_dict['size']
567 | stride = layer_dict['stride']
568 | filters = layer_dict['filters']
569 | batch_normalize = False
570 | if 'batch_normalize' in layer_dict.keys(
571 | ) and layer_dict['batch_normalize'] == 1:
572 | batch_normalize = True
573 |
574 | kernel_shape = [kernel_size, kernel_size]
575 | weights_shape = [filters, previous_channels] + kernel_shape
576 | conv_params = ConvParams(layer_name, batch_normalize, weights_shape)
577 |
578 | strides = [stride, stride]
579 | dilations = [1, 1]
580 | weights_name = conv_params.generate_param_name('conv', 'weights')
581 | inputs.append(weights_name)
582 | if not batch_normalize:
583 | bias_name = conv_params.generate_param_name('conv', 'bias')
584 | inputs.append(bias_name)
585 |
586 | conv_node = helper.make_node(
587 | 'Conv',
588 | inputs=inputs,
589 | outputs=[layer_name],
590 | kernel_shape=kernel_shape,
591 | strides=strides,
592 | auto_pad='SAME_LOWER',
593 | dilations=dilations,
594 | name=layer_name
595 | )
596 | self._nodes.append(conv_node)
597 | inputs = [layer_name]
598 | layer_name_output = layer_name
599 |
600 | if batch_normalize:
601 | layer_name_bn = layer_name + '_bn'
602 | bn_param_suffixes = ['scale', 'bias', 'mean', 'var']
603 | for suffix in bn_param_suffixes:
604 | bn_param_name = conv_params.generate_param_name('bn', suffix)
605 | inputs.append(bn_param_name)
606 | batchnorm_node = helper.make_node(
607 | 'BatchNormalization',
608 | inputs=inputs,
609 | outputs=[layer_name_bn],
610 | epsilon=self.epsilon_bn,
611 | momentum=self.momentum_bn,
612 | name=layer_name_bn
613 | )
614 | self._nodes.append(batchnorm_node)
615 | inputs = [layer_name_bn]
616 | layer_name_output = layer_name_bn
617 |
618 | if layer_dict['activation'] == 'leaky':
619 | layer_name_lrelu = layer_name + '_lrelu'
620 |
621 | lrelu_node = helper.make_node(
622 | 'LeakyRelu',
623 | inputs=inputs,
624 | outputs=[layer_name_lrelu],
625 | name=layer_name_lrelu,
626 | alpha=self.alpha_lrelu
627 | )
628 | self._nodes.append(lrelu_node)
629 | inputs = [layer_name_lrelu]
630 | layer_name_output = layer_name_lrelu
631 | elif layer_dict['activation'] == 'mish':
632 | layer_name_softplus = layer_name + '_softplus'
633 | layer_name_tanh = layer_name + '_tanh'
634 | layer_name_mish = layer_name + '_mish'
635 |
636 | softplus_node = helper.make_node(
637 | 'Softplus',
638 | inputs=inputs,
639 | outputs=[layer_name_softplus],
640 | name=layer_name_softplus,
641 | )
642 | self._nodes.append(softplus_node)
643 | tanh_node = helper.make_node(
644 | 'Tanh',
645 | inputs=[layer_name_softplus],
646 | outputs=[layer_name_tanh],
647 | name=layer_name_tanh,
648 | )
649 | self._nodes.append(tanh_node)
650 |
651 | inputs.append(layer_name_tanh)
652 | mish_node = helper.make_node(
653 | 'Mul',
654 | inputs=inputs,
655 | outputs=[layer_name_mish],
656 | name=layer_name_mish,
657 | )
658 | self._nodes.append(mish_node)
659 |
660 | inputs = [layer_name_mish]
661 | layer_name_output = layer_name_mish
662 | elif layer_dict['activation'] == 'linear':
663 | pass
664 | else:
665 | print('Activation not supported.')
666 |
667 | self.param_dict[layer_name] = conv_params
668 | return layer_name_output, filters
669 |
670 | def _make_shortcut_node(self, layer_name, layer_dict):
671 | """Create an ONNX Add node with the shortcut properties from
672 | the DarkNet-based graph.
673 |
674 | Keyword arguments:
675 | layer_name -- the layer's name (also the corresponding key in layer_configs)
676 | layer_dict -- a layer parameter dictionary (one element of layer_configs)
677 | """
678 | shortcut_index = layer_dict['from']
679 | activation = layer_dict['activation']
680 | assert activation == 'linear'
681 |
682 | first_node_specs = self._get_previous_node_specs()
683 | second_node_specs = self._get_previous_node_specs(
684 | target_index=shortcut_index)
685 | assert first_node_specs.channels == second_node_specs.channels
686 | channels = first_node_specs.channels
687 | inputs = [first_node_specs.name, second_node_specs.name]
688 | shortcut_node = helper.make_node(
689 | 'Add',
690 | inputs=inputs,
691 | outputs=[layer_name],
692 | name=layer_name,
693 | )
694 | self._nodes.append(shortcut_node)
695 | return layer_name, channels
696 |
697 | def _make_route_node(self, layer_name, layer_dict):
698 | """If the 'layers' parameter from the DarkNet configuration is only one index, continue
699 | node creation at the indicated (negative) index. Otherwise, create an ONNX Concat node
700 | with the route properties from the DarkNet-based graph.
701 |
702 | Keyword arguments:
703 | layer_name -- the layer's name (also the corresponding key in layer_configs)
704 | layer_dict -- a layer parameter dictionary (one element of layer_configs)
705 | """
706 | route_node_indexes = layer_dict['layers']
707 | if len(route_node_indexes) == 1:
708 | if 'groups' in layer_dict.keys():
709 | # for CSPNet-kind of architecture
710 | assert 'group_id' in layer_dict.keys()
711 | groups = layer_dict['groups']
712 | group_id = int(layer_dict['group_id'])
713 | assert group_id < groups
714 | index = route_node_indexes[0]
715 | if index > 0:
716 | # +1 for input node (same reason as below)
717 | index += 1
718 | route_node_specs = self._get_previous_node_specs(
719 | target_index=index)
720 | assert route_node_specs.channels % groups == 0
721 | channels = route_node_specs.channels // groups
722 |
723 | outputs = [layer_name + '_dummy%d' % i for i in range(groups)]
724 | outputs[group_id] = layer_name
725 | route_node = helper.make_node(
726 | 'Split',
727 | axis=1,
728 | split=[channels] * groups,
729 | inputs=[route_node_specs.name],
730 | outputs=outputs,
731 | name=layer_name,
732 | )
733 | self._nodes.append(route_node)
734 | else:
735 | if route_node_indexes[0] < 0:
736 | # route should skip self, thus -1
737 | self.route_spec = route_node_indexes[0] - 1
738 | elif route_node_indexes[0] > 0:
739 | # +1 for input node (same reason as below)
740 | self.route_spec = route_node_indexes[0] + 1
741 | # This dummy route node would be removed in the end.
742 | layer_name = layer_name + '_dummy'
743 | channels = 1
744 | else:
745 | assert 'groups' not in layer_dict.keys(), \
746 | 'groups not implemented for multiple-input route layer!'
747 | inputs = list()
748 | channels = 0
749 | for index in route_node_indexes:
750 | if index > 0:
751 | # Increment by one because we count the input as
752 | # a node (DarkNet does not)
753 | index += 1
754 | route_node_specs = self._get_previous_node_specs(
755 | target_index=index)
756 | inputs.append(route_node_specs.name)
757 | channels += route_node_specs.channels
758 | assert inputs
759 | assert channels > 0
760 |
761 | route_node = helper.make_node(
762 | 'Concat',
763 | axis=1,
764 | inputs=inputs,
765 | outputs=[layer_name],
766 | name=layer_name,
767 | )
768 | self._nodes.append(route_node)
769 | return layer_name, channels
770 |
771 | def _make_upsample_node(self, layer_name, layer_dict):
772 | """Create an ONNX Upsample node with the properties from
773 | the DarkNet-based graph.
774 |
775 | Keyword arguments:
776 | layer_name -- the layer's name (also the corresponding key in layer_configs)
777 | layer_dict -- a layer parameter dictionary (one element of layer_configs)
778 | """
779 | upsample_factor = float(layer_dict['stride'])
780 | # Create the scales array with node parameters
781 | scales = np.array([1.0, 1.0, upsample_factor, upsample_factor]).astype(np.float32)
782 | previous_node_specs = self._get_previous_node_specs()
783 | inputs = [previous_node_specs.name]
784 |
785 | channels = previous_node_specs.channels
786 | assert channels > 0
787 | upsample_params = UpsampleParams(layer_name, scales)
788 | scales_name = upsample_params.generate_param_name()
789 | # For ONNX opset >= 9, the Upsample node takes the scales array
790 | # as an input.
791 | inputs.append(scales_name)
792 |
793 | upsample_node = helper.make_node(
794 | 'Upsample',
795 | mode='nearest',
796 | inputs=inputs,
797 | outputs=[layer_name],
798 | name=layer_name,
799 | )
800 | self._nodes.append(upsample_node)
801 | self.param_dict[layer_name] = upsample_params
802 | return layer_name, channels
803 |
804 | def _make_maxpool_node(self, layer_name, layer_dict):
805 | """Create an ONNX Maxpool node with the properties from
806 | the DarkNet-based graph.
807 |
808 | Keyword arguments:
809 | layer_name -- the layer's name (also the corresponding key in layer_configs)
810 | layer_dict -- a layer parameter dictionary (one element of layer_configs)
811 | """
812 | stride = layer_dict['stride']
813 | kernel_size = layer_dict['size']
814 | previous_node_specs = self._get_previous_node_specs()
815 | inputs = [previous_node_specs.name]
816 | channels = previous_node_specs.channels
817 | kernel_shape = [kernel_size, kernel_size]
818 | strides = [stride, stride]
819 | assert channels > 0
820 | maxpool_node = helper.make_node(
821 | 'MaxPool',
822 | inputs=inputs,
823 | outputs=[layer_name],
824 | kernel_shape=kernel_shape,
825 | strides=strides,
826 | auto_pad='SAME_UPPER',
827 | name=layer_name,
828 | )
829 | self._nodes.append(maxpool_node)
830 | return layer_name, channels
831 |
832 | def _make_yolo_node(self, layer_name, layer_dict):
833 | """Create an ONNX Yolo node.
834 |
835 |         These are dummy nodes that will be removed at the end.
836 | """
837 | channels = 1
838 | return layer_name + '_dummy', channels
839 |
840 |
841 | def generate_md5_checksum(local_path):
842 | """Returns the MD5 checksum of a local file.
843 |
844 | Keyword argument:
845 | local_path -- path of the file whose checksum shall be generated
846 | """
847 | with open(local_path, 'rb') as local_file:
848 | data = local_file.read()
849 | return hashlib.md5(data).hexdigest()
850 |
851 |
852 | def main():
853 | """Run the DarkNet-to-ONNX conversion for YOLO (v3 or v4)."""
854 | if sys.version_info[0] < 3:
855 |         raise SystemExit('ERROR: This modified version of the yolov3_to_onnx.py '
856 | 'script is only compatible with python3...')
857 |
858 | parser = argparse.ArgumentParser()
859 | parser.add_argument(
860 | '-c', '--category_num', type=int, default=80,
861 | help='number of object categories [80]')
862 | parser.add_argument(
863 | '-m', '--model', type=str, required=True,
864 | help=('[yolov3|yolov3-tiny|yolov3-spp|yolov4|yolov4-tiny]-'
865 | '[{dimension}], where dimension could be a single '
866 | 'number (e.g. 288, 416, 608) or WxH (e.g. 416x256)'))
867 | args = parser.parse_args()
868 | if args.category_num <= 0:
869 | raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
870 |
871 | cfg_file_path = '%s.cfg' % args.model
872 | if not os.path.isfile(cfg_file_path):
873 | raise SystemExit('ERROR: file (%s) not found!' % cfg_file_path)
874 | weights_file_path = '%s.weights' % args.model
875 | if not os.path.isfile(weights_file_path):
876 | raise SystemExit('ERROR: file (%s) not found!' % weights_file_path)
877 | output_file_path = '%s.onnx' % args.model
878 |
879 | if not verify_classes(args.model, args.category_num):
880 | raise SystemExit('ERROR: bad category_num (%d)' % args.category_num)
881 |
882 | # Derive yolo input width/height from model name.
883 | # For example, "yolov4-416x256" -> width=416, height=256
884 | w, h = get_input_wh(args.model)
885 |
886 |     # These are the only layers DarkNetParser will extract parameters
887 |     # from. Layers of type 'yolo' are not parsed in detail because
888 |     # they are handled in the post-processing later.
889 | supported_layers = ['net', 'convolutional', 'maxpool',
890 | 'shortcut', 'route', 'upsample', 'yolo']
891 |
892 |     # Create a DarkNetParser object, and then use it to generate an
893 |     # OrderedDict with all layers' configs from the cfg file.
894 | print('Parsing DarkNet cfg file...')
895 | parser = DarkNetParser(supported_layers)
896 | layer_configs = parser.parse_cfg_file(cfg_file_path)
897 |
898 |     # We do not need the parser anymore once we have layer_configs.
899 | del parser
900 |
901 |     # From the above layer_configs, record the shapes (in CHW format) of the
902 |     # YOLO output tensors: each has 3 anchors * (category_num + 5) channels.
903 | output_tensor_dims = OrderedDict()
904 | c = (args.category_num + 5) * 3
905 | if 'yolov3' in args.model:
906 | if 'tiny' in args.model:
907 | output_tensor_dims['016_convolutional'] = [c, h // 32, w // 32]
908 | output_tensor_dims['023_convolutional'] = [c, h // 16, w // 16]
909 | elif 'spp' in args.model:
910 | output_tensor_dims['089_convolutional'] = [c, h // 32, w // 32]
911 | output_tensor_dims['101_convolutional'] = [c, h // 16, w // 16]
912 | output_tensor_dims['113_convolutional'] = [c, h // 8, w // 8]
913 | else:
914 | output_tensor_dims['082_convolutional'] = [c, h // 32, w // 32]
915 | output_tensor_dims['094_convolutional'] = [c, h // 16, w // 16]
916 | output_tensor_dims['106_convolutional'] = [c, h // 8, w // 8]
917 | elif 'yolov4' in args.model:
918 | if 'tiny' in args.model:
919 | output_tensor_dims['030_convolutional'] = [c, h // 32, w // 32]
920 | output_tensor_dims['037_convolutional'] = [c, h // 16, w // 16]
921 | else:
922 | output_tensor_dims['139_convolutional'] = [c, h // 8, w // 8]
923 | output_tensor_dims['150_convolutional'] = [c, h // 16, w // 16]
924 | output_tensor_dims['161_convolutional'] = [c, h // 32, w // 32]
925 | else:
926 | raise SystemExit('ERROR: unknown model (%s)!' % args.model)
927 |
928 | # Create a GraphBuilderONNX object with the specified output tensor
929 | # dimensions.
930 | print('Building ONNX graph...')
931 | builder = GraphBuilderONNX(args.model, output_tensor_dims)
932 |
933 | # Now generate an ONNX graph with weights from the previously parsed
934 | # layer configurations and the weights file.
935 | yolo_model_def = builder.build_onnx_graph(
936 | layer_configs=layer_configs,
937 | weights_file_path=weights_file_path,
938 | verbose=True)
939 |
940 | # Once we have the model definition, we do not need the builder anymore.
941 | del builder
942 |
943 | # Perform a sanity check on the ONNX model definition.
944 | print('Checking ONNX model...')
945 | onnx.checker.check_model(yolo_model_def)
946 |
947 | # Serialize the generated ONNX graph to this file.
948 | print('Saving ONNX file...')
949 | onnx.save(yolo_model_def, output_file_path)
950 |
951 | print('Done.')
952 |
953 |
954 | if __name__ == '__main__':
955 | main()
956 |
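957 | # Example usage (an illustrative sketch; it assumes the matching .cfg and
958 | # .weights files, e.g. yolov4-416.cfg and yolov4-416.weights, sit in the
959 | # current directory, as required by the file checks in main() above):
960 | #
961 | #   $ python3 yolo_to_onnx.py -m yolov4-416
962 | #   $ python3 yolo_to_onnx.py -m yolov4-416x256 -c 80
963 | #
964 | # The saved model (yolov4-416.onnx / yolov4-416x256.onnx) can then be fed to
965 | # onnx_to_tensorrt.py in this directory to build the TensorRT engine.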
--------------------------------------------------------------------------------